Demo entry 6644804

t

   

Submitted by anonymous on Oct 06, 2017 at 17:04
Language: Python 3. Code size: 2.3 kB.

import math
from collections import Counter
from collections import defaultdict
from itertools import product

from nltk.corpus import brown
from nltk.util import ngrams

print('intialization....')
# The sentence to tag, one entry per word.
word = ['time','flies','like','an','arrow']
# Parallel tuples: tokens[i] carries the tag tags[i] in the corpus.
tokens, tags = zip(*brown.tagged_words())

# Unigram frequencies of tags and of tokens.
tagCounter = Counter(tags)
tokenCounter = Counter(tokens)

# tokenTags[token][tag] = number of times `token` was tagged `tag`.
# Reuse the already-materialized tuples instead of reading the corpus again.
tokenTags = defaultdict(Counter)
for token, tag in zip(tokens, tags):
    tokenTags[token][tag] += 1

# tagTags[tag1][tag2] = number of times tag2 immediately follows tag1.
# NOTE(review): the bigrams are taken over the whole flat tag sequence, so
# pairs that span a sentence boundary are presumably counted too -- confirm
# whether that is intended.
tagTags = defaultdict(Counter)
posBigrams = list(ngrams(tags, 2))

for tag1, tag2 in posBigrams:
    tagTags[tag1][tag2] += 1

# Every tag observed for at least one word of the sentence.
posTags = set()
for i in word:
    posTags.update(tokenTags[i])


##start
print('running start column.....')
# Count how often each tag opens a sentence: walk the sentences in corpus
# order, keeping `offset` at the index (into `tags`) of the current
# sentence's first token.
initialTags = Counter()
offset = 0
for sentence in brown.sents():
    initialTags[tags[offset]] += 1
    offset += len(sentence)

# Number of sentence starts counted; denominator for the start probability.
total_init = sum(initialTags.values())
    


print('running end column.....')
# Count how often each tag closes a sentence. `offset` is advanced to the
# index one past the current sentence's last token, so that token sits at
# offset - 1. (Advancing by len(x) - 1 instead would leave the index one
# position further behind with every sentence and count the wrong tags.)
offset = 0
endTags = Counter()
for x in brown.sents():
    offset += len(x)
    endTag = tags[offset - 1]
    endTags[endTag] += 1


# Total number of tagged tokens in the corpus.
total = len(tags)

#start calculating
print('calculating.....')
# Brute-force search: score every combination of tags that the words of the
# sentence were actually seen with, then print the highest-scoring one.
count = 0          # number of candidate tag sequences scored
tag_list = []      # candidate tag sequences, as tuples
prob_list = []     # prob_list[i] is the score of tag_list[i]

# Tags observed for each word, in the order the counters recorded them.
candidates = [list(tokenTags[w]) for w in word]

# Without at least one candidate tag per word there is nothing to score;
# fail with a clear message rather than an opaque max() error further down.
unknown = [w for w, options in zip(word, candidates) if not options]
if not word or unknown:
    raise SystemExit(
        'cannot tag sentence {!r}: words not found in corpus: {!r}'.format(word, unknown)
    )

# product() varies the last word's tag fastest, i.e. the same enumeration
# order as one nested loop per word, for a sentence of any length.
for thistag in product(*candidates):
    count += 1
    prob = 1
    tag_list.append(thistag)

    # Emission: P(word | tag) = count(word tagged tag) / count(tag).
    for w, tag in zip(word, thistag):
        prob = prob * (tokenTags[w][tag] / tagCounter[tag])

    # Transition: P(next tag | tag) = count(tag, next tag) / count(tag).
    for prev, nxt in zip(thistag, thistag[1:]):
        prob = prob * (tagTags[prev][nxt] / tagCounter[prev])

    # Start: share of sentences that open with the first tag.
    prob = prob * (initialTags[thistag[0]] / total_init)
    # End: how often the LAST word's tag closes a sentence, relative to
    # all occurrences of that tag.
    last = thistag[-1]
    prob = prob * (endTags[last] / tagCounter[last])
    prob_list.append(prob)


# First candidate with the maximal score wins ties.
max_value = max(prob_list)
max_index = prob_list.index(max_value)

print(tag_list[max_index])

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).