第一章
1
# Use the interpreter as a calculator: 12 divided by the sum 4 + 1
# (2.4 under Python 3's true division).
12/(4+1)
2
# 26 raised to the 100th power; Python integers are arbitrary precision,
# so the result is exact.
26**100
4
# Total number of tokens in text2.
len(text2)
# Number of distinct tokens in text2 (duplicates collapse in the set).
len(set(text2))
7
# Count the bigrams built from text5; list() materialises the result of
# nltk.bigrams so that len() can be applied to it.
len(list(nltk.bigrams(text5)))
15
# Words of text5 that start with 'b', in alphabetical order.
# Deduplicate with set() first so each word is listed once rather than
# once per occurrence in the corpus.
sorted(w for w in set(text5) if w.startswith('b'))
17
def find_word(text, word):
    """Print the 1-based position of each occurrence of ``word`` in ``text``.

    Args:
        text: A sequence supporting ``len()`` and ``index(value, start)``,
            such as a list of tokens.
        word: The token to search for.

    Returns:
        None. Positions are printed one per line. When a search finds no
        further match, a closing message is printed before returning.
    """
    pos = 0
    while pos < len(text):
        try:
            # index() gives a 0-based offset; adding 1 yields the 1-based
            # position to report and also moves the next search start past
            # this match.
            pos = text.index(word, pos) + 1
        except ValueError:
            # index() raises ValueError when no match remains. Catching
            # only that lets unrelated errors surface instead of being
            # reported as "all found".
            print('all have bean found!')
            return
        print(pos)
# Report where 'sunset' occurs in text9; find_word is given a list copy
# of the text to search.
find_word(list(text9),'sunset')
22
# Four-letter words of text5, in the order most_common() returns them
# (most frequent first).
fd = FreqDist(text5)
[word for word, _count in fd.most_common() if len(word) == 4]
23
# Every token of text6 that is written entirely in upper case.
[token for token in text6 if token.isupper()]
24
# Tokens of text6 that end in 'ize', contain 'pt', and are capitalised
# (first character upper case, the rest lower case).
[
    token
    for token in text6
    if token.endswith('ize')
    and 'pt' in token
    and token[0].isupper()
    and token[1:].islower()
]
25
# Words of `sent` beginning with 'sh'.
[word for word in sent if word.startswith('sh')]
# Words of `sent` longer than four characters.
[word for word in sent if len(word) > 4]
28
def percent(word, text):
    """Return how often ``word`` occurs in ``text`` as a percentage string.

    Args:
        word: The token to count.
        text: A sized iterable of tokens (for example a list of words).

    Returns:
        A string such as ``'50.0%'``: the occurrence count times 100,
        divided by the number of tokens. An empty ``text`` yields
        ``'0.0%'`` instead of raising ZeroDivisionError.
    """
    total = len(text)
    if total == 0:
        return '0.0%'
    # Count matches directly; a full frequency table is not needed to look
    # up a single word.
    occurrences = sum(1 for token in text if token == word)
    return '{}%'.format(occurrences * 100 / total)
第二章
2
# Load Austen's "Persuasion" from the Gutenberg corpus.
# Bind with `=`: the original `==` was only a comparison, so the name was
# never assigned by this statement.
persusion = nltk.Text(nltk.corpus.gutenberg.words('austen-persuasion.txt'))
len(persusion)       # number of word tokens
len(set(persusion))  # number of distinct word types
4
# Conditional frequency distribution: for each of 'men', 'women' and
# 'people' (matched case-insensitively against whole tokens), count the
# occurrences per file, keyed by the first four characters of the file id.
cfd = ConditionalFreqDist(
    (word.lower(), fileid[:4])
    for fileid in state_union.fileids()
    for word in state_union.words(fileid)
    if word.lower() in ('men', 'women', 'people')
)
8
male_names = names.words('male.txt')
female_names = names.words('female.txt')
fd_male = nltk.FreqDist(male_names)
fd_female = nltk.FreqDist(female_names)
# Condition on the file id so the initial-letter counts of the two name
# lists can be compared side by side. Conditioning on a per-name count and
# keeping only names more frequent in male.txt, as before, leaves nothing
# to compare.
cfd = nltk.ConditionalFreqDist(
    (fileid, name[0])
    for fileid in names.fileids()
    for name in names.words(fileid)
)
12
# Count dictionary entries per word: a word with several pronunciations
# appears in several (word, pronunciation) entries.
fd = FreqDist(word for (word, _pron) in cmudict.entries())
# Number of distinct words in the dictionary.
len(fd)
# Fraction of distinct words that have more than one pronunciation.
sum(1 for count in fd.values() if count > 1) / len(fd)
15
# Words occurring at least three times in the Brown corpus, most frequent
# first. The threshold is inclusive (>= 3); `> 3` would drop the words that
# occur exactly three times.
fd = FreqDist(brown.words())
[word for (word, count) in fd.most_common() if count >= 3]
16
def word_diversity(words):
    """Return the token/type ratio of ``words``.

    Args:
        words: A sized iterable of hashable tokens.

    Returns:
        ``len(words)`` divided by the number of distinct tokens, or ``0.0``
        for empty input (which would otherwise divide by zero).
    """
    distinct = len(set(words))
    if distinct == 0:
        return 0.0
    return len(words) / distinct
# Print one tab-separated row per Brown category: the category name and
# its word_diversity score, formatted to two decimal places.
for category in brown.categories():
    diversity = word_diversity(brown.words(categories=category))
    print('%s\t%.2f' % (category, diversity))
17
def fun(text):
    """Return the 50 most frequent non-stopword tokens of ``text``.

    Tokens are lowercased before both the stopword test and counting, so a
    capitalised stopword such as ``'The'`` is filtered out as well.

    Args:
        text: An iterable of string tokens.

    Returns:
        A list of at most 50 lowercased tokens, most frequent first.
    """
    # Load the stopword list once, as a set, instead of once per token.
    english_stopwords = set(stopwords.words('english'))
    lowered = (w.lower() for w in text)
    fd = FreqDist(w for w in lowered if w not in english_stopwords)
    return [w for (w, _) in fd.most_common(50)]
18
def fun(text):
    """Return the 50 most frequent bigrams of ``text`` with no stopwords.

    A bigram is skipped when either of its words, compared
    case-insensitively, is an English stopword.

    Args:
        text: An iterable of string tokens.

    Returns:
        A list of at most 50 ``((w1, w2), count)`` pairs, most frequent
        first, as produced by ``FreqDist.most_common``.
    """
    # Load the stopword list once, as a set, instead of twice per bigram.
    english_stopwords = set(stopwords.words('english'))
    fd = FreqDist(
        (w1, w2)
        for (w1, w2) in bigrams(text)
        if w1.lower() not in english_stopwords
        and w2.lower() not in english_stopwords
    )
    return fd.most_common(50)
20
def word_freq(text, word):
    """Return the relative frequency of ``word`` in ``text``.

    Args:
        text: A sized iterable of tokens.
        word: The token to count.

    Returns:
        The number of occurrences of ``word`` divided by ``len(text)``, or
        ``0.0`` for an empty ``text`` (which would otherwise divide by
        zero).
    """
    total = len(text)
    if total == 0:
        return 0.0
    # Count in a single pass without first copying the tokens into a
    # wrapper object.
    count = sum(1 for token in text if token == word)
    return count / total
作者:Jasonhaven.D
链接:http://www.jianshu.com/u/ed031e432b82
來源:简书
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
网友评论