from collections import Counter

import jieba
# Count how often each main character is mentioned in "Romance of the Three
# Kingdoms" and print the ten most frequent names.

# Input files: the novel text and a stop-word list with one word per line.
NOVEL_PATH = "三国演义.txt"
STOPWORDS_PATH = "tingyongcibiao.txt"

# Frequent tokens that are dropped in addition to the stop-word file so that
# they do not crowd character names out of the ranking.
EXTRA_STOPWORDS = (
    "将军", "却说", "二人", "荆州", "商议", "主公", "军士", "军马", "引兵",
    "次日", "大喜", "天下", "东吴", "今日", "魏兵", "都督", "人马",
)

# Tokens that are counted under another (canonical) character name.
# NOTE(review): "丞相" is a title and is always attributed to 曹操 here, as in
# the original script; confirm that this is the intended attribution.
NAME_ALIASES = {
    "诸葛亮": "孔明",
    "孔明曰": "孔明",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "关公": "关羽",
    "云长": "关羽",
    "孟德": "曹操",
    "丞相": "曹操",
    "子龙": "赵云",
}


def load_stopwords(path, extra=()):
    """Return the stop words stored in *path* plus *extra* as a set.

    Args:
        path: UTF-8 text file containing one stop word per line.
        extra: Additional words to treat as stop words.

    Returns:
        A set of stop words, so membership tests are O(1) instead of the
        linear scan a list would need for every token of the novel.
    """
    with open(path, "r", encoding="utf-8") as fp:
        stopwords = set(fp.read().split("\n"))
    stopwords.update(extra)
    return stopwords


def count_names(words, stopwords=(), aliases=None):
    """Count candidate character names among segmented *words*.

    Args:
        words: Iterable of tokens (e.g. the result of ``jieba.lcut``).
        stopwords: Container of tokens to ignore; checked against the raw
            token, before alias resolution.
        aliases: Mapping from token to canonical name. Defaults to
            ``NAME_ALIASES``.

    Returns:
        A ``Counter`` mapping canonical name to number of occurrences.
    """
    if aliases is None:
        aliases = NAME_ALIASES
    counts = Counter()
    for word in words:
        # Single-character tokens are assumed not to be character names.
        if len(word) == 1 or word in stopwords:
            continue
        counts[aliases.get(word, word)] += 1
    return counts


def main():
    """Read the novel, segment it, and print the ten most frequent names."""
    with open(NOVEL_PATH, "r", encoding="utf-8") as f:
        txt = f.read()
    # Chinese word segmentation.
    words = jieba.lcut(txt)
    stopwords = load_stopwords(STOPWORDS_PATH, EXTRA_STOPWORDS)
    counts = count_names(words, stopwords)
    # most_common(10) returns fewer rows when fewer than ten names exist,
    # instead of failing on a missing index.
    for name, freq in counts.most_common(10):
        print("{:<10}:{:>5}".format(name, freq))


if __name__ == "__main__":
    main()
# 输出结果:
# 曹操        : 1380
# 孔明        : 1353
# 刘备        : 1196
# 关羽        :  776
# 张飞        :  341
# 赵云        :  286
# 孙权        :  261
# 吕布        :  258
# 司马懿       :  221
# 周瑜        :  217
# 网友评论