Mac系统终端命令
-
cd 切换目录（change directory）
-
cd / 切换到根目录
-
cd ~ 切换到当前用户的主目录
-
执行文件 ./文件名
-
ls -a 列出所有文件（包括隐藏文件）；查看文件详情用 ls -l
-
pwd 显示当前所处文件夹的完整路径
import jieba
def sanguo_ana(path='./novel/threekingdom.txt', top_n=10):
    """Print the most frequent multi-character words in a novel.

    The text is read from ``path``, segmented with ``jieba.lcut``, and
    every token longer than one character is counted. Alias spellings of
    the same character are folded into one canonical name, a fixed set of
    stop words is removed, and the ``top_n`` most frequent remaining
    words are printed as ``word count`` lines in descending order.

    The function also prints intermediate diagnostics (text length, the
    full token list, token count, and the raw count table) before the
    final ranking, exactly as the original script did.

    Args:
        path: Location of the UTF-8 encoded text file to analyse.
        top_n: Maximum number of ranked words to print. Fewer lines are
            printed when the text has fewer distinct words.

    Raises:
        OSError: If ``path`` cannot be opened for reading.
    """
    # 1. Read the novel text.
    with open(path, mode='r', encoding='UTF-8') as f:
        data = f.read()
    print(len(data))  # number of characters in the text

    # 2. Segment the text into words.
    word_list = jieba.lcut(data)
    print(word_list)
    print(len(word_list))  # number of tokens produced by the segmenter

    stop_words = {"将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议",
                  "如何", "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下",
                  "东吴", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人马", "不知",
                  "孔明曰", "玄德曰", "玄德", "云长"
                  }
    # Canonical name -> alias tokens whose counts are folded into it.
    aliases = {
        '孔明': ('孔明曰',),
        '刘备': ('玄德', '玄德曰'),
        '关公': ('云长',),
    }

    # 3. Count every token longer than one character.
    counts = {}
    for word in word_list:
        if len(word) > 1:
            counts[word] = counts.get(word, 0) + 1
    print(counts)

    # 4. Merge aliases. Missing names count as 0 instead of raising
    # KeyError, and no zero-count entry is created for an absent name.
    for name, others in aliases.items():
        total = counts.get(name, 0) + sum(counts.get(a, 0) for a in others)
        if total:
            counts[name] = total

    # 5. Drop stop words; pop() tolerates stop words absent from the text.
    for word in stop_words:
        counts.pop(word, None)

    # 6. Rank by frequency (stable sort keeps first-seen order on ties).
    count_list = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    # Slicing avoids IndexError when fewer than top_n words remain.
    for name, count in count_list[:top_n]:
        print(name, count)
# Run the analysis at module load; note this also executes on import.
sanguo_ana()
网友评论