本文介绍如何用 Python 统计一个文本中每个单词出现的次数,并按出现次数对结果进行排序,代码如下:
# -- coding: utf-8 --
import sys
from collections import Counter
# 把数据写到文件中
def write2File(filePath, msg):
    """Append *msg* followed by a newline to the file at *filePath*.

    The file is opened in append mode, so it is created when missing and
    existing content is kept.
    """
    # The context manager closes (and thereby flushes) the file even when a
    # write raises, which the manual open/close sequence did not guarantee.
    with open(filePath, "a") as outFile:
        outFile.write(msg)
        outFile.write('\n')
## 入口
# Substrings that are blanked out before the text is split into words.
# They are applied in this order; the order matters because the earlier
# single-character replacements can create the spaces that the "- " and
# " -" patterns then match.
# TODO: extend this list to cover more punctuation.
_SEPARATORS = (',', '(', ')', '&', '/', '\\', '"', '- ', ' -', '\'')


def countWords(contents):
    """Count how often each word occurs in *contents*.

    Every entry of ``_SEPARATORS`` is replaced by a single space, then the
    text is split on whitespace and the resulting words are counted.

    Returns a list of ``(word, count)`` tuples sorted by count, highest
    first.
    """
    for separator in _SEPARATORS:
        contents = contents.replace(separator, ' ')
    # Counter replaces the dict.has_key() bookkeeping, which no longer
    # exists on Python 3.
    return Counter(contents.split()).most_common()


def main():
    """Read the file named by the first CLI argument and append its word
    counts to ``result.CSV`` as ``word,count`` lines, most frequent first.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: python <script> <input-file>")
    # The first argument is the input file path.
    filePath = sys.argv[1]
    with open(filePath) as inputFile:
        contents = inputFile.read()
    rows = [word + "," + str(count) for word, count in countWords(contents)]
    # write2File appends one trailing newline, so joining the rows with "\n"
    # yields the same file content as one call per row while opening the
    # output file only once. Skip the call entirely when there is nothing
    # to write so an empty input does not add a blank line.
    if rows:
        write2File("result.CSV", "\n".join(rows))


if __name__ == "__main__":
    main()
网友评论