https://pypi.org/project/wordcloud/
安装
pip install wordcloud
conda install -c conda-forge wordcloud
命令行
$ wordcloud_cli --text mytext.txt --imagefile wordcloud.png
PDF文档也能处理
$ pdftotext mydocument.pdf - | wordcloud_cli --imagefile wordcloud.png
脚本
#!/usr/bin/env python2
# -*- coding: utf8 -*-
import csv
import codecs
import jieba
import numpy as np
from PIL import Image
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import random
from os import path
import sys
# Python 2 idiom (the shebang above targets python2): make UTF-8 the
# process-wide default encoding used for implicit str/unicode conversions.
# NOTE(review): reload(sys) is presumably what makes setdefaultencoding
# reachable again after interpreter start-up -- confirm. Neither call is
# expected to work on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
# Directory containing this script; the mask image ("car.jpeg") and the
# output image ("user.png") are resolved relative to it.
d = path.dirname(__file__)
def word_cloud_plot(txt, font_path=None):
    """Render *txt* as a mask-shaped word cloud, save it and display it.

    The mask image ``car.jpeg`` is read from the script directory ``d``.
    The rendered cloud is written to ``user.png`` in the same directory and
    then shown in a matplotlib window.

    Args:
        txt: Text to build the cloud from (words separated by whitespace).
        font_path: Optional path to a font file, forwarded to ``WordCloud``.
            ``None`` keeps the library default.
            NOTE(review): the default font presumably lacks CJK glyphs, so
            Chinese text likely needs an explicit font here -- confirm.
    """
    mask_path = path.join(d, "car.jpeg")
    print(mask_path)
    mask_image = Image.open(mask_path)
    print(mask_image.size)
    mask = np.array(mask_image)
    print(mask.shape)

    # width/height were intended to control output sharpness.
    # NOTE(review): the wordcloud docs state they are ignored when a mask
    # is supplied (the mask's shape is used instead) -- confirm.
    cloud = WordCloud(
        font_path=font_path,
        background_color="white",
        max_words=1000,
        mask=mask,
        margin=2,
        width=1800,
        height=800,
    )
    cloud.generate(txt)
    # to_file() is what persists the image; the original trailing bare
    # `plt.savefig` was never called and has been dropped.
    cloud.to_file(path.join(d, "user.png"))

    # Open a plot of the generated image.
    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
def plot_title_cloud(txtlist):
    """Join *txtlist* with single spaces and plot it as a word cloud.

    The joined text and its type are printed before the text is handed to
    ``word_cloud_plot``.

    Args:
        txtlist: Iterable of strings to combine into one text.
    """
    joined = ' '.join(txtlist)
    print(joined)
    print(type(joined))
    word_cloud_plot(joined)
def read_word(file_name):
    """Expand a ``word<TAB>count`` file into a flat list of repeated words.

    Every non-blank line is split on tabs; the first field is the word and
    the second is an integer repeat count. Blank lines are skipped.

    Args:
        file_name: Path to a UTF-8 encoded, tab-separated text file.

    Returns:
        A list in file order where each word appears ``count`` times.

    Raises:
        IndexError: A non-blank line has no tab-separated count field.
        ValueError: The count field is not an integer.
    """
    words = []
    # Read-only access is sufficient ('r+' needlessly required write
    # permission). The context manager closes the file even when a line
    # fails to parse, and decoding happens once at the I/O boundary.
    with codecs.open(file_name, 'r', 'utf-8') as fo:
        for line in fo:
            stripped = line.rstrip()
            if not stripped:
                continue
            fields = stripped.split("\t")
            print(fields[0])
            words.extend([fields[0]] * int(fields[1]))
    print(len(words))
    return words
if __name__ == '__main__':
    # Keep only the first tab-separated column of every line, lower-cased,
    # and restore a single trailing newline after rstrip().
    # The list is built eagerly while the file is still open, and a plain
    # '\n' is appended: the original `'\n'.encode("utf-8")` applied only to
    # the newline literal and breaks str concatenation on Python 3.
    with codecs.open('/Users/西游记.txt', "r", 'utf-8') as fo:
        t1 = [line.rstrip().lower().split("\t")[0] + '\n' for line in fo]
    plot_title_cloud(t1)
网友评论