美文网首页
python-wordcloud

python-wordcloud

作者: 点点渔火 | 来源:发表于2018-08-21 10:50 被阅读0次

    https://pypi.org/project/wordcloud/

    安装

    pip install wordcloud
    
    conda install -c conda-forge wordcloud
    

    命令行

       $ wordcloud_cli --text mytext.txt --imagefile wordcloud.png
    

    PDF文档也能处理

    $ pdftotext mydocument.pdf - | wordcloud_cli --imagefile wordcloud.png
    

    脚本

    #!/usr/bin/env python2
    # -*- coding: utf8 -*-
    
    import csv
    import codecs
    import jieba
    import numpy as np
    from PIL import Image
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt
    import random
    from os import path
    import sys
    # Python 2-only idiom: reload ``sys`` so that ``setdefaultencoding`` is
    # available again, then make UTF-8 the interpreter-wide default codec used
    # for implicit str <-> unicode conversions.
    # NOTE(review): the ``reload`` builtin does not exist on Python 3, so this
    # script as written only runs on Python 2.
    reload(sys)
    sys.setdefaultencoding('utf8')
    # Directory part of this script's path; the functions below join it with
    # the mask image name ("car.jpeg") and the output image name ("user.png").
    d = path.dirname(__file__)
    
    
    # Word-cloud plotting helper (usable for Chinese text).
    def word_cloud_plot(txt, font_path=None):
        """Render ``txt`` as a word cloud shaped by ``car.jpeg`` and display it.

        The mask image ``car.jpeg`` is loaded from the script directory ``d``.
        The generated cloud is written to ``user.png`` in that same directory
        and then shown in a matplotlib window.  The mask path, the image size
        and the mask array shape are echoed to stdout.

        Args:
            txt: Text to visualise; handed unchanged to ``WordCloud.generate``.
            font_path: Optional path to an OTF/TTF font, forwarded to
                ``WordCloud``.  ``None`` (the default) keeps the library's
                built-in font.  Pass a CJK-capable font when ``txt`` contains
                Chinese, since the built-in font may not provide those glyphs.
        """
        mask_path = path.join(d, "car.jpeg")
        print(mask_path)
        mask_image = Image.open(mask_path)
        print(mask_image.size)
        alice_mask = np.array(mask_image)
        print(alice_mask.shape)

        # NOTE: per the wordcloud documentation, ``width``/``height`` are
        # ignored whenever ``mask`` is given -- the mask's shape decides the
        # output size.  They are kept only so the call matches the original.
        wordcloud = WordCloud(background_color="white", max_words=1000,
                              mask=alice_mask, font_path=font_path,
                              margin=2, width=1800, height=800)
        wordcloud.generate(txt)

        # Persist the rendered cloud; this is the only file the function writes.
        wordcloud.to_file(path.join(d, "user.png"))

        # Show the generated image without axes.
        plt.figure()
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.show()
    
    
    def plot_title_cloud(txtlist):
        """Join the given strings with single spaces and plot the result.

        The joined text and then its type are printed to stdout before the
        text is passed on to ``word_cloud_plot``.
        """
        joined = ' '.join(txtlist)
        # Echo the text first, then its type, one per line.
        for item in (joined, type(joined)):
            print(item)
        word_cloud_plot(joined)
    
    
    def read_word(file_name):
        """Expand a tab-separated ``word<TAB>count`` file into a flat word list.

        Each non-blank line is decoded as UTF-8 and split on tabs; the first
        field is the word and the second its integer count.  The word is
        repeated ``count`` times in the result.  Every word, and finally the
        total number of entries, is printed to stdout.

        Args:
            file_name: Path of the UTF-8 encoded input file.

        Returns:
            A list in which each word appears as many times as its count.

        Raises:
            IndexError: A non-blank line has no tab-separated count field.
            ValueError: A count field is not an integer.
        """
        words = []
        # Read-only binary mode: the file is never written, so it must not
        # require write permission, and ``with`` closes it even if a line
        # fails to parse.
        with open(file_name, 'rb') as fo:
            for raw_line in fo:
                stripped = raw_line.rstrip()
                if not stripped:
                    # Blank lines (e.g. a trailing newline) carry no entry.
                    continue
                fields = stripped.decode("utf-8").split("\t")
                print(fields[0])
                words.extend([fields[0]] * int(fields[1]))
        print(len(words))
        return words
    
    if __name__ == '__main__':
        # Script entry point: read the first tab-separated field of every line
        # of the input file and plot those fields as a word cloud.
        # An optional first command-line argument overrides the default input.
        source_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/西游记.txt'
        with codecs.open(source_path, "r", 'utf-8') as fo:
            # A list comprehension consumes the file while it is still open
            # (a lazy ``map`` would not on Python 3).  The former
            # ``'\n'.encode("utf-8")`` bound only to the newline literal and
            # is dropped; each entry is still "<lowercased field>\n".
            t1 = [line.rstrip().lower().split("\t")[0] + '\n' for line in fo]
        plot_title_cloud(t1)
    

    相关文章

      网友评论

          本文标题:python-wordcloud

          本文链接:https://www.haomeiwen.com/subject/bzediftx.html