美文网首页
Jieba分词词频统计绘制词云图Wordcloud套图(2018)

Jieba分词词频统计绘制词云图Wordcloud套图(2018)

作者: 画奴 | 来源:发表于2018-11-03 23:36 被阅读0次

    import jieba

    from wordcloud import WordCloud

    import matplotlib.pyplot as plt

    # Build `tf`, a word -> frequency table for the source document, keeping
    # only words worth drawing in the word cloud.
    # AI2017.txt comes from http://www.gov.cn/zhengce/content/2017-07/20/content_5211996.htm
    from collections import Counter

    # NOTE(review): both text files are opened with the platform default
    # encoding, exactly as before; pass encoding=... explicitly if they are
    # stored in a different encoding.
    with open('AI2017.txt', 'r') as f:
        renmin = f.read()

    # Register the custom dictionary before segmenting.
    jieba.load_userdict("AIDict.txt")

    # Segment with cut_all=False and tally every segment in one pass.
    counts = Counter(jieba.cut(renmin, cut_all=False))

    # Load the stop words as a set of whole words. Testing membership against
    # the raw file text is a substring test, which also discards any word that
    # merely appears inside a longer stop word.
    # Assumes stopword.txt separates stop words with whitespace (typically one
    # per line) -- TODO confirm against the actual file.
    with open('stopword.txt', 'r') as ft:
        stopword = set(ft.read().split())

    # Keep words that occur at least 5 times, are at least 2 characters long,
    # are not stop words and do not contain the character '一'.
    tf = {
        seg: freq
        for seg, freq in counts.items()
        if freq >= 5
        and len(seg) >= 2
        and seg not in stopword
        and '一' not in seg
    }

    #print(tf)

    #统计词频后绘制词云图

    from wordcloud import WordCloud,ImageColorGenerator

    import matplotlib.pyplot as plt

    from PIL import Image

    import numpy as np

    from scipy.misc import imread

    # Render the frequency table `tf` as a word cloud shaped by heart.png,
    # show it twice (default colours, then colours taken from the image),
    # and save the result.
    mask_img = np.array(Image.open("heart.png"))  # image used as the mask
    font = r'c:\Windows\Fonts\simfang.ttf'  # font file handed to WordCloud

    wc = WordCloud(
        background_color="white",
        mask=mask_img,
        collocations=False,
        font_path=font,
        max_font_size=200,
        width=1600,
        height=500,
        margin=0,
    )
    wc.generate_from_frequencies(tf)

    # First figure: the cloud with its default colouring.
    plt.imshow(wc)
    plt.axis('off')
    plt.show()

    # Second figure: recolour the words using colours sampled from the
    # mask image.
    image_colors = ImageColorGenerator(mask_img)
    wc.recolor(color_func=image_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()

    # Write the cloud (recoloured at this point) to disk.
    wc.to_file('AI.jpg')

    相关文章

      网友评论

          本文标题:Jieba分词词频统计绘制词云图Wordcloud套图(2018)

          本文链接:https://www.haomeiwen.com/subject/bewlxqtx.html