美文网首页
小案例:红楼梦人物热度分析

小案例:红楼梦人物热度分析

作者: code与有荣焉 | 来源:发表于2019-10-24 14:00 被阅读0次
import jieba
from wordcloud import WordCloud
import imageio
# Character-popularity analysis for "Dream of the Red Chamber":
# segment the novel with jieba, count multi-character tokens, merge the
# aliases of the main characters, drop non-name words, and render the ten
# most frequent names as a word cloud.

# Image passed to WordCloud as its mask (custom cloud shape).
mask = imageio.imread("china.jpg")

# Only the read needs the file handle; everything else runs after it is closed.
with open('hongloumeng.txt', 'r', encoding='UTF-8') as f:
    data = f.read()

# Segment the full text into a list of tokens.
word_list = jieba.lcut(data)

# Tokens to discard before ranking. A set is used for de-duplication.
# It also contains the aliases that get folded into a canonical name below,
# so they do not show up as separate entries after the merge.
# Original author's note: the list was compiled down to a frequency of 333.
excludes = {"什么", "一个", "我们", "你们", "如今", "说道", "知道", "姑娘", "起来", "这里",
            "出来", "众人", "那里", "自己", "太太", "一面", "只见", "两个", "怎么", "不是",
            "不知", "这个", "听见", "这样", "进来", "咱们", "就是", "东西", "告诉", "回来",
            "只是", "大家", "只得", "丫头", "这些", "他们", "不敢", "出去", "所以", "不过",
            "不好", "姐姐", "的话", "一时", "鸳鸯", "过来", "不能", "心里", "她们", "如此",
            "银子", "今日", "二人", "答应", "几个", "这么", "还有", "只管", "说话", "那边",
            "奶奶", "老太太", "贾政", "凤姐儿", "没有", "贾宝玉", "宝二爷", '老祖宗'}

# Frequency table: token -> number of occurrences.
# Tokens of length <= 1 are skipped.
counts = {}
for word in word_list:
    if len(word) > 1:
        counts[word] = counts.get(word, 0) + 1

# Merge aliases into one canonical name per character.
# Lookups use .get(..., 0) so that a name or alias the segmenter never
# produced does not raise KeyError.
# To check whether a candidate alias occurs at all, inspect counts.get(alias).
aliases = {
    '贾母': ('老太太', '老祖宗'),
    '老爷': ('贾政',),
    '凤姐': ('凤姐儿',),
    '宝玉': ('贾宝玉', '宝二爷'),
}
for name, alternatives in aliases.items():
    total = counts.get(name, 0) + sum(counts.get(alt, 0) for alt in alternatives)
    # Do not create a zero-count entry for a character that never appears.
    if total:
        counts[name] = total

# Remove the excluded tokens; pop() tolerates tokens that are not present.
for word in excludes:
    counts.pop(word, None)

# Rank by frequency, highest first (stable for equal counts).
sort_list = sorted(counts.items(), key=lambda x: x[1], reverse=True)
print(sort_list)

# Take the top 10 names (or fewer if the table is smaller) and repeat each
# name `count` times so the word cloud sizes it by frequency.
list1 = []
for role, count in sort_list[:10]:
    print(role, count)
    list1.extend([role] * count)
print(list1)

# WordCloud.generate() expects a single whitespace-separated string.
text = ' '.join(list1)

# Render the word cloud and save it as a PNG.
# collocations=False keeps the repeated names from being paired into bigrams.
WordCloud(
    background_color="white",
    font_path='msyh.ttc',
    mask=mask,
    collocations=False
).generate(text).to_file('红楼梦人物分析.png')

生成词云

相关文章

网友评论

      本文标题:小案例:红楼梦人物热度分析

      本文链接:https://www.haomeiwen.com/subject/avxrvctx.html