python实现微博评论情绪分析，并生成情绪时空分布图

作者: Cache_wood | 来源:发表于2021-10-20 01:01 被阅读0次

python实现微博评论情绪分析，并生成情绪时空分布图
2020-03-30
python爬虫：微博评论分析
网易云音乐评论抓取实验(1)接口获取
Python语言结合机器学习算法进行微博预测
1A04第九周作业
预判是分析，情绪是表达
利用rwda包抓取微博评论并分析
利用python微信库itchat实现微信自动回复功能，pyth
《基于Python实现的微信好友数据分析》分享下载

import jieba   #用于分词
import re  #正则匹配
import matplotlib.pyplot as plt  #绘图
from pyecharts.charts import Geo
from pyecharts import options as opts
from pyecharts.globals import GeoType

#jieba添加自定义字典
def addword(filename):
    """Register every emotion lexicon file with jieba as a user dictionary.

    Args:
        filename: Sequence of lexicon file names located in the
            ``emotion_lexicon`` directory under the current working directory.
    """
    import os  # local import so the block does not depend on file-level imports

    # The original path literal relied on an invalid escape sequence and on
    # Windows-only separators; os.path.join yields the same path on Windows
    # and a valid one elsewhere.
    lexicon_dir = os.path.join('.', 'emotion_lexicon')
    for name in filename:
        jieba.load_userdict(os.path.join(lexicon_dir, name))

#评论清洗，去除网址url,@,无意义内容等
def cleanword():
    """Clean the raw comments in ``weibo.txt`` and append them to ``cleanword.txt``.

    A line containing '分享图片' is skipped. For every other line the
    @-mentions / reply prefixes, bracketed emoticons, URLs and the check-in
    markers '我在:' and '我在这里:' are removed, runs of whitespace are
    collapsed to a single space, and the result is written as one line.

    The output file is opened in append mode, so calling this function again
    adds to whatever ``cleanword.txt`` already contains.
    """
    # Patterns are compiled once here rather than once per input line.
    mention_re = re.compile(r"(回复)?(//)?\s*@\S*?\s*(:| |$)")
    emoticon_re = re.compile(r"\[\S+\]")
    url_re = re.compile(
        r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))',
        re.IGNORECASE)
    spaces_re = re.compile(r"\s+")

    # The output handle is opened a single time and closed by the `with`
    # statement; previously it was reopened for each line and never closed.
    with open('weibo.txt', 'r', encoding='utf-8') as src, \
            open('cleanword.txt', 'a', encoding='utf-8') as dst:
        for text in src:
            if '分享图片' in text:
                continue
            text = mention_re.sub(" ", text)       # drop @user and reply/repost prefixes
            text = emoticon_re.sub("", text)       # drop [emoticon] tags
            text = url_re.sub("", text)            # drop URLs
            text = text.replace("我在:", "")        # drop check-in markers
            text = text.replace("我在这里:", "")
            text = spaces_re.sub(" ", text)        # collapse whitespace (incl. the newline)
            dst.write(text + '\n')

#构造闭包，导入情绪字典，并分析评论的情绪
def emodict(filename):
    """Load the emotion lexicons and return a closure that labels each comment.

    Args:
        filename: Sequence of lexicon file names inside ``emotion_lexicon``,
            given in the order anger, disgust, fear, joy, sadness.

    Returns:
        A zero-argument function. When called it reads ``cleanword.txt`` and
        returns ``(emotion_list, time_list, address_list)``: per comment the
        dominant emotion label (or 'no' when no lexicon word occurs), the four
        whitespace-separated tokens preceding ' +0800 2013 ', and the parsed
        coordinate literal that follows it.
    """
    import ast
    import os

    labels = ('anger', 'disgust', 'fear', 'joy', 'sadness')
    lexicon_dir = os.path.join('.', 'emotion_lexicon')

    # Sets give constant-time membership tests for every segmented word.
    lexicons = []
    for name in filename:
        with open(os.path.join(lexicon_dir, name), 'r', encoding='utf-8') as fh:
            lexicons.append({entry.strip() for entry in fh})

    def splitword():
        """Segment every cleaned comment and collect emotion, time and address."""
        emotion_list, time_list, address_list = [], [], []
        with open('cleanword.txt', 'r', encoding='utf-8') as f:
            for line in f:
                record = line.strip()
                if not record:
                    # A blank line carries no comment; skip it instead of
                    # failing while parsing an empty coordinate string.
                    continue
                parts = record.split(' +0800 2013 ')
                address = parts[-1]
                tokens = parts[0].split()
                time = tokens[-4:]
                sentence = ''.join(tokens[:-4])

                emotion_dict = dict.fromkeys(labels, 0)
                for word in jieba.lcut(sentence):
                    # First matching lexicon wins, in the order of `labels`.
                    for label, lexicon in zip(labels, lexicons):
                        if word in lexicon:
                            emotion_dict[label] += 1
                            break

                if max(emotion_dict.values()) == 0:
                    emotion = 'no'
                else:
                    emotion = max(emotion_dict, key=emotion_dict.get)
                emotion_list.append(emotion)
                time_list.append(time)
                # NOTE(review): this used to be eval() on text read from disk;
                # literal_eval only accepts Python literals such as
                # "[39.9, 116.3]" and cannot execute code.
                address_list.append(ast.literal_eval(address))
        print(len(emotion_list))
        print(len(time_list))
        print(len(address_list))
        return emotion_list, time_list, address_list

    return splitword

#绘制某种情绪在某个时间模式下的变化趋势
def plotime(emotion,time,emotion_list,time_list):
    """Plot how often one emotion occurs per weekday, month or hour.

    The chart is saved as ``<time>_<emotion>.png`` and then shown.

    Args:
        emotion: Emotion label to count.
        time: Time mode, one of 'week', 'month' or 'hour'. Any other value
            prints 'enter error!' and returns without drawing anything.
        emotion_list: Emotion label of each comment.
        time_list: Time tokens of each comment, parallel to ``emotion_list``.
            Index 0 is used as the weekday, index 1 as the month and the last
            item as an 'HH:MM:SS' string.

    Raises:
        KeyError: If a matching comment carries a weekday, month or hour
            token that is not one of the expected labels.
    """
    # mode -> (x-axis labels, function picking the bucket from a time entry, line colour)
    modes = {
        'week': (['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],
                 lambda tm: tm[0], 'r'),
        'month': (['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
                  lambda tm: tm[1], 'b'),
        'hour': (['{:0>2d}'.format(i) for i in range(24)],
                 lambda tm: tm[-1].split(':')[0], 'y'),
    }
    if time not in modes:
        # Bail out before touching matplotlib so no empty figure is written.
        print('enter error!')
        return

    labels, bucket_of, colour = modes[time]
    counts = dict.fromkeys(labels, 0)
    # Walk both lists in lockstep: looking the entry up with list.index()
    # would always find the first of several identical timestamps.
    for emo, tm in zip(emotion_list, time_list):
        if emo == emotion:
            counts[bucket_of(tm)] += 1
    values = [counts[label] for label in labels]

    plt.plot(labels, values, 'o-', color=colour, label='{}_{}'.format(time, emotion))
    plt.xlabel(time)       # x-axis title
    plt.ylabel("times")    # y-axis title
    plt.legend(loc="best")
    # Write each count just above its data point.
    for a, b in zip(labels, values):
        plt.text(a, b+1, b, ha='center', va='bottom', fontsize=10)
    plt.savefig('{}_{}.png'.format(time,emotion),dpi=800)
    plt.show()

#以某点为中心，r为半径的区域内各种情绪的占比
def distance(emotion_list,address_list,k=50,r=0.3):
    """Draw a pie chart of the emotions found within radius ``r`` of one comment.

    The centre is ``address_list[k]``. A comment is counted when the squared
    difference of its first two coordinates from the centre is at most
    ``r**2``; the comparison is done directly on the stored coordinate values.
    Comments labelled 'no' are ignored. The counts are printed, and when at
    least one comment was counted the chart is saved as ``pie.png`` and shown.

    Args:
        emotion_list: Emotion label of each comment.
        address_list: Coordinate pair of each comment, parallel to
            ``emotion_list``.
        k: Index of the comment used as the centre.
        r: Radius, in the same unit as the coordinates.

    Raises:
        IndexError: If ``k`` is not a valid index into ``address_list``.
    """
    labels = ['sadness','joy','fear','disgust','anger']
    emo1 = dict.fromkeys(labels, 0)
    center = address_list[k]
    limit = r**2
    for emo, point in zip(emotion_list, address_list):
        if emo == 'no':
            continue
        if sum((center[j]-point[j])**2 for j in range(2)) <= limit:
            emo1[emo] += 1
    print(emo1)

    sizes = [emo1[label] for label in labels]
    if sum(sizes) == 0:
        # Nothing to draw: a pie of all-zero wedges has no defined
        # percentages, so stop here instead of plotting.
        print('No message in the area!')
        return

    plt.figure(figsize=(6,9))
    colors = ['red','yellow','green','blue','pink']   # one colour per wedge
    explode = (0,0,0,0,0)                             # no wedge is pulled out
    plt.pie(sizes,explode=explode,labels=labels,colors=colors,
            autopct = '%3.2f%%',   # percentage text with two decimals
            shadow = False,
            startangle =90,
            pctdistance = 0.6)     # percentage text at 0.6 of the radius
    # Equal axis scaling keeps the pie circular.
    plt.axis('equal')
    plt.savefig('pie.png',dpi=800)
    plt.show()

#在北京地图上标注出不同情绪的空间分布
def test_geo(emotion_list,address_list):
    """Build a pyecharts Geo chart marking each emotional comment on the Beijing map.

    Comments labelled 'no' are left out. Every other comment becomes a point
    named ``<emotion><index>`` whose value selects the colour band of its
    emotion in the piecewise visual map.

    Args:
        emotion_list: Emotion label of each comment.
        address_list: Coordinate pair of each comment; item 1 is passed as the
            longitude and item 0 as the latitude.

    Returns:
        The configured ``Geo`` chart (not yet rendered).
    """
    # Value assigned to each emotion; it falls inside that emotion's band below.
    band_value = {'sadness':5,'joy':15,'fear':25,'disgust':35,'anger':45}

    chart = Geo()
    chart.add_schema(maptype='北京')

    points = []
    for idx, label in enumerate(emotion_list):
        if label == 'no':
            continue
        point_name = label + str(idx)
        points.append((point_name, band_value[label]))
        coords = address_list[idx]
        # Register the point's position: add_coordinate(name, lng, lat).
        chart.add_coordinate(point_name, coords[1], coords[0])

    # Put the data on the map and hide the per-point labels.
    chart.add('', points, type_=GeoType.EFFECT_SCATTER, symbol_size=5)
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))

    # One colour band per emotion: (min, max, label, colour).
    bands = [
        (1, 10, 'sadness', '#3700A4'),
        (10, 20, 'joy', '#81AE9F'),
        (20, 30, 'fear', '#E2C568'),
        (30, 40, 'disgust', '#FCF84D'),
        (40, 50, 'anger', '#DD0200'),
    ]
    pieces = [
        {'min': low, 'max': high, 'label': name, 'color': colour}
        for low, high, name, colour in bands
    ]
    # is_piecewise must be True for the custom bands to take effect.
    visual_map = opts.VisualMapOpts(is_piecewise=True, pieces=pieces)
    title = opts.TitleOpts(title="北京-情绪分布")
    chart.set_global_opts(visualmap_opts=visual_map, title_opts=title)
    return chart

#主函数调用各函数
def main():
    """Run the whole pipeline: clean, classify, then draw the time, pie and map charts.

    The emotion and the time mode for the trend chart are read from standard
    input; the map is rendered to ``test_render.html``.
    """
    lexicon_files = ['anger.txt','disgust.txt','fear.txt','joy.txt','sadness.txt']
    addword(lexicon_files)
    cleanword()

    analyse = emodict(lexicon_files)
    emotion_list, time_list, address_list = analyse()

    chosen_emotion = input('please enter the emotion:')
    chosen_mode = input('please enter the time:')
    plotime(chosen_emotion, chosen_mode, emotion_list, time_list)
    distance(emotion_list, address_list)

    # Render the map to an HTML file that can be opened in a browser.
    chart = test_geo(emotion_list, address_list)
    chart.render('test_render.html')


# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()

原始评论 weibo.txt

分享图片 我在这里:http://t.cn/z8L6aJV   Fri Oct 11 21:25:07 +0800 2013  [39.88293, 116.37024]
 @高娅洁 是黑妹吗？ 我在:http://t.cn/zRGIa79  Fri Oct 11 19:44:31 +0800 2013  [39.964324, 116.354873]
男士秋冬新款小脚裤！ 我在:http://t.cn/zRq5Uhl   Sat Oct 12 21:05:40 +0800 2013  [39.83868, 116.37965]
楚国的灭亡皆是因接受了贿赂，让她的小伎俩离自己远一点 我在:http://t.cn/zRGqrYC   Fri Oct 11 17:39:22 +0800 2013  [39.929925, 116.42866]
我们的志愿者在全国盲人柔道锦标赛现场 我在:http://t.cn/zRbkKjP   Fri Oct 11 10:45:42 +0800 2013  [39.935349, 116.283485]
还没睡醒就被抓来补课 @悠长的  [偷笑]请问新店是只对理工科开放么？文科生在此好惶恐@jimmyzhuang [思考]    Fri Oct 11 12:44:07 +0800 2013  [39.926456, 116.450493]
cx.                   :     说我不真实，我活在现实，0K？ 我在这里:http://t.cn/z8AtYtq    Fri Oct 11 20:43:56 +0800 2013  [39.84068, 116.31896]

清洗之后的cleanword.txt

清洗时跳过了无意义的评论（如“分享图片”），并去除了网址、@用户名、表情符号以及“我在:”“我在这里:”等无意义内容

 是黑妹吗？ Fri Oct 11 19:44:31 +0800 2013 [39.964324, 116.354873] 
男士秋冬新款小脚裤！ Sat Oct 12 21:05:40 +0800 2013 [39.83868, 116.37965] 
楚国的灭亡皆是因接受了贿赂，让她的小伎俩离自己远一点 Fri Oct 11 17:39:22 +0800 2013 [39.929925, 116.42866] 
我们的志愿者在全国盲人柔道锦标赛现场 Fri Oct 11 10:45:42 +0800 2013 [39.935349, 116.283485] 
还没睡醒就被抓来补课 请问新店是只对理工科开放么？文科生在此好惶恐 Fri Oct 11 12:44:07 +0800 2013 [39.926456, 116.450493] 
cx. : 说我不真实，我活在现实，0K？ Fri Oct 11 20:43:56 +0800 2013 [39.84068, 116.31896]

emotion_list（评论中不含任何情绪词时记为 'no'，否则记为命中词数最多的那种情绪）

['no', 'no', 'disgust', 'joy', 'fear', 'no', 'no', 'joy', 'anger', 'joy', 'no', 'no', 'no', 'disgust', 'no', 'joy', 'sadness', 'joy', 'sadness', 'joy', 'joy', 'sadness', 'no', 'joy', 'no', 'joy', 'joy', 'sadness', 'sadness', 'joy', 'no', 'no', 'joy', 'joy', 'no', 'no', 'no', 'no', 'no', 'sadness', 'no', 'joy', 'no', 'no', 'joy', 'fear', 'no', 'no', 'joy', 'disgust', 'no', 'no', 'anger', 'joy', 'no', 'no', 'joy', 'no', 'sadness', 'joy', 'joy', 'no', 'no', 'no', 'joy', 'joy', 'joy', 'no', 'no', 'no', 'joy', 'no', 'joy', 
'no', 'joy', 'no', 'no', 'joy', 'no', 'no', 'sadness', 'fear', 'no', 'anger', 'no', 'no', 'no', 'joy', 'disgust', 'no', 'no', 'joy', 'sadness', 'no', 'fear', 'joy', 'no', 'sadness', 'sadness', 'no', 'no', 'no', 'no', 'joy', 'joy', 'no', 'sadness', 'no', 'no', 'no', 'sadness', 'joy', 'fear', 'no', 'sadness', 'joy', 'no', 'no', 'no', 'sadness',

情绪的时间变化趋势

五种情绪配置三种时间模式，共有15种图像

情绪的空间比例分布

以某一坐标为原点，某一距离为半径的范围内各种情绪的分布比例图

情绪的空间具体分布并用不同颜色的点表示

python实现微博评论情绪分析，并生成情绪时空分布图
原始评论 weibo.txt 清洗之后的cleanword.txt 清洗之后去掉了无意义评论，以及网址，@，表情包...
2020-03-30
张文宏医生说他不开微博，因为扰乱情绪，看到不好的评论，会影响情绪，看到好的评论，也会影响情绪，所以，还是不开微博的...
python爬虫：微博评论分析
最近王和李的离婚闹得沸沸扬扬，相信大伙们都已经吃了不少的瓜。本文结合李的第一篇文章发文下面的网友们的评论来看看大家...
网易云音乐评论抓取实验(1)接口获取
（后续文章已更新：网易云音乐评论抓取实验(2)朴素贝叶斯入门：通过概率对评论情绪分类）上篇文章Python实现电...
Python语言结合机器学习算法进行微博预测
本文是基于Python语言结合基础的机器学习算法来对微博传播广度下的微博转发次数来进行预测的，并分析了微博在转发过...
1A04第九周作业
1、在层间日记记录自己情绪反位，并分析情绪问题的根源，并寻求改善。
预判是分析，情绪是表达
四词故事:预判、情绪、内容、评论预判是分析，情绪是表达，内容是展现，评论是互动。预判是分析预判是我们对于一个...
利用rwda包抓取微博评论并分析
工具：Rstudio，R(3.3.4) 参考文章： R微博数据分析用R爬取微博评论数据
利用python微信库itchat实现微信自动回复功能，pyth
利用python微信库itchat实现微信自动回复功能，pythonitchat 用Python登录微信并实现自动...
《基于Python实现的微信好友数据分析》分享下载
书籍信息书名: 基于Python实现的微信好友数据分析标签: 基于Python实现的微信好友数据分析,免费,程...

python实现微博评论情绪分析，并生成情绪时空分布图

情绪的时间变化趋势

情绪的空间比例分布

情绪的空间具体分布并用不同颜色的点表示

相关文章

python实现微博评论情绪分析，并生成情绪时空分布图

2020-03-30

python爬虫：微博评论分析

网易云音乐评论抓取实验(1)接口获取

Python语言结合机器学习算法进行微博预测

1A04第九周作业

预判是分析，情绪是表达

利用rwda包抓取微博评论并分析

利用python微信库itchat实现微信自动回复功能，pyth

《基于Python实现的微信好友数据分析》分享下载

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

想法

散文

简友广场