美文网首页想法散文简友广场
python实现微博评论情绪分析,并生成情绪时空分布图

python实现微博评论情绪分析,并生成情绪时空分布图

作者: Cache_wood | 来源:发表于2021-10-20 01:01 被阅读0次
import ast
import os
import re  #正则匹配

import jieba   #用于分词
import matplotlib.pyplot as plt  #绘图
from pyecharts import options as opts
from pyecharts.charts import Geo
from pyecharts.globals import GeoType

# Register the custom emotion lexicons with jieba.
def addword(filename):
    """Load every emotion lexicon file into jieba as a user dictionary.

    Args:
        filename: Sequence of lexicon file names. Each one is looked up in
            the ``emotion_lexicon`` directory under the current working
            directory.
    """
    for name in filename:
        # os.path.join builds the path portably; the previous hand-written
        # backslash prefix contained an invalid escape sequence and only
        # resolved to a directory on Windows.
        jieba.load_userdict(os.path.join('.', 'emotion_lexicon', name))

# Patterns used by cleanword(); compiled once instead of once per input line.
# "@user" mentions, optionally preceded by a reply/forward marker.
_MENTION_RE = re.compile(r"(回复)?(//)?\s*@\S*?\s*(:| |$)")
# Bracketed emoticon codes such as "[偷笑]".
_EMOTICON_RE = re.compile(r"\[\S+\]")
# Web addresses.
_URL_RE = re.compile(
    r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))',
    re.IGNORECASE)
# Any run of whitespace (including the trailing newline).
_WHITESPACE_RE = re.compile(r"\s+")


def _clean_line(text):
    """Return one raw comment line with mentions, emoticons, URLs and filler removed."""
    text = _MENTION_RE.sub(" ", text)
    text = _EMOTICON_RE.sub("", text)
    # Topic tags of the form #...# are deliberately kept.
    text = _URL_RE.sub("", text)
    # Location-sharing prefixes carry no content once the URL is gone.
    text = text.replace("我在:", "")
    text = text.replace("我在这里:", "")
    # Collapse whitespace runs; this also turns the line's newline into a space.
    return _WHITESPACE_RE.sub(" ", text)


# Clean the raw comments: drop URLs, @mentions and other meaningless content.
def cleanword():
    """Clean ``weibo.txt`` line by line and write the result to ``cleanword.txt``.

    Lines containing '分享图片' are skipped entirely. Every other line is
    cleaned by ``_clean_line`` and written followed by a newline.

    Both files are opened once and closed on exit. The output file is
    truncated on each call, so running the pipeline again does not append a
    second copy of every comment to ``cleanword.txt``.
    """
    with open('weibo.txt', 'r', encoding='utf-8') as source, \
            open('cleanword.txt', 'w', encoding='utf-8') as target:
        for text in source:
            if '分享图片' in text:
                continue
            target.write(_clean_line(text) + '\n')

# Closure factory: load the emotion lexicons, then return the analysis function.
def emodict(filename):
    """Load the emotion lexicons and return a function that labels each comment.

    Args:
        filename: Sequence of lexicon file names inside the
            ``emotion_lexicon`` directory. They are matched by position to
            the emotions anger, disgust, fear, joy and sadness.

    Returns:
        A zero-argument function ``splitword``; see its docstring.
    """
    labels = ('anger', 'disgust', 'fear', 'joy', 'sadness')

    # One word -> emotion mapping gives O(1) lookups per token. setdefault
    # keeps the first emotion a word was listed under, which preserves the
    # anger > disgust > fear > joy > sadness precedence for shared words.
    word_emotion = {}
    for label, name in zip(labels, filename):
        lexicon_path = os.path.join('.', 'emotion_lexicon', name)
        with open(lexicon_path, 'r', encoding='utf-8') as lexicon:
            for entry in lexicon:
                word_emotion.setdefault(entry.strip(), label)

    def splitword():
        """Tokenise ``cleanword.txt`` and collect emotion, time and location.

        Each line is split on the literal ' +0800 2013 ' marker: the text
        after the last marker is the coordinate literal, and the last four
        whitespace-separated tokens before the first marker are the
        timestamp. The remaining tokens are joined and tokenised with jieba.

        Returns:
            A tuple ``(emotion_list, time_list, address_list)`` with one
            entry per non-blank line. The emotion is the label with the
            highest word count (first label wins a tie), or 'no' when no
            lexicon word occurs.
        """
        emotion_list, time_list, address_list = [], [], []
        with open('cleanword.txt', 'r', encoding='utf-8') as f:
            for line in f:
                record = line.strip()
                if not record:
                    # A blank line has no coordinate literal to parse.
                    continue
                parts = record.split(' +0800 2013 ')
                address = parts[-1]
                tokens = parts[0].split()
                time = tokens[-4:]
                sentence = ''.join(tokens[:-4])

                counts = dict.fromkeys(labels, 0)
                for word in jieba.lcut(sentence):
                    label = word_emotion.get(word)
                    if label is not None:
                        counts[label] += 1

                if max(counts.values()) == 0:
                    emotion = 'no'
                else:
                    emotion = max(counts, key=counts.get)

                emotion_list.append(emotion)
                time_list.append(time)
                # literal_eval instead of eval: the coordinates come from a
                # text file and must be parsed as data, never run as code.
                address_list.append(ast.literal_eval(address))
            print(len(emotion_list))
            print(len(time_list))
            print(len(address_list))
        return emotion_list, time_list, address_list

    return splitword

# Plot how often one emotion occurs under a given time granularity.
def plotime(emotion,time,emotion_list,time_list):
    """Plot and save the count of ``emotion`` per weekday, month or hour.

    Args:
        emotion: Emotion label to count, compared against ``emotion_list``.
        time: Granularity, one of 'week', 'month' or 'hour'. Any other
            value prints 'enter error!' and returns without plotting.
        emotion_list: Emotion label of each comment.
        time_list: Timestamp tokens of each comment, in the same order as
            ``emotion_list``. Index 0 is read as the weekday abbreviation,
            index 1 as the month abbreviation, and the last item as an
            'HH:MM:SS' string.

    The figure is saved as '<time>_<emotion>.png' and then shown.
    """
    # mode -> (x-axis categories, key extractor, line colour)
    modes = {
        # 'Tue'/'Thu' are the abbreviations that occur in the timestamps;
        # the former 'Tus'/'Ths' keys raised KeyError for those days.
        'week': (['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],
                 lambda tm: tm[0], 'r'),
        'month': (['Jan','Feb','Mar','Apr','May','Jun',
                   'Jul','Aug','Sep','Oct','Nov','Dec'],
                  lambda tm: tm[1], 'b'),
        'hour': (['{:0>2d}'.format(i) for i in range(24)],
                 lambda tm: tm[-1].split(':')[0], 'y'),
    }
    if time not in modes:
        print('enter error!')
        # Nothing to draw: do not save or show an empty figure.
        return

    categories, key_of, color = modes[time]
    counts = dict.fromkeys(categories, 0)
    # Pair each timestamp with its own emotion. Looking the position up with
    # list.index() was quadratic and returned the first of equal timestamps.
    for label, tm in zip(emotion_list, time_list):
        if label == emotion:
            counts[key_of(tm)] += 1
    values = [counts[name] for name in categories]

    plt.plot(categories, values, 'o-', color=color,
             label='{}_{}'.format(time, emotion))
    plt.xlabel(time)  # x-axis title
    plt.ylabel("times")  # y-axis title
    plt.legend(loc = "best")
    # Write each count just above its data point.
    for a, b in zip(categories, values):
        plt.text(a, b+1, b, ha = 'center', va = 'bottom', fontsize=10)
    plt.savefig('{}_{}.png'.format(time,emotion),dpi=800)
    plt.show()

# Share of each emotion inside a circle of radius r around one comment.
def distance(emotion_list,address_list,k=50,r=0.3):
    """Draw a pie chart of the emotions found within ``r`` of comment ``k``.

    Args:
        emotion_list: Emotion label of each comment; 'no' entries are ignored.
        address_list: Coordinate pair of each comment, in the same order.
        k: Index into ``address_list`` of the comment used as the centre.
        r: Radius, in the same units as the coordinates. The distance is the
            plain Euclidean distance between the two coordinate pairs.

    The counts are printed and the chart is saved as 'pie.png' and shown.
    When ``k`` is out of range, or no emotional comment lies inside the
    circle, a message is printed and nothing is drawn.
    """
    labels = ['sadness','joy','fear','disgust','anger']
    emo1 = dict.fromkeys(labels, 0)

    try:
        center = address_list[k]
    except IndexError:
        # e.g. the default k=50 with fewer than 51 comments
        print('center index out of range!')
        return

    limit = r**2
    for emotion, point in zip(emotion_list, address_list):
        if emotion == 'no':
            continue
        squared = (center[0]-point[0])**2 + (center[1]-point[1])**2
        if squared <= limit:
            emo1[emotion] += 1
    print(emo1)

    sizes = list(emo1.values())
    if sum(sizes)==0:
        print('No message in the area!')
        # A pie of all zeros cannot be normalised, so stop before plotting.
        return

    plt.figure(figsize=(6,9))  # figure size
    colors = ['red','yellow','green','blue','pink']  # one colour per wedge
    explode = (0,0,0,0,0)  # no wedge is pulled out of the pie
    plt.pie(sizes,explode=explode,labels=labels,colors=colors,
            autopct = '%3.2f%%',  # percentage with two decimals
            shadow = False,
            startangle =90,  # starting angle, counter-clockwise
            pctdistance = 0.6)  # percentage text position as a fraction of the radius
    # Equal axis scaling keeps the pie circular.
    plt.axis('equal')
    plt.savefig('pie.png',dpi=800)
    plt.show()

# Mark the spatial distribution of the emotions on a map of Beijing.
def test_geo(emotion_list,address_list):
    """Build a pyecharts Geo chart with one point per emotional comment.

    Args:
        emotion_list: Emotion label of each comment; 'no' entries are skipped.
        address_list: Coordinate pair of each comment. Item 0 is passed to
            the chart as latitude and item 1 as longitude.

    Returns:
        The configured ``Geo`` chart; the caller renders it.
    """
    # Every emotion maps to a value that falls inside its own colour band below.
    emotion_value = {'sadness':5,'joy':15,'fear':25,'disgust':35,'anger':45}

    chart = Geo()
    chart.add_schema(maptype='北京')

    points = []
    for idx, label in enumerate(emotion_list):
        if label == 'no':
            continue
        # The index suffix gives every point its own name.
        point_name = label + str(idx)
        points.append((point_name, emotion_value[label]))
        # add_coordinate(name, lng, lat)
        chart.add_coordinate(point_name, address_list[idx][1], address_list[idx][0])

    # Put the data on the map and hide the per-point labels.
    chart.add('', points, type_=GeoType.EFFECT_SCATTER, symbol_size=5)
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))

    # Colour bands as (min, max, label, colour).
    bands = [
        (1, 10, 'sadness', '#3700A4'),
        (10, 20, 'joy', '#81AE9F'),
        (20, 30, 'fear', '#E2C568'),
        (30, 40, 'disgust', '#FCF84D'),
        (40, 50, 'anger', '#DD0200'),
    ]
    pieces = [
        {'min': low, 'max': high, 'label': name, 'color': colour}
        for low, high, name, colour in bands
    ]

    # The custom pieces only take effect with is_piecewise=True.
    visual_map = opts.VisualMapOpts(is_piecewise=True, pieces=pieces)
    title = opts.TitleOpts(title="北京-情绪分布")
    chart.set_global_opts(visualmap_opts=visual_map, title_opts=title)
    return chart

# Entry point: run the whole pipeline.
def main():
    """Clean the comments, label them, then draw the time, pie and map charts.

    The emotion and the time granularity for the trend plot are read from
    standard input. The map is rendered to 'test_render.html'.
    """
    lexicon_files = ['anger.txt','disgust.txt','fear.txt','joy.txt','sadness.txt']
    addword(lexicon_files)
    cleanword()

    analyse = emodict(lexicon_files)
    emotion_list, time_list, address_list = analyse()

    emotion = input('please enter the emotion:')
    time = input('please enter the time:')
    plotime(emotion, time, emotion_list, time_list)

    distance(emotion_list, address_list)

    # Render to HTML, which can be opened directly in a browser.
    chart = test_geo(emotion_list, address_list)
    chart.render('test_render.html')

# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

原始评论 weibo.txt

分享图片 我在这里:http://t.cn/z8L6aJV   Fri Oct 11 21:25:07 +0800 2013  [39.88293, 116.37024]
 @高娅洁 是黑妹吗? 我在:http://t.cn/zRGIa79  Fri Oct 11 19:44:31 +0800 2013  [39.964324, 116.354873]
男士秋冬新款小脚裤! 我在:http://t.cn/zRq5Uhl   Sat Oct 12 21:05:40 +0800 2013  [39.83868, 116.37965]
楚国的灭亡皆是因接受了贿赂,让她的小伎俩离自己远一点 我在:http://t.cn/zRGqrYC   Fri Oct 11 17:39:22 +0800 2013  [39.929925, 116.42866]
我们的志愿者在全国盲人柔道锦标赛现场 我在:http://t.cn/zRbkKjP   Fri Oct 11 10:45:42 +0800 2013  [39.935349, 116.283485]
还没睡醒就被抓来补课 @悠长的  [偷笑]请问新店是只对理工科开放么?文科生在此好惶恐@jimmyzhuang [思考]    Fri Oct 11 12:44:07 +0800 2013  [39.926456, 116.450493]
cx.                   :     说我不真实,我活在现实,0K? 我在这里:http://t.cn/z8AtYtq    Fri Oct 11 20:43:56 +0800 2013  [39.84068, 116.31896]

清洗之后的cleanword.txt

清洗时跳过了含“分享图片”的无意义评论,并删除了网址、@用户名、表情符号以及“我在:”“我在这里:”等无意义内容

 是黑妹吗? Fri Oct 11 19:44:31 +0800 2013 [39.964324, 116.354873] 
男士秋冬新款小脚裤! Sat Oct 12 21:05:40 +0800 2013 [39.83868, 116.37965] 
楚国的灭亡皆是因接受了贿赂,让她的小伎俩离自己远一点 Fri Oct 11 17:39:22 +0800 2013 [39.929925, 116.42866] 
我们的志愿者在全国盲人柔道锦标赛现场 Fri Oct 11 10:45:42 +0800 2013 [39.935349, 116.283485] 
还没睡醒就被抓来补课 请问新店是只对理工科开放么?文科生在此好惶恐 Fri Oct 11 12:44:07 +0800 2013 [39.926456, 116.450493] 
cx. : 说我不真实,我活在现实,0K? Fri Oct 11 20:43:56 +0800 2013 [39.84068, 116.31896] 

emotion_list(评论中若不含任何情绪词,则记为 'no';否则记为五种情绪中出现次数最多的那一种)

['no', 'no', 'disgust', 'joy', 'fear', 'no', 'no', 'joy', 'anger', 'joy', 'no', 'no', 'no', 'disgust', 'no', 'joy', 'sadness', 'joy', 'sadness', 'joy', 'joy', 'sadness', 'no', 'joy', 'no', 'joy', 'joy', 'sadness', 'sadness', 'joy', 'no', 'no', 'joy', 'joy', 'no', 'no', 'no', 'no', 'no', 'sadness', 'no', 'joy', 'no', 'no', 'joy', 'fear', 'no', 'no', 'joy', 'disgust', 'no', 'no', 'anger', 'joy', 'no', 'no', 'joy', 'no', 'sadness', 'joy', 'joy', 'no', 'no', 'no', 'joy', 'joy', 'joy', 'no', 'no', 'no', 'joy', 'no', 'joy', 
'no', 'joy', 'no', 'no', 'joy', 'no', 'no', 'sadness', 'fear', 'no', 'anger', 'no', 'no', 'no', 'joy', 'disgust', 'no', 'no', 'joy', 'sadness', 'no', 'fear', 'joy', 'no', 'sadness', 'sadness', 'no', 'no', 'no', 'no', 'joy', 'joy', 'no', 'sadness', 'no', 'no', 'no', 'sadness', 'joy', 'fear', 'no', 'sadness', 'joy', 'no', 'no', 'no', 'sadness', 

情绪的时间变化趋势

五种情绪搭配三种时间模式(week、month、hour),共可生成15种图像




情绪的空间比例分布

以某一坐标为原点,某一距离为半径的范围内各种情绪的分布比例图


情绪的空间具体分布并用不同颜色的点表示

相关文章

网友评论

    本文标题:python实现微博评论情绪分析,并生成情绪时空分布图

    本文链接:https://www.haomeiwen.com/subject/umhboltx.html