美文网首页
Python绘图练习

Python绘图练习

作者: C_Z_Q_ | 来源:发表于2019-10-30 11:31 被阅读0次

1.豆瓣即将上映电影top5(条形图、横向条形图、电影国家占比饼图)

from xpinyin import Pinyin
import requests
from lxml import html
from matplotlib import pyplot as plt
plt.rcParams["font.sans-serif"] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import pandas as pd
# pip install xpinyin
# Placeholder stored when the page does not contain a given field.
_NO_DATA = '对不起,没有查询到数据'


def _first_or_default(nodes, default=_NO_DATA):
    """Return the first XPath match in *nodes*, or *default* if there is none."""
    return nodes[0] if nodes else default


def _parse_want_see(nodes):
    """Turn the '<N>人想看' text node into an int; 0 when the node is missing."""
    if not nodes:
        # Previously the placeholder text was fed to int() and crashed.
        return 0
    return int(nodes[0].replace('人想看', ''))


def spider(city):
    """Scrape Douban's "coming soon" list for *city* and plot three charts.

    The city name is converted to pinyin (no separator) to build the Douban
    URL. For every movie found the name, release date, genre, country,
    "want to see" count and poster link are collected. Three figures are then
    shown one after another: a vertical bar chart and a horizontal bar chart
    of the five most wanted movies, and a pie chart of movies per country.

    Args:
        city: City name as typed by the user (e.g. Chinese characters).

    Returns:
        None. Output is printed text and matplotlib windows.
    """
    # splitter defaults to '-'; an empty splitter yields e.g. "beijing".
    city_pinyin = Pinyin().get_pinyin(city, splitter='')
    url = 'https://movie.douban.com/cinema/later/{}/'.format(city_pinyin)
    print('您要爬取的目标站点是', url)
    print('爬虫进行中,请稍后.........')

    # Browser-like User-Agent so the request is not rejected as a bot.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0'}
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)

    selector = html.fromstring(response.text)
    div_list = selector.xpath('//div[@id="showing-soon"]/div')
    print('您好,{}市共查询到{}部即将上映的电影'.format(city, len(div_list)))

    movie_info_list = []
    for div in div_list:
        movie_info_list.append({
            "movie_name": _first_or_default(div.xpath('div[1]/h3/a/text()')),
            "date": _first_or_default(div.xpath('div[1]/ul/li[1]/text()')),
            "type": _first_or_default(div.xpath('div[1]/ul/li[2]/text()')),
            "country": _first_or_default(div.xpath('div[1]/ul/li[3]/text()')),
            "want_see": _parse_want_see(div.xpath('div[1]/ul/li[4]/span/text()')),
            # A missing poster used to raise IndexError; use the placeholder.
            "img_link": _first_or_default(div.xpath('a/img/@src')),
        })

    if not movie_info_list:
        # Nothing to plot; the count printed above already reports zero.
        return

    # Number of movies per country, in first-seen order.
    counts = {}
    for movie in movie_info_list:
        counts[movie['country']] = counts.get(movie['country'], 0) + 1
    labels = list(counts)
    num = list(counts.values())

    # Five movies with the highest "want to see" count.
    movie_info_list.sort(key=lambda movie: movie['want_see'], reverse=True)
    top_five = movie_info_list[:5]
    x = [movie['movie_name'] for movie in top_five]
    y = [movie['want_see'] for movie in top_five]

    # Vertical bar chart.
    plt.bar(x, y)
    plt.grid()
    plt.title('豆瓣即将上映热度前五')
    plt.xlabel('电影名')
    plt.ylabel('热度')
    plt.show()

    # Horizontal bar chart (axis labels swap accordingly).
    plt.barh(x, y)
    plt.grid()
    plt.title('豆瓣即将上映热度前五')
    plt.ylabel('电影名')
    plt.xlabel('热度')
    plt.show()

    # Pie chart of countries. The hand-picked palette only has three colours;
    # with more countries it would repeat and make the legend ambiguous, so
    # fall back to matplotlib's default colour cycle in that case.
    palette = ['red', 'green', 'yellow']
    colors = palette if len(labels) <= len(palette) else None
    plt.pie(x=num,
            labels=labels,
            colors=colors,
            shadow=True,
            startangle=270,     # angle at which the first wedge starts
            autopct='%1.1f%%',  # print each wedge's percentage
            )
    plt.axis('equal')  # equal aspect ratio so the pie is a circle
    plt.legend(loc=2)  # loc=2 is the upper-left corner
    plt.title('电影国家占比')
    plt.show()

    # Prompt the user for the city whose upcoming releases should be looked up.
city = input('输入您要查看即将上映电影信息的城市')
    # Scrape Douban for that city and draw the three charts.
spider(city)
条形图 横向条形图 电影国家占比

2.三国人物分析top10(条形图和饼状图)

import jieba
from matplotlib import pyplot as plt

#设置支持中文字体
plt.rcParams["font.sans-serif"] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
#import imageio
# 读取文件
#mask = imageio.imread('china.jpg')
# Read the whole novel; the file only needs to stay open for this one read.
with open('novel/threekingdom.txt', 'r', encoding='UTF-8') as f:
    data = f.read()

# Split the text into words with jieba.
words_list = jieba.lcut(data)

# Frequent words that are not character names (titles, places, verbs, ...)
# plus the alias spellings that are merged into a canonical name below.
excludes = {"将军", "却说", "二人", "不可", "荆州", "不能", "如此", "丞相",
            "商议", "如何", "主公", "军士", "军马", "左右", "次日", "引兵",
            "大喜", "天下", "东吴", "于是", "今日", "不敢", "魏兵", "陛下",
            "都督", "人马", "不知", "孔明曰", "玄德曰", "玄德", "云长"}

# word -> number of occurrences, ignoring single-character tokens.
counts = {}
for word in words_list:
    if len(word) <= 1:
        continue
    counts[word] = counts.get(word, 0) + 1

# Merge different spellings that refer to the same person. .get() keeps this
# working even when one of the spellings never occurs in the text.
counts['孔明'] = counts.get('孔明', 0) + counts.get('孔明曰', 0)
counts['刘备'] = (counts.get('玄德', 0) + counts.get('玄德曰', 0)
                + counts.get('刘备', 0))
counts['关公'] = counts.get('关公', 0) + counts.get('云长', 0)

# Drop the unrelated words; pop() tolerates words that are not present,
# where `del` would raise KeyError.
for word in excludes:
    counts.pop(word, None)

# (word, frequency) pairs, most frequent first.
items = sorted(counts.items(), key=lambda item: item[1], reverse=True)

# Keep the top ten; slicing also works when fewer than ten words remain.
top_items = items[:10]
x = [name for name, _ in top_items]
print(x)
y = [frequency for _, frequency in top_items]


# Bar chart of the most frequent names, drawn through the current axes object.
ax = plt.gca()
ax.bar(x, y)
ax.grid()
ax.set_title('三国人物前10排名')
ax.set_xlabel('人物')
ax.set_ylabel('热度')
plt.show()



# Pie chart of the most frequent names.
# One distinct colour per wedge: the chart shows up to ten names, and a
# shorter palette would repeat colours and make the legend ambiguous.
colors = ['red', 'green', 'yellow', 'pink', 'orange', 'blue',
          'purple', 'cyan', 'brown', 'gray']
labels = x
plt.pie(x=y,                # wedge sizes
        labels=labels,      # text drawn outside each wedge
        colors=colors,
        shadow=True,
        autopct='%1.1f%%',  # print each wedge's percentage
        )
plt.axis('equal')    # equal aspect ratio so the pie is a circle
plt.legend(loc=2)    # loc=2 is the upper-left corner
plt.title('三国人物top10')
plt.show()
条形图 饼形图

相关文章

网友评论

      本文标题:Python绘图练习

      本文链接:https://www.haomeiwen.com/subject/hblovctx.html