美文网首页
爬虫城市房产网--分析北京各区房价平均值

爬虫城市房产网--分析北京各区房价平均值

作者: 狼牙战士 | 来源:发表于2017-07-12 14:45 被阅读0次

    欢迎交流,QQ:2544100193
    文件下载地址

    一、获取所有的二手房的网址并保存

    Snip20170712_17.png

    代码:

    import requests
    import re
    import csv
    
    def get_allurls(page_numbers):
        """Collect listing URLs from the for-sale index pages and save them to ``urls.csv``.

        Index pages ``pg2`` up to (but not including) ``pg{page_numbers}`` are
        downloaded, every listing link matched by the pattern below is turned
        into an absolute URL, and the result is written as a one-column CSV
        whose header is ``url_name``.

        Args:
            page_numbers: Exclusive upper bound of the page range; anything
                accepted by ``int()``.

        NOTE(review): page 1 and page ``page_numbers`` itself are never
        requested -- confirm this range is intended.
        """
        page_template = 'http://bj.cityhouse.cn/forsale/pg{}/'
        # Compiled once instead of once per downloaded page.
        link_pattern = re.compile(r'<h4 class="tit"><a target="_blank" href="(.*?)"')

        listing_urls = []
        for page in range(2, int(page_numbers)):
            r = requests.get(page_template.format(page))
            r.encoding = 'utf-8'
            # The captured hrefs are site-relative, so prefix the host.
            for href in link_pattern.findall(r.text):
                listing_urls.append('http://bj.cityhouse.cn' + href)

        # newline='' lets the csv module control line endings itself; without
        # it an extra blank row appears after every record on Windows.
        with open('urls.csv', 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['url_name'])
            writer.writerows([listing_url] for listing_url in listing_urls)
    
    # Script entry point: scan the index pages below page 20.
    if __name__ == "__main__":
        get_allurls(page_numbers=20)
    

    结果:


    Snip20170712_18.png

    二、爬取每个网页所需要的信息并保存到CSV文件

    代码:

    import requests
    from bs4 import BeautifulSoup
    import re
    import csv
    import pandas as pd
    import numpy as np
    
    def get_alldatas():
        """Scrape area, unit price and district from listing pages into ``data.csv``.

        Reads the ``url_name`` column of ``urls.csv``, downloads the first 100
        URLs and, for each page, reads the elements with ids ``fyt_bldgarea``,
        ``fyt_price`` and ``fyt_district``. A page is skipped unless all three
        elements are present and each has a single text child, so one
        incomplete listing no longer aborts the whole run. The collected rows
        are written to ``data.csv`` encoded as gb2312.
        """
        data = pd.read_csv('urls.csv')
        urls = np.array(data["url_name"])
        mianji = []
        danjia = []
        quyu = []
        for url in urls[0:100]:
            r = requests.get(url)
            soup = BeautifulSoup(r.text, "lxml")

            # Look each element up once and reuse the result.
            area_tag = soup.find(id="fyt_bldgarea")
            print(area_tag)
            price_tag = soup.find(id="fyt_price")
            district_tag = soup.find(id="fyt_district")
            if area_tag is None or price_tag is None or district_tag is None:
                continue

            # Tag.string is None when the tag does not hold exactly one text
            # node; slicing or regex-matching None would raise.
            area_text = area_tag.string
            price_text = price_tag.string
            district_text = district_tag.string
            if area_text is None or price_text is None or district_text is None:
                continue

            # Drop the last character of the area text (the trailing unit).
            mianji.append(area_text[:-1])
            # Keep only the digits of the price text.
            danjia.append(''.join(re.findall('[0-9]', price_text)))
            quyu.append(district_text)

        dataframe = pd.DataFrame({"面积": mianji, '每平米价格': danjia, '地区': quyu})
        dataframe.to_csv("data.csv", index=False, encoding="gb2312")
    
    # Script entry point: scrape the listings recorded in urls.csv.
    if __name__ == "__main__":
        get_alldatas()
    

    结果:


    Snip20170713_19.png

    三、画图对比各区平均房价

    代码:

    #-*- coding=utf-8 -*-
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    def get_datas():
        """Sort the unit prices in ``data.csv`` into the per-district module-level lists.

        Each row's "每平米价格" value is appended to the list belonging to the
        row's "地区" value; rows for any other district are ignored. The mean
        of the ``chaoyang`` list is printed at the end.

        The nine district lists are module-level names that must already exist
        when this function is called.
        """
        table = pd.read_csv('data.csv', encoding="gb2312")
        districts = np.array(table["地区"])
        prices = np.array(table["每平米价格"])
        areas = np.array(table["面积"])  # read but not used further

        # District name as stored in the CSV -> list that collects its prices.
        buckets = {
            '石景山区': shijingshan,
            '密云区': miyun,
            '朝阳区': chaoyang,
            '大兴区': daxing,
            '丰台区': fengtai,
            '房山区': fangshan,
            '东城区': dongcheng,
            '海淀区': haidian,
            '通州区': tongzhou,
        }

        # Walk the rows, pairing each district with its price.
        for district, price in zip(districts, prices):
            bucket = buckets.get(district)
            if bucket is not None:
                bucket.append(price)

        print(np.mean(chaoyang))
    
    def huatu():
        """Draw a radar (polar) chart of the mean unit price of each district.

        Reads the nine module-level district price lists, takes the mean of
        each, and plots the means as a closed, filled polygon on a polar axis
        with one angular tick label per district.
        """
        labels = np.array([u'shijingshan', u'miyun', u'chaoyang', u'daxing', u'fengtai',
                           u'fangshan', u'dongcheng', u'haidian', u'tongzhou'])
        dat = np.array([np.mean(shijingshan), np.mean(miyun), np.mean(chaoyang),
                        np.mean(daxing), np.mean(fengtai), np.mean(fangshan),
                        np.mean(dongcheng), np.mean(haidian), np.mean(tongzhou)])

        # One evenly spaced angle per district, derived from the label count
        # rather than a separately maintained constant.
        angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False)
        # Tick positions in degrees, taken before the polygon is closed so that
        # the number of ticks equals the number of labels.
        tick_degrees = angles * 180/np.pi

        # Repeat the first point at the end to close the polygon.
        data = np.concatenate((dat, [dat[0]]))
        angles = np.concatenate((angles, [angles[0]]))

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)  # polar projection
        ax.plot(angles, data, 'bo-', linewidth=2)  # outline
        ax.fill(angles, data, facecolor='r', alpha=0.25)  # fill
        # Passing the closed angle array here would give 10 positions for
        # 9 labels, which current Matplotlib rejects with a ValueError.
        ax.set_thetagrids(tick_degrees, labels)
        ax.set_title(u"123", va='bottom')
        ax.grid(True)
        plt.show()
    
    # Script entry point.
    if __name__ == "__main__":
        # Module-level price lists, one per district; get_datas() fills them
        # and huatu() reads them.
        shijingshan, miyun, chaoyang = [], [], []
        daxing, fengtai, fangshan = [], [], []
        dongcheng, haidian, tongzhou = [], [], []

        get_datas()
        huatu()
    

    结果:

    Snip20170713_20.png

    相关文章

      网友评论

          本文标题:爬虫城市房产网--分析北京各区房价平均值

          本文链接:https://www.haomeiwen.com/subject/akqbhxtx.html