美文网首页
Python练习 抓取AQI

Python练习 抓取AQI

作者: By_syk | 来源:发表于2017-05-14 21:56 被阅读46次

通过正则表达式解析数据

from urllib import request
from xpinyin import Pinyin
import re


def get_aqi(city_pinyin):
    """Fetch the pm25.com page of a city and extract its air-quality data.

    Every field is pulled out of the HTML with a regular expression. A field
    whose pattern does not match keeps the default value set below.

    Args:
        city_pinyin: City name in pinyin (e.g. ``'chengdu'``); it is used as
            the page name in the requested URL.

    Returns:
        A dict with the keys ``city``, ``aqi``, ``level``, ``pm25``,
        ``weather`` and ``source``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    print('get_aqi(\'%s\')' % city_pinyin)

    result = {
        'city': '',  # city name
        'aqi': 0,  # AQI index value
        'level': '',  # AQI level description
        'pm25': 0,  # PM2.5 concentration (micrograms per cubic metre, μg/m³)
        'weather': '',  # weather
        'source': ''  # data source information
    }

    # Use the response as a context manager so the connection is always
    # closed, even if reading the body fails.
    with request.urlopen('http://www.pm25.com/' + city_pinyin + '.html') as res:
        page = res.read()  # raw bytes of the page
    print('page_size: %d bytes' % len(page))
    page = page.decode('utf-8')  # decode to text

    match = re.search(r'<h2 class="bi_loaction_city">(.+?)</h2>', page)
    if match:
        result['city'] = match.group(1)

    match = re.search(r'<a class="bi_aqiarea_num">(\d+)</a>', page)
    if match:
        result['aqi'] = int(match.group(1))

    match = re.search(r'<span class="bi_aqiarea_wuran.*?">(.+?)</span>', page)
    if match:
        result['level'] = match.group(1)

    match = re.search(r'<span class="pm25_span">(\d+)</span>', page)
    if match:
        result['pm25'] = int(match.group(1))

    # The weather paragraph is captured in two pieces: the text inside the
    # <span> and the text that follows it; they are joined back together.
    match = re.search(r'<p class="bi_info_weather">.+?<span>(.+?)</span>(.+?)</p>', page)
    if match:
        result['weather'] = match.group(1) + match.group(2)

    # Group 1 is the data source, group 2 the last-update time; the
    # non-capturing group skips the &nbsp; padding between them.
    match = re.search(r'<p class="bi_info_tips">数据来源:(.+?)(?:&nbsp;)*最后更新:(.+?)</p>', page)
    if match:
        result['source'] = match.group(1) + ' ' + match.group(2)

    return result


def get_pinyin(city):
    """Convert a city name to pinyin with no separator between syllables.

    Args:
        city: City name as typed by the user.

    Returns:
        Whatever ``Pinyin.get_pinyin`` returns for ``city`` with an empty
        separator.
    """
    print("get_pinyin('%s')" % city)

    # An empty separator joins the syllables directly.
    return Pinyin().get_pinyin(city, '')


if __name__ == '__main__':
    # Interactive entry point: print the banner, ask for a city, convert the
    # name to pinyin, fetch the AQI data and print the resulting dict.
    print('=== 城市AQI指数查询\n===== 数据来自 http://www.pm25.com')
    city = input('目标城市(如“成都”或“chengdu”):')
    city_pinyin = get_pinyin(city)
    info = get_aqi(city_pinyin)
    print(info)

通过 BeautifulSoup 解析数据

from urllib import request
from xpinyin import Pinyin
from bs4 import BeautifulSoup


def get_aqi(city_pinyin):
    """Fetch the pm25.com page of a city and extract its air-quality data.

    The page is parsed with BeautifulSoup. A field whose element is not
    present in the page keeps the default value set below instead of
    raising.

    Args:
        city_pinyin: City name in pinyin (e.g. ``'chengdu'``); it is used as
            the page name in the requested URL.

    Returns:
        A dict with the keys ``city``, ``aqi``, ``level``, ``pm25``,
        ``weather`` and ``source``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page body is not valid UTF-8.
        ValueError: If the AQI or PM2.5 element is present but its text is
            not an integer.
    """
    print('get_aqi(\'%s\')' % city_pinyin)

    result = {
        'city': '',  # city name
        'aqi': 0,  # AQI index value
        'level': '',  # AQI level description
        'pm25': 0,  # PM2.5 concentration (micrograms per cubic metre, μg/m³)
        'weather': '',  # weather
        'source': ''  # data source information
    }

    # Use the response as a context manager so the connection is always
    # closed, even if reading the body fails.
    with request.urlopen('http://www.pm25.com/' + city_pinyin + '.html') as res:
        page = res.read()  # raw bytes of the page
    print('page_size: %d bytes' % len(page))
    page = page.decode('utf-8')  # decode to text

    # Name the parser explicitly: without it bs4 emits a warning and picks
    # whichever parser happens to be installed, so results can differ
    # between machines.
    soup = BeautifulSoup(page, 'html.parser')

    # find() returns None when nothing matches, so each lookup is checked
    # before its text is read.
    node = soup.find(class_='bi_loaction_city')
    if node is not None:
        result['city'] = node.text

    node = soup.find('a', class_='bi_aqiarea_num')
    if node is not None:
        result['aqi'] = int(node.text)

    node = soup.find('span', {'class': 'bi_aqiarea_wuran'})
    if node is not None:
        result['level'] = node.text

    # The PM2.5 value is read from the first <span> under .bi_aqiarea_bottom.
    spans = soup.select('.bi_aqiarea_bottom span')
    if spans:
        result['pm25'] = int(spans[0].text)

    node = soup.find('p', class_='bi_info_weather')
    if node is not None:
        result['weather'] = node.text.strip()

    node = soup.find('p', class_='bi_info_tips')
    if node is not None:
        result['source'] = node.text

    return result


def get_pinyin(city):
    """Return the pinyin form of ``city`` with the syllables run together.

    Args:
        city: City name as typed by the user.

    Returns:
        The result of ``Pinyin.get_pinyin`` called with ``city`` and an
        empty separator.
    """
    print("get_pinyin('%s')" % city)

    converter = Pinyin()
    separator = ''  # no character between syllables
    return converter.get_pinyin(city, separator)


if __name__ == '__main__':
    # Script entry point: show the banner, read a city name from the user,
    # look up its AQI data and print the resulting dict.
    print(
        '=== 城市AQI指数查询\n'
        '===== 数据来自 http://www.pm25.com'
    )
    city = input('目标城市(如“成都”或“chengdu”):')
    city_pinyin = get_pinyin(city)
    info = get_aqi(city_pinyin)
    print(info)

相关文章

网友评论

      本文标题:Python练习 抓取AQI

      本文链接:https://www.haomeiwen.com/subject/xdccxxtx.html