python爬虫：一个关于去哪儿吃饭的问题

作者: 北山学者 | 来源:发表于2017-10-17 23:01 被阅读0次

python爬虫：一个关于去哪儿吃饭的问题
微软大佬全新出版《Python3网络爬虫开发实战》，极致经典，堪
爬虫入门
去哪儿网图片爬虫及Scrapy使用详解
元旦去哪儿？python爬虫告诉你！
Python chromedriver库的问题
Python中的编码，针对Python中经常出现的乱码（阶段性总
python爬虫解决网页重定向问题
2016年7月25日（Python爬虫玩的不亦乐乎）
python爬取简历模板

每天中午到了饭点，都在为去哪儿吃饭发愁。自从有了这个程序，一切都简单了。
where_list.txt文件中存放的是想要去吃饭的地方或者想要吃的午餐，每行一条，格式为“距离标记 名称”。比如：

0 东来顺
1 楼外楼
0 松鹤楼
1 全聚德
0 谭家菜馆
1 北京饭店
1 油条豆浆

其中0表示近距离，1表示远距离。程序会先查询所在城市的污染指数（AQI）：AQI不超过150时，从远距离的地方中随机挑选一个；否则从近距离的地方中随机挑选一个。

程序需要使用BeautifulSoup（bs4）模块和lxml解析器，请提前安装。
希望能给你吃饭带来帮助。

#!/bin/python
# -*- coding: utf-8 -*-

import os
import re
import sys
import time
import urllib2
from random import choice

from bs4 import BeautifulSoup

#获取网页内容
def getUrlContents(url, num=5):
    """Download the body of ``url``, retrying on network errors.

    Args:
        url: Address to fetch. If it is empty, a message is printed and
            the process exits.
        num: Number of retries allowed after the first failed attempt, so
            at most ``num + 1`` requests are made.

    Returns:
        The raw response body, or ``None`` when every attempt failed.
    """
    if not url:
        print('参数URL为空')
        sys.exit(0)

    retries_left = num
    while True:
        try:
            return urllib2.urlopen(url).read()
        except urllib2.URLError as e:
            # Fall back to the exception itself if it has no ``reason``.
            print('Download error:  %s' % getattr(e, 'reason', e))
            if retries_left <= 0:
                return None
            retries_left -= 1
            # Short pause before the next attempt.
            time.sleep(2)

#获取城市空气质量
def getAirFromPm25(city):
    """Build a printable air-quality summary for ``city`` from pm25.com.

    Args:
        city: Page name used in the pm25.com URL
            (``http://www.pm25.com/<city>.html``).

    Returns:
        A multi-line text block made of the city name, the pollution index
        line and the detail lines scraped from the page.

    The process exits if the page cannot be downloaded.
    """
    html = getUrlContents('http://www.pm25.com/' + city + '.html')
    if not html:
        sys.exit(0)

    soup = BeautifulSoup(html, "lxml")
    city_name = soup.find(class_="bi_loaction_city").get_text()
    aqi_value = soup.find(class_="bi_aqiarea_num").get_text()

    # Keep only the non-empty lines that do not mention "AQI". A new list is
    # built because removing items from a list while iterating over it skips
    # elements. The pattern is '\n+' rather than '\n*': a pattern that can
    # match the empty string splits between every character on Python 3.7+.
    level_text = soup.find(class_="bi_aqiarea_right").get_text()
    level_parts = [
        part for part in re.split(r'\n+', level_text)
        if part and not re.search("AQI", part)
    ]

    detail_text = soup.find(class_="bi_aqiarea_bottom").get_text().strip()
    detail_lines = [
        u"   " + line + ' \n' for line in re.split(r'\n+', detail_text)
    ]

    header_lines = [
        u"\n🏘  所在城市: " + city_name + u"天气" + " \n\n",
        u"🍀  污染指数: " + aqi_value + ' ' + ''.join(level_parts) + ' \n',
    ]
    return ''.join(header_lines + detail_lines)

#污染指数
def getAqi(city):
    """Return the pollution index shown on the pm25.com page for ``city``.

    The page is downloaded through ``getUrlContents``; when nothing comes
    back the process exits. Otherwise the text of the element with class
    ``bi_aqiarea_num`` is stripped and converted to ``int``.
    """
    page = getUrlContents('http://www.pm25.com/' + city + '.html')
    if page:
        soup = BeautifulSoup(page, "lxml")
        aqi_text = soup.find(class_="bi_aqiarea_num").get_text()
        return int(aqi_text.strip())

    exit(0)

#污染得分
def getMsgByAqi(aqi):
    """Translate a pollution index into an eating-distance recommendation.

    Args:
        aqi: Numeric pollution index.

    Returns:
        A dict with ``info`` (the message to display) and ``tag``
        (``1`` when ``aqi`` is at most 150, ``0`` otherwise).
    """
    far_is_fine = aqi * 1.0 <= 150
    if far_is_fine:
        return {"info": u'🚜 🚜 🚜  适宜远距离吃饭', 'tag': 1}
    return {"info": u'🚶🏻 🚶🏻 🚶🏻  适宜近距离吃饭', 'tag': 0}

#获取吃饭商家
def getWhereList(file_name):
    """Load the candidate eating places from ``file_name``.

    Each non-blank line is expected to look like ``<tag> <name>``, where
    ``tag`` is ``0`` for a nearby place and any other integer for a distant
    one. The last whitespace-separated field is taken as the name.

    Args:
        file_name: File name, resolved relative to the current directory.

    Returns:
        ``{0: [nearby names], 1: [distant names]}``. Names are kept exactly
        as read from the file, which is opened in binary mode.

    The process exits with a message when the file is missing or empty.
    Lines whose tag is not an integer are skipped.
    """
    path = './' + file_name
    if not os.path.exists(path):
        print("\033[43;31m😫    无法获取吃饭资源 \033[0m\n")
        sys.exit(0)

    with open(path, 'rb') as f:
        content = f.read()
    if not content:
        print("\033[43;31m👿 👿 👿    无法获取吃饭位置  \033[0m\n")
        sys.exit(0)

    near = []
    far = []
    # splitlines()/split() also cope with '\r\n' endings and trailing
    # whitespace, which regex splitting turned into an empty name.
    for line in content.splitlines():
        fields = line.split()
        if not fields:
            continue
        try:
            is_far = int(fields[0])
        except ValueError:
            # Not a "<tag> <name>" line; ignore it instead of crashing.
            continue
        if is_far:
            far.append(fields[-1])
        else:
            # Append here: assigning a fresh one-element list kept only the
            # last nearby entry.
            near.append(fields[-1])

    return {0: near, 1: far}

#获取吃饭地方
def getMeatLocation(where_list):
    """Pick a random place to eat and wrap it in a colored banner.

    Args:
        where_list: Sequence of candidate place names.

    Returns:
        The banner text announcing the chosen place, or the "nothing
        suitable" banner when ``where_list`` is empty.
    """
    if not where_list:
        # Return the fallback banner instead of falling through: choice()
        # raises IndexError on an empty sequence.
        return "\033[43;31m🌳 🌳 🌳  没有合适的吃饭地方，请自便  \033[0m\n"

    return (
        "\033[43;31m  🍚 🥝 🍎   今天去："
        + choice(where_list)
        + "  🍇 🍓 🍒   \033[0m\n"
    )


if __name__ == '__main__':
    # Usage: <script> CITY [WHERE_LIST_FILE]
    cli_args = sys.argv[1:]
    if not cli_args:
        print('请输入城市')
        exit(0)

    # CITY is the pm25.com page name, e.g.
    # http://www.pm25.com/shijiazhuang.html
    city = cli_args[0]
    where_list = cli_args[1] if len(cli_args) > 1 else 'where_list.txt'

    # Show the air-quality summary first.
    print(getAirFromPm25(city))

    # Decide between near and far places from the pollution index.
    msg = getMsgByAqi(getAqi(city))
    print(msg['info'])

    # Pick a place from the list that matches the recommendation.
    places = getWhereList(where_list)
    print(getMeatLocation(places[msg['tag']]))