最近要分析天气对业务的影响,所以做了一个历史天气的爬虫
import demjson
import requests
class Weather_2345():
    """Scraper for the monthly historical weather data served by tianqi.2345.com.

    An instance is configured with an area name and an inclusive month range
    given as ``'YYYYMM'`` strings. ``area``, ``begin_time`` and ``end_time``
    are plain attributes and may be reassigned between calls to
    :meth:`get_weather`.
    """

    # Seconds to wait on each HTTP request before giving up, so that a stalled
    # connection cannot block the scraper forever.
    REQUEST_TIMEOUT = 10

    # Offset at which the search for the area name in the city data starts.
    # NOTE(review): presumably skips a leading section of the file that is
    # not part of the code table -- confirm against the live file.
    _CODE_SEARCH_START = 203

    def __init__(self, area, begin_time, end_time):
        """Store the query parameters and the endpoints used by the scraper.

        :param area: area name to look up in the city data file.
        :param begin_time: first month to fetch, as a ``'YYYYMM'`` string.
        :param end_time: last month to fetch (inclusive), as ``'YYYYMM'``.
        """
        self.area, self.begin_time, self.end_time = area, begin_time, end_time
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
        }
        # Placeholder {0} is the area code, {1} is the 'YYYYMM' month.
        self.prefix_url = "http://tianqi.2345.com/t/wea_history/js/{1}/{0}_{1}.js"
        self.code_url = "http://tianqi.2345.com/js/citySelectData.js"
        # Cached text of ``code_url``; downloaded on first use by get_areacode.
        self.code_str = None

    def get_weather(self):
        """Download the weather records of ``self.area`` for the configured months.

        A month that fails to download or parse is reported on stdout and
        skipped; rows collected from it before the failure are kept.

        :return: list of tuples ``(region, date, max temperature,
            min temperature, weather, wind direction, wind force, air quality
            index, air quality description, air quality level)``. The last
            three fields are ``None`` for days that have no ``"aqi"`` entry.
        """
        months = self.__generate_time_list(self.begin_time, self.end_time)
        if not months:
            return []

        # The area code does not depend on the month, so resolve it once
        # instead of once per iteration.
        area_code = self.get_areacode(self.area)
        if area_code is None:
            # Without a code every URL would be built from the text "None"
            # and fail; report the real cause and skip the useless requests.
            print("error area:" + str(self.area))
            return []

        result = []
        for curr_month in months:
            url = self.prefix_url.format(area_code, curr_month)
            try:
                body = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                ).text
                # Drop the first 16 characters and the last one before
                # decoding. NOTE(review): presumably a JavaScript assignment
                # prefix and a trailing ";" -- confirm against a live response.
                response_dict = demjson.decode(body[16:-1])
                city = response_dict['city']
                for line in response_dict['tqInfo']:
                    if not line:
                        continue
                    if "aqi" in line:
                        aqi_fields = (line["aqi"], line["aqiInfo"], line["aqiLevel"])
                    else:
                        aqi_fields = (None, None, None)
                    result.append((
                        city,
                        line["ymd"],
                        line["bWendu"].rstrip("℃"),
                        line["yWendu"].rstrip("℃"),
                        line["tianqi"],
                        line["fengxiang"],
                        line["fengli"],
                    ) + aqi_fields)
            except Exception:
                # Deliberately best-effort: one bad month (network error,
                # undecodable payload, missing key) must not abort the others.
                print("error url:" + url)
        return result

    def get_areacode(self, area):
        """Look up the site's code for ``area`` in the city data file.

        The file is downloaded on first use and cached in ``self.code_str``.

        :param area: area name to search for.
        :return: the five characters located 8 to 4 positions before the
            first match of ``area``, or ``None`` when ``area`` is not found.
            NOTE(review): assumes each entry places the code right before a
            three-character separator and the name -- confirm the file layout.
        """
        if not self.code_str:
            self.code_str = requests.get(
                self.code_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            ).text
        try:
            area_index = self.code_str.index(area, self._CODE_SEARCH_START)
        except (ValueError, TypeError):
            # ValueError: name not present; TypeError: ``area`` is not a str.
            return None
        return self.code_str[area_index - 8: area_index - 3]

    def __generate_time_list(self, begin_time, end_time):
        """Return every month from ``begin_time`` to ``end_time`` inclusive.

        :param begin_time: first month, as a ``'YYYYMM'`` string.
        :param end_time: last month, as a ``'YYYYMM'`` string.
        :return: list of ``'YYYYMM'`` strings in chronological order; empty
            when ``begin_time`` is later than ``end_time``.
        """
        # Count months on one linear axis (year * 12 + zero-based month) so
        # that a range ending in December needs no special case.
        first = int(begin_time[:4]) * 12 + int(begin_time[4:]) - 1
        last = int(end_time[:4]) * 12 + int(end_time[4:]) - 1
        time_list = []
        for month_index in range(first, last + 1):
            year, month_offset = divmod(month_index, 12)
            time_list.append("{:0>4d}{:0>2d}".format(year, month_offset + 1))
        return time_list
if __name__ == '__main__':
    # Basic usage: fetch one area over a month range and print the rows.
    weather = Weather_2345('广州', '201811', '201901')
    print(weather.get_weather())
    # Alternative usage: reuse a single instance, reassigning ``area`` for
    # each district, and dump all rows into one comma-separated file. The
    # area passed to the constructor is only a placeholder here.
    weather = Weather_2345('下面会再次设置区域这里可以随便填', '201811', '201901')
    area_list = ["白云", "从化", "花都", "海珠", "黄埔", "荔湾", "南沙", "番禺", "天河", "越秀", "增城"]
    with open('./weather.log', 'w', encoding='utf-8') as fw:
        fw.write("区域,日期,最高气温,最低气温,天气,风向,风力,空气指数,空气情况,空气等级" + "\n")
        for area in area_list:
            weather.area = area
            for row in weather.get_weather():
                # Rows without air-quality data carry None in their last
                # three fields, and decoded values are not guaranteed to be
                # str; str.join accepts only strings, so convert every field
                # and write None as an empty field.
                fw.write(",".join("" if field is None else str(field) for field in row) + '\n')
网友评论