美文网首页
历史天气爬虫

历史天气爬虫

作者: shixuexeon | 来源:发表于2019-02-07 15:20 被阅读0次

    最近要分析天气对业务的影响,所以写了一个历史天气爬虫:按月从 2345 天气网(tianqi.2345.com)抓取指定区域的历史天气数据。

    
    import demjson
    
    import requests
    
    class Weather_2345():
        """Scraper for monthly historical weather data published by tianqi.2345.com.

        An instance is configured with an area name and an inclusive month range
        (``'YYYYMM'`` strings). ``get_weather`` downloads one JavaScript data file
        per month and flattens every daily record into a tuple.
        """

        # Seconds to wait for the remote server before giving up on a request,
        # so that a stalled connection cannot hang the whole crawl.
        REQUEST_TIMEOUT = 10

        def __init__(self, area, begin_time, end_time):
            """Store the query parameters; no network access happens here.

            :param area: area name to look up in the site's city list
            :param begin_time: first month to fetch, as ``'YYYYMM'``
            :param end_time: last month to fetch (inclusive), as ``'YYYYMM'``
            """
            self.area, self.begin_time, self.end_time = area, begin_time, end_time
            self.headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
            }
            # {0} is the area code, {1} is the month ('YYYYMM').
            self.prefix_url = "http://tianqi.2345.com/t/wea_history/js/{1}/{0}_{1}.js"
            self.code_url = "http://tianqi.2345.com/js/citySelectData.js"
            # Lazily downloaded text of ``code_url``; cached across lookups.
            self.code_str = None

        @staticmethod
        def _build_row(city, line):
            """Flatten one daily record (a dict) into the 10-field result tuple."""
            if "aqi" in line:
                air = (line["aqi"], line.get("aqiInfo"), line.get("aqiLevel"))
            else:
                # Records without air-quality data get placeholders so that
                # every row has the same number of fields.
                air = (None, None, None)
            return (
                city,
                line["ymd"],
                line["bWendu"].rstrip("℃"),
                line["yWendu"].rstrip("℃"),
                line["tianqi"],
                line["fengxiang"],
                line["fengli"],
            ) + air

        def get_weather(self):
            """Fetch the daily weather records for ``self.area`` over the month range.

            A month whose download or parsing fails is reported on stdout and
            skipped; rows already collected are kept.

            :return: list of tuples (area, date, highest temperature, lowest
                temperature, weather, wind direction, wind force, air quality
                index, air quality description, air quality level); the last
                three fields are ``None`` when the record has no ``aqi`` key
            """
            result = []
            time_list = self.__generate_time_list(self.begin_time, self.end_time)
            if not time_list:
                return result

            # The area code does not depend on the month: resolve it once.
            area_code = self.get_areacode(self.area)
            if area_code is None:
                # Without a code every monthly URL would be invalid, so stop
                # here instead of issuing one doomed request per month.
                print("error area:" + str(self.area))
                return result

            for curr_month in time_list:
                url = self.prefix_url.format(area_code, curr_month)
                try:
                    # [16:-1] drops a fixed-length prefix and the final character
                    # of the response -- presumably a ``var ...=`` assignment and
                    # a trailing ``;`` around the object literal; TODO confirm.
                    response = requests.get(
                        url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                    ).text[16:-1]
                    response_dict = demjson.decode(response)
                    city = response_dict['city']
                    for line in response_dict['tqInfo']:
                        if line:  # skip empty entries in the list
                            result.append(self._build_row(city, line))
                except Exception as e:
                    # Best effort: report the failing month and carry on.
                    print("error url:" + url + " (" + repr(e) + ")")
            return result

        def get_areacode(self, area):
            """Look up the site's code for *area* in the cached city list.

            The city list is downloaded on first use and kept in ``self.code_str``.

            :param area: area name to search for
            :return: the 5 characters found at a fixed offset before the first
                match of *area*, or ``None`` when the name is not found
            """
            if not self.code_str:
                self.code_str = requests.get(
                    self.code_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                ).text
            try:
                # Searching starts at offset 203 -- presumably to skip a preamble
                # of the file that could contain false matches; TODO confirm.
                area_index = self.code_str.index(area, 203)
            except (ValueError, TypeError):
                # ValueError: name not present; TypeError: *area* is not a string.
                return None
            # NOTE(review): assumes the code sits 8..3 characters before the name.
            return self.code_str[area_index - 8: area_index - 3]

        def __generate_time_list(self, begin_time, end_time):
            """Return every month from *begin_time* to *end_time*, inclusive.

            :param begin_time: first month, as ``'YYYYMM'``
            :param end_time: last month, as ``'YYYYMM'``
            :return: list of ``'YYYYMM'`` strings in chronological order; empty
                when *begin_time* is later than *end_time*
            """
            begin = (int(begin_time[:4]), int(begin_time[4:]))
            end = (int(end_time[:4]), int(end_time[4:]))
            # Comparing (year, month) tuples avoids the negative-slice trick,
            # which silently returned nothing whenever the end month was 12.
            return [
                "{:0>4d}{:0>2d}".format(y, m)
                for y in range(begin[0], end[0] + 1)
                for m in range(1, 13)
                if begin <= (y, m) <= end
            ]
    
    if __name__ == '__main__':
        # Usage 1: fetch a single area and print the resulting rows.
        weather = Weather_2345('广州', '201811', '201901')
        print(weather.get_weather())

        # Usage 2: reuse one instance for several areas and write the rows to a
        # comma-separated file. The area passed to the constructor here is only
        # a placeholder; it is overwritten for each entry of area_list below.
        weather = Weather_2345('下面会再次设置区域这里可以随便填', '201811', '201901')
        area_list = ["白云", "从化", "花都", "海珠", "黄埔", "荔湾", "南沙", "番禺", "天河", "越秀", "增城"]
        with open('./weather.log', 'w', encoding='utf-8') as fw:
            # Header row (runtime text, kept in the original language).
            fw.write("区域,日期,最高气温,最低气温,天气,风向,风力,空气指数,空气情况,空气等级" + "\n")
            for area in area_list:
                weather.area = area
                weather_list = weather.get_weather()
                for lines in weather_list:
                    # Rows without air-quality data contain None, which
                    # str.join rejects: write those fields as empty strings.
                    fw.write(",".join("" if v is None else str(v) for v in lines) + '\n')
    
    

    相关文章

      网友评论

          本文标题:历史天气爬虫

          本文链接:https://www.haomeiwen.com/subject/atxgsqtx.html