Python爬虫-获取天气城市code

作者: Fitz916 | 来源:发表于2017-08-08 23:59 被阅读117次

    这里抓取到的城市列表包含全球各个国家的城市,可以根据需要剔除国外城市;当然,国外城市同样可以在天气网站上查询天气~

    #!/usr/bin/env python3
    # -*- coding : utf-8 -*-
    
    '全国天气城市code'
    
    import re
    import requests
    from bs4 import BeautifulSoup
    import datetime
    import pymysql
    
    # Open the MySQL connection that write_sql() inserts through.
    # `password` / `database` are the current PyMySQL keyword names; the
    # `passwd` / `db` spellings are deprecated aliases for them.
    # NOTE(review): the credentials are hard-coded for a local dev database;
    # move them to configuration before reusing this script elsewhere.
    db = pymysql.connect(
        host="localhost",
        user="dev",
        password="000000",
        database="test",
        use_unicode=True,
        charset="utf8",
    )
    # Cursor shared by every statement executed against `db`.
    cursor = db.cursor()

    # Blog post whose body lists the weather city codes.
    city_rul = 'https://my.oschina.net/joanfen/blog/140364'

    # Browser-like request headers sent with the scrape.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        # 'br' is intentionally not advertised: Brotli responses are only
        # decoded when an optional brotli package is installed, otherwise the
        # body would reach response.text as undecoded compressed bytes.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }
    
    
    def get_data(city_rul,header):
        """Scrape the weather city-code table from a blog post.

        Downloads ``city_rul`` using the request headers in ``header``, takes
        the text of the ``div`` with class ``BlogContent`` and scans every
        whitespace-separated token in it. A token is kept when it contains
        both a 9-digit code and a run of 2-9 Chinese characters (the name).

        Args:
            city_rul: URL of the page to download.
            header: dict of HTTP request headers.

        Returns:
            A list of ``[city_name, city_code]`` pairs, one per city. The
            first entry is always ``['北京', '101010100']``; a token carrying
            that code on the page is skipped so Beijing is not listed twice.

        Raises:
            ValueError: if the page has no ``div.BlogContent`` element.
        """
        response = requests.get(city_rul, headers=header)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')
        content = soup.find('div', {'class': 'BlogContent'})
        if content is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError('BlogContent block not found in %s' % city_rul)

        # Compiled once, outside the token loop.
        code_re = re.compile(r'[0-9]{9}')
        name_re = re.compile(r'[\u4e00-\u9fa5]{2,9}')

        city_end = [['北京', '101010100']]
        # Line boundaries carry no meaning here, so split on any whitespace.
        for token in content.get_text().split():
            code = code_re.search(token)
            name = name_re.search(token)
            # Stand-alone province names and other text have no code (or no
            # name); skip them instead of calling .group() on None.
            if code is None or name is None:
                continue
            # Beijing is already seeded at the head of the list.
            if code.group() == '101010100':
                continue
            # A fresh pair per token, so cities that share a source line do
            # not end up merged into one row or appended repeatedly.
            city_end.append([name.group(), code.group()])
        return city_end
    
    
    def write_sql(city_list, file='G:/python/city_code.sql'):
        """Insert every city in ``city_list`` into the ``city_code`` table.

        Rows receive sequential ids starting at 1 and share one timestamp,
        taken when the call starts, for ``gmt_created`` and ``gmt_modified``.
        Each row is committed on its own; a failed insert is rolled back and
        reported, and the loop moves on to the next row. The module-level
        connection ``db`` is closed before the function returns, even when an
        unexpected error escapes the loop.

        Args:
            city_list: iterable of sequences whose first two items are the
                city name and the city code.
            file: unused. Retained for backward compatibility; no file is
                opened or written.
        """
        now = datetime.datetime.now()
        # Placeholders let the driver quote the scraped values, so a name
        # containing a quote can neither break nor alter the statement.
        sql = ('insert into city_code(id, city_name, city_code, gmt_created, gmt_modified)'
               ' values (%s, %s, %s, %s, %s)')
        try:
            row_id = 1
            for city in city_list:
                if len(city) < 2:
                    # A row without both name and code cannot be inserted.
                    print('skipping malformed city row: %r' % (city,))
                    continue
                try:
                    cursor.execute(sql, (row_id, city[0], city[1], now, now))
                    db.commit()
                except Exception as exc:
                    # Best effort: undo this row, report it, keep going.
                    db.rollback()
                    print('insert failed for %r: %s' % (city, exc))
                # The id advances even after a failed insert, so ids follow
                # the row's position in the list.
                row_id += 1
        finally:
            db.close()
    if __name__ == '__main__':
        # Scrape the city/code table, then load it into MySQL.
        write_sql(get_data(city_rul, header))
    
    
    
    
    

    相关文章

      网友评论

        本文标题:Python爬虫-获取天气城市code

        本文链接:https://www.haomeiwen.com/subject/niaxrxtx.html