美文网首页Python
Python基础爬取数据存数据库

Python基础爬取数据存数据库

作者: elijah777 | 来源:发表于2020-01-27 13:40 被阅读0次

    Python爬虫操作

    一、基础爬取数据存数据库

    通过数据库的初始数据,包含搜索key信息。

    1、读取数据库内容

    2、遍历key

    3、发送get请求

    4、获取并解析数据：从接口返回的 JSON 数据中解析出所需字段

    5、更新数据库：将获取到的数据内容写回数据库

    # Imports for the scraper below: json (response parsing), pymysql (MySQL
    # access), requests (HTTP).  datetime/time are imported but unused here —
    # possibly left over from an earlier version.
    import json
    
    import pymysql
    import requests
    import datetime
    import  time
    # MySQL connection settings for a local test database.
    host = '127.0.0.1'
    user = 'root'
    psd = '123456'
    db = 'test'
    c = 'utf8' 
    port = 3306
    # Table that holds both the input keys (COMPANY_NAME / STOCK_CODE) and the
    # output columns (CASH_FLOW13..18) updated by process_item().
    TABLE_NAME = 'snow'
     
    def getBrand(symbol, company_name):
        """Fetch the balance-sheet JSON for *symbol* and extract cash-flow data.

        Sends a GET request to the stock API, then collects the
        ``net_increase_in_cce`` value of each report period.

        Args:
            symbol: exchange-prefixed stock code, e.g. ``SH600000``.
            company_name: expected company name; used to sanity-check the
                response.

        Returns:
            A list ``[quote_name, value1, value2, ...]`` with one value per
            report period, or ``None`` when the API's quote name does not
            match *company_name* (wrong symbol / stale mapping).
        """
        url = "https://stock.xxxxx.com/v5/stock/finance/cn/balance.json?symbol="+symbol+"&type=Q4&is_detail=true&count=6&timestamp=1819955200001"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
            "Cookie":"xq_a_token=xxxxx;"
        }
        # timeout prevents the crawler from hanging forever on a stalled server
        response = requests.get(url, headers=headers, timeout=10)
        # requests decodes JSON directly; no need for json.loads(response.text)
        result = response.json()
        datas = result['data']['list']
        quote_name = result['data']['quote_name']
        if quote_name != company_name:
            # Name mismatch: the symbol resolved to a different company.
            return None
        pdata = [quote_name]
        for data in datas:
            # data['net_increase_in_cce'] is a pair; index 0 holds the value
            # (index 1 appears to be a year-over-year ratio — TODO confirm).
            value = data['net_increase_in_cce'][0]
            item_info = {
                'report_name': data['report_name'],
                'net_increase_in_cce': value,
            }
            pdata.append(value)
            print(item_info)
        return pdata
    
    
    def process_item():
        """Crawl cash-flow figures for every row of TABLE_NAME and write them back.

        For each row with a non-null COMPANY_NAME and STOCK_CODE:
        1. convert DATA_ID (``code.exchange``) into the API symbol form
           (``exchange + code``),
        2. fetch data via :func:`getBrand`,
        3. update the CASH_FLOW13..18 and DESCRIPTION columns.

        Each row is committed individually; a failed update rolls back only
        that row.  Connection and cursor are always closed on exit.
        """
        # Database connection and cursor (settings from module-level constants).
        con = pymysql.connect(host=host, user=user, passwd=psd, db=db, charset=c, port=port)
        cue = con.cursor()
        try:
            # Select all rows that have the key fields needed for the crawl.
            query_sql = "select  ID,  HEBING,       DATA_ID,       COMPANY_NAME,       STOCK_CODE, " \
                        "   ASSETS_TOTAL18, ASSETS_TOTAL17, ASSETS_TOTAL16, ASSETS_TOTAL15,  ASSETS_TOTAL14, ASSETS_TOTAL13,   " \
                        "    CASH_FLOW18,    CASH_FLOW17,   CASH_FLOW16,   CASH_FLOW15,   CASH_FLOW14,    CASH_FLOW13,   " \
                        "    DESCRIPTION,       CREATE_DATE,       LAST_UP_DATE " \
                        " from " + TABLE_NAME + "  where COMPANY_NAME is not null and STOCK_CODE  is not null "
            cue.execute(query_sql)

            # Parameterized update, hoisted out of the loop (it never changes).
            update_sql = "update " + TABLE_NAME + "  set CASH_FLOW18=%s " \
                                                  ", CASH_FLOW17=%s, CASH_FLOW16=%s, CASH_FLOW15=%s" \
                                                  ", CASH_FLOW14=%s, CASH_FLOW13=%s, DESCRIPTION=%s " \
                                                  " where ID=%s"

            # fetchall() yields tuples: i[0]=ID, i[2]=DATA_ID, i[3]=COMPANY_NAME.
            for i in cue.fetchall():
                # DATA_ID looks like 'code.exchange'; the API wants 'exchangecode'.
                s = i[2].split('.')
                sc = s[1] + s[0]
                pdata = getBrand(sc, i[3])
                # Skip rows where the name check failed or fewer than six
                # report periods came back (avoids an IndexError below).
                if pdata is None or len(pdata) < 7:
                    continue
                data = (pdata[1], pdata[2], pdata[3], pdata[4], pdata[5], pdata[6], sc, i[0])
                try:
                    cue.execute(update_sql, data)
                    print(data)
                except Exception as e:
                    print('Insert error:', e)
                    con.rollback()
                else:
                    con.commit()
        except Exception as e:
            print('Insert error:', e)
            con.rollback()
        else:
            con.commit()
        finally:
            # Always release DB resources, even when an exception escaped above
            # (the original leaked the connection and, on errors, the cursor).
            cue.close()
            con.close()
       
    
    if __name__ == "__main__":
        # Script entry point: run the crawl-and-store pipeline.
        process_item()
    
    
    

    此段代码仅用于数据的获取与爬虫学习，不做任何商业用途

    2020/01/27 春节新型冠状病毒期间于许昌

    相关文章

      网友评论

        本文标题:Python基础爬取数据存数据库

        本文链接:https://www.haomeiwen.com/subject/gcqethtx.html