Python爬虫操作
一、基础爬取数据存数据库
数据库中已有初始数据,其中包含用于搜索的 key 信息。
1、读取数据库内容
2、遍历key
3、发送get请求
4、解析返回的数据,提取所需字段
5、将获取到的数据内容更新到数据库
import json
import pymysql
import requests
import datetime
import time
# MySQL connection settings; process_item() passes these to pymysql.connect().
host = '127.0.0.1'
user = 'root'
# NOTE(review): the password is hard-coded in source — consider reading it
# from the environment or a config file instead.
psd = '123456'
db = 'test'
# Character set passed as `charset` when connecting.
c = 'utf8'
port = 3306
# Table that process_item() selects from and updates.
TABLE_NAME = 'snow'
def getBrand(symbol, company_name):
    """Fetch the report list for one stock and collect its cash-flow values.

    Sends a GET request to the ``balance.json`` endpoint for ``symbol`` and
    reads ``data.quote_name`` and ``data.list`` from the JSON response.

    Args:
        symbol: Stock symbol sent as the ``symbol`` query parameter.
        company_name: Expected company name; compared with the
            ``quote_name`` returned by the API.

    Returns:
        ``None`` when the returned ``quote_name`` differs from
        ``company_name``. Otherwise a list whose first element is
        ``quote_name``, followed by the first element of
        ``net_increase_in_cce`` for every report, in response order.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response lacks one of the expected keys.
    """
    url = "https://stock.xxxxx.com/v5/stock/finance/cn/balance.json"
    # The original hand-built query string contained "×tamp=", which is
    # "&timestamp=" mangled by HTML-entity decoding ("&times" -> "×").
    # Passing the parameters separately avoids that and lets requests do
    # the URL encoding.
    params = {
        "symbol": symbol,
        "type": "Q4",
        "is_detail": "true",
        "count": 6,
        "timestamp": "1819955200001",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Cookie": "xq_a_token=xxxxx;",
    }

    # A timeout keeps one stalled request from hanging the whole crawl.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of a confusing KeyError later.
    response.raise_for_status()

    result = json.loads(response.text)
    reports = result['data']['list']
    quote_name = result['data']['quote_name']

    # Guard against the symbol resolving to a different company.
    if quote_name != company_name:
        return None

    pdata = [quote_name]
    for report in reports:
        item_info = {
            'report_name': report['report_name'],
            'net_increase_in_cce': report['net_increase_in_cce'][0],
        }
        pdata.append(item_info['net_increase_in_cce'])
        print(item_info)
    return pdata
def process_item():
    """Refresh the CASH_FLOW13..18 columns of every eligible row in TABLE_NAME.

    Selects all rows whose COMPANY_NAME and STOCK_CODE are not null, builds
    a stock symbol from each row's DATA_ID, fetches the values with
    getBrand(), and writes them back together with the symbol (stored in
    DESCRIPTION). Each row update is committed on its own; a failed update
    is rolled back and the loop continues with the next row.

    Any other exception raised while querying or fetching is printed, the
    transaction is rolled back, and processing stops. The cursor and the
    connection are always closed before returning.
    """
    query_sql = (
        "select ID, HEBING, DATA_ID, COMPANY_NAME, STOCK_CODE, "
        " ASSETS_TOTAL18, ASSETS_TOTAL17, ASSETS_TOTAL16, ASSETS_TOTAL15, ASSETS_TOTAL14, ASSETS_TOTAL13, "
        " CASH_FLOW18, CASH_FLOW17, CASH_FLOW16, CASH_FLOW15, CASH_FLOW14, CASH_FLOW13, "
        " DESCRIPTION, CREATE_DATE, LAST_UP_DATE "
        " from " + TABLE_NAME + " where COMPANY_NAME is not null and STOCK_CODE is not null "
    )
    # Loop-invariant, so build it once instead of once per row.
    update_sql = (
        "update " + TABLE_NAME + " set CASH_FLOW18=%s "
        ", CASH_FLOW17=%s, CASH_FLOW16=%s, CASH_FLOW15=%s"
        ", CASH_FLOW14=%s, CASH_FLOW13=%s, DESCRIPTION=%s "
        " where ID=%s"
    )

    # Open the database connection.
    con = pymysql.connect(host=host, user=user, passwd=psd, db=db, charset=c, port=port)
    try:
        # Database cursor.
        cue = con.cursor()
        try:
            cue.execute(query_sql)
            # fetchall() returns the rows as tuples, indexed in SELECT order:
            # row[0]=ID, row[2]=DATA_ID, row[3]=COMPANY_NAME.
            for row in cue.fetchall():
                # Swap the two dot-separated halves of DATA_ID to build the
                # symbol. NOTE(review): presumably DATA_ID looks like
                # "<code>.<exchange>" — confirm against the table contents.
                parts = row[2].split('.')
                sc = parts[1] + parts[0]

                pdata = getBrand(sc, row[3])
                if pdata is None:
                    continue
                # pdata is [quote_name, v1, ..., vN]; six values are needed.
                # Skip short responses instead of letting an IndexError
                # abort the remaining rows.
                if len(pdata) < 7:
                    print('Skip (incomplete data):', sc)
                    continue

                data = (pdata[1], pdata[2], pdata[3], pdata[4], pdata[5], pdata[6], sc, row[0])
                try:
                    cue.execute(update_sql, data)
                    print(data)
                except Exception as e:
                    print('Insert error:', e)
                    con.rollback()
                else:
                    con.commit()
        except Exception as e:
            print('Insert error:', e)
            con.rollback()
        else:
            con.commit()
        finally:
            cue.close()  # Close the cursor.
    finally:
        # The original never closed the connection; release it on every path.
        con.close()
if __name__ == "__main__":
    # Run the crawl-and-update job only when executed as a script.
    process_item()
此段代码仅用于爬虫学习和数据获取练习,不用于任何商业用途。
2020/01/27 春节新型冠状病毒期间于许昌
网友评论