import requests
import json
import random
import openpyxl
import time
def get_content(page):
    """Fetch one page of the AMAC private-fund listing and return the parsed JSON.

    Args:
        page: Page index sent as the ``page`` query parameter. The calling
            script starts at 0, so indexing is presumably zero-based.

    Returns:
        The decoded JSON response body. The calling script reads its
        ``totalPages`` and ``content`` keys.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # ``rand`` is a fresh random number per request (presumably a cache-buster);
    # the page size is fixed at 20 records.
    url = 'http://gs.amac.org.cn/amac-infodisc/api/pof/fund?rand={}&page={}&size=20'.format(
        random.random(), page)
    headers = {
        'Content-Type': 'application/json',
        # The header name must be "User-Agent"; the former 'usr-agent' key was
        # just an unknown header, so requests kept sending its default UA.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.42 Safari/537.36',
    }
    data = {}  # empty JSON object as the POST body (no filter criteria)
    # A timeout keeps a stalled connection from hanging the whole crawl.
    rsp = requests.post(url=url, headers=headers, data=json.dumps(data), timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    rsp.raise_for_status()
    return rsp.json()
def _format_ms_date(ms):
    """Return a millisecond epoch timestamp as a local 'YYYY-MM-DD' string, or '' for None."""
    if ms is None:
        return ''
    return time.strftime("%Y-%m-%d", time.localtime(ms / 1000))


# The first request tells us how many pages exist; its payload is reused
# below as page 0 so that page is not downloaded twice.
json_str = get_content(0)
totalPages = json_str['totalPages']

xls = openpyxl.Workbook()
sheet = xls.active
title = ['基金名称','私募基金管理人名称','托管人名称','成立时间','备案时间']
sheet.append(title)

# Pages are requested starting at 0, so the valid indices are
# 0 .. totalPages - 1 (the former totalPages + 1 bound fetched one page too many).
for page in range(totalPages):
    print("当前第{}页中".format(page+1))
    if page > 0:
        json_str = get_content(page)
    for item in json_str['content']:
        # Each date is converted independently. Previously a missing
        # putOnRecordDate cleared start_time and left end_time unset or stale.
        start_time = _format_ms_date(item['establishDate'])
        end_time = _format_ms_date(item['putOnRecordDate'])
        sheet.append([item['fundName'],item['managerName'],item["mandatorName"],start_time,end_time])
    # Throttle: pause one second between page requests.
    time.sleep(1)

xls.save("zjzj.xlsx")
print("爬取完成")
# 网友评论 (web-page residue, "reader comments"; not part of the script)