import requests
import logging
import json
# Configure the root logger once at import time: DEBUG level, each record
# rendered as "<timestamp> <file>[line:<n>] <LEVEL> <message>", e.g.
# "2018-07-16 16:29:20 test.py[line:36] INFO ...".
# To send the log to a file instead of the default stream, additionally pass
# filename='log/log.txt' (target file) and filemode='w'.
logging.basicConfig(
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.DEBUG,
)

# The empty name selects the root logger; the functions in this script log through it.
logger_root = logging.getLogger("")
def read_file(path="./oid.txt"):
    """Yield every non-blank line of a UTF-8 text file, whitespace-stripped.

    Blank and whitespace-only lines are skipped rather than treated as the
    end of the input, so an accidental empty line in the middle of the file
    no longer silently drops all the ids that follow it.

    Args:
        path: File to read; defaults to ``./oid.txt`` in the current
            working directory.

    Yields:
        str: One stripped, non-empty line at a time, in file order.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration).
    """
    with open(path, "r", encoding="utf-8", newline="\n") as f:
        # Iterating the file object streams it line by line; strip() also
        # removes a trailing "\r" left over from Windows line endings.
        for line in f:
            data = line.strip()
            if data:
                yield data
def post_http(post_data, company_url, timeout=30):
    """POST *post_data* to *company_url* and return the decoded JSON reply.

    Every failure mode is logged and reported to the caller as ``None``
    so that one bad request does not abort a long-running batch.

    Args:
        post_data: Request body passed to ``requests.post`` as ``data``.
        company_url: Target URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled peer.

    Returns:
        The parsed JSON payload, or ``None`` when the request failed, the
        status code was not 200, or the body was not valid JSON.
    """
    try:
        response = requests.post(company_url, data=post_data, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...
        logger_root.warning("POST %s failed: %s", company_url, exc)
        return None

    if response.status_code != 200:
        logger_root.warning("POST %s returned HTTP %s", company_url, response.status_code)
        return None

    try:
        return json.loads(response.text)
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass.
        logger_root.warning("POST %s returned a non-JSON body: %s", company_url, exc)
        return None
def save_file(data_list, path="./result.txt"):
    """Append the strings in *data_list* to a UTF-8 text file, one per line.

    Args:
        data_list: Strings to write; each becomes one line.
        path: Output file, opened in append mode; defaults to
            ``./result.txt`` in the current working directory.

    An empty *data_list* is a no-op, so no stray blank line is written.
    """
    if not data_list:
        return
    with open(path, "a", encoding="utf-8", newline="\n") as f:
        # The trailing "\n" is required: without it the last line of this
        # batch and the first line of the next append would run together.
        f.write("\n".join(data_list) + "\n")
def start():
    """Look up the companies and their industries for every oid and save them.

    For each oid produced by ``read_file()``:

    1. POST to the company endpoint; the reply is expected to be a JSON
       object whose ``"companies"`` field is a ``|``-separated string of
       company names.
    2. POST each company name to the industry endpoint and read the
       ``"l1_domain_name"`` field of the reply.
    3. Record one JSON line ``{oid: [{"company": ..., "industry": ...}, ...]}``.

    Rows are flushed to disk through ``save_file()`` every 100 oids, and
    once more at the end for the remainder, so memory stays bounded and
    an interruption loses at most one batch.
    """
    post_data = json.dumps(["companies"], ensure_ascii=False).encode("utf-8")
    company_url = "http://114.55.103.126/yq/tool/fetch_news?oid={}"
    industry_url = "http://114.55.103.126/yq/tool/fetch_industry"
    batch_size = 100

    data_list = []
    for i, oid in enumerate(read_file(), start=1):
        row_list = []
        company_info = post_http(post_data, company_url.format(oid))
        if company_info:
            logger_root.info("程序执行了 {} 次 oid 获取=========================================".format(i))
            # A missing or empty "companies" field means no companies for
            # this oid, rather than a KeyError that kills the whole run.
            names = company_info.get("companies") or ""
            for company in names.split("|"):
                if not company:
                    # "".split("|") yields [""]; do not query an empty name.
                    continue
                body = json.dumps({"name": company}, ensure_ascii=False).encode("utf-8")
                industry = post_http(body, industry_url)
                if industry is None:
                    # Lookup failed; post_http has already logged the reason.
                    continue
                row_list.append({"company": company, "industry": industry.get("l1_domain_name")})

        # One output line per oid, even when no company could be resolved.
        data_list.append(json.dumps({oid: row_list}, ensure_ascii=False))

        if i % batch_size == 0:
            # Floor division keeps the batch counter an integer ("1", not "1.0").
            logger_root.info("存储 file {} 次 =============================================".format(i // batch_size))
            save_file(data_list)
            data_list = []

    # Flush whatever is left over from the last, partial batch.
    if data_list:
        save_file(data_list)
# Script entry point: run the batch job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    start()
# (End of script. The "Reader comments" section of the source web page followed here.)