美文网首页python技巧
使用 requests 库小案例一

使用 requests 库小案例一

作者: 陆_志东 | 来源:发表于2018-08-23 10:10 被阅读0次
    import requests
    import logging
    import json
    # Configure the root logger once, when the module is loaded. With no
    # filename given, basicConfig attaches a console (stderr) handler.
    logging.basicConfig(
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        # With this datefmt a record prefix renders like:
        #   2018-07-16 16:29:20 test.py[line:36] INFO
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.DEBUG,
        # To write the log to a file instead, additionally pass
        # filename='log/log.txt' and filemode='w'.
    )
    # The empty name selects the root logger configured above.
    logger_root = logging.getLogger("")
    
    
    def read_file(path="./oid.txt"):
        """Yield every non-blank line of the oid file, stripped of whitespace.

        Blank or whitespace-only lines are skipped instead of being treated
        as end of input, so an empty line in the middle of the file no longer
        silently drops all the oids that follow it.

        Args:
            path: Text file to read, one oid per line. Defaults to
                ``./oid.txt``, the location the script has always used.

        Yields:
            str: Each non-empty line with surrounding whitespace removed.
        """
        # newline="\n" keeps "\n" as the only line separator; a trailing "\r"
        # from Windows-style line endings is removed by strip() below.
        with open(path, "r", encoding="utf-8", newline="\n") as f:
            # Iterating the file object streams it line by line and stops at
            # the real end of file.
            for line in f:
                data = line.strip()
                if data:
                    yield data
    
    
    def post_http(post_data, company_url, timeout=30):
        """POST ``post_data`` to ``company_url`` and return the decoded JSON body.

        Every failure mode is reported the same way, by logging it and
        returning ``None``, so one bad request does not abort a long batch run.

        Args:
            post_data: Request body handed to ``requests.post`` as ``data``.
            company_url: Full URL to send the request to.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` waits indefinitely on a stalled server.

        Returns:
            The parsed JSON response, or ``None`` when the request fails, the
            status code is not 200, or the body is not valid JSON.
        """
        try:
            response = requests.post(company_url, data=post_data, timeout=timeout)
        except requests.RequestException as exc:
            # Covers connection errors and timeouts alike.
            logger_root.warning("POST %s failed: %s", company_url, exc)
            return None

        if response.status_code != 200:
            logger_root.warning(
                "POST %s returned HTTP %s", company_url, response.status_code
            )
            return None

        try:
            return json.loads(response.text)
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            logger_root.warning(
                "POST %s returned a non-JSON body: %s", company_url, exc
            )
            return None
    
    
    def save_file(data_list, path="./result.txt"):
        """Append each string in ``data_list`` to the result file, one per line.

        Args:
            data_list: Strings to write; each becomes one line of the file.
            path: File to append to. Defaults to ``./result.txt``, the
                location the script has always used.

        An empty ``data_list`` writes nothing, so no stray blank line ends up
        in the output.
        """
        if not data_list:
            return
        with open(path, "a", encoding="utf-8", newline="\n") as f:
            # The trailing "\n" is required: without it the last line of this
            # batch and the first line of the next append would be joined
            # into a single line.
            f.write("\n".join(data_list) + "\n")
    
    
    
    def start():
        """Resolve the industry of every company linked to each oid and save it.

        For each oid produced by ``read_file()``:

        1. POST to the ``fetch_news`` endpoint; the response's ``"companies"``
           field is split on ``"|"`` into company names.
        2. POST each company name to the ``fetch_industry`` endpoint and read
           ``"l1_domain_name"`` from the response.
        3. Buffer one JSON line of the form
           ``{oid: [{"company": ..., "industry": ...}, ...]}``.

        The buffer is flushed through ``save_file()`` after every 100 oids and
        once more at the end for any remainder. An oid whose lookup fails is
        still written, with an empty list.
        """
        data_list = []
        post_data = json.dumps(["companies"], ensure_ascii=False).encode("utf-8")
        company_url = "http://114.55.103.126/yq/tool/fetch_news?oid={}"
        industry_url = "http://114.55.103.126/yq/tool/fetch_industry"

        for i, oid in enumerate(read_file(), start=1):
            row_list = []
            company_info = post_http(post_data, company_url.format(oid))
            if company_info:
                # A missing or null "companies" field is treated as "no
                # companies" rather than raising in the middle of the batch.
                company_names = (company_info.get("companies") or "").split("|")
                logger_root.info("程序执行了 {} 次 oid 获取=========================================".format(i))
                for company in company_names:
                    if not company:
                        # "".split("|") yields [""]; do not query an empty name.
                        continue
                    body = json.dumps({"name": company}, ensure_ascii=False).encode("utf-8")
                    industry_info = post_http(body, industry_url)
                    if industry_info is None:
                        continue
                    row_list.append({
                        "company": company,
                        "industry": industry_info.get("l1_domain_name"),
                    })
            data_list.append(json.dumps({oid: row_list}, ensure_ascii=False))

            if i % 100 == 0:
                # Integer division, so the batch number logs as 1, 2, ...
                # instead of 1.0, 2.0, ...
                logger_root.info("存储 file {} 次 =============================================".format(i // 100))
                save_file(data_list)
                data_list = []

        # Flush whatever is left after the last full batch of 100.
        if data_list:
            save_file(data_list)
    
    
    
    
    
    # Run the batch job only when this file is executed as a script, not
    # when it is imported as a module.
    if __name__ == "__main__":
        start()
    

    相关文章

      网友评论

        本文标题:使用 requests 库小案例一

        本文链接:https://www.haomeiwen.com/subject/rftuiftx.html