美文网首页
py爬虫4:练习之函数式编程

py爬虫4:练习之函数式编程

作者: _百草_ | 来源:发表于2022-07-22 14:34 被阅读0次
    import os
    import time
    from typing import Dict,  AnyStr
    from urllib import parse
    from faker import Faker
    from urllib.request import Request, urlopen
    
    
    # 函数式修改程序
    # 使得程序思路更清晰
    
    # 拼接url
    def get_url(base_url, param: Dict):
        """
        Build a request URL by appending URL-encoded query parameters.

        :param base_url: URL to extend; it may already carry a query string.
        :param param: mapping of query parameter names to values.
        :return: ``base_url`` unchanged when ``param`` is empty, otherwise
            ``base_url`` followed by the encoded parameters.
        """
        query = parse.urlencode(param)
        if not query:
            # Nothing to append; avoid leaving a dangling "?" on the URL.
            return base_url
        if "?" not in base_url:
            separator = "?"
        elif base_url.endswith(("?", "&")):
            # The base URL already ends with a separator; do not double it.
            separator = ""
        else:
            # The base URL already has a query string, so extend it.
            separator = "&"
        return f"{base_url}{separator}{query}"
    
    
    # 发送请求
    def get_req(url: str, timeout: float = 10):
        """
        Request ``url`` with a randomly generated User-Agent header.

        :param url: fully built URL to request.
        :param timeout: seconds to wait on blocking network operations before
            giving up; without a timeout a stalled server would hang the
            script indefinitely.
        :return: the open response object returned by ``urlopen``; the caller
            is responsible for reading and closing it.
        """
        fake = Faker(locale="zh_CN")
        headers = {
            "User-agent": fake.user_agent()
        }
        req = Request(url, headers=headers)
        return urlopen(req, timeout=timeout)
    
    
    # 获取文件后缀
    def get_extension(resp):
        """
        Map the response's Content-Type header to a file extension.

        :param resp: response object whose ``info()`` returns its headers.
        :return: ``".html"`` for HTML responses; ``None`` (after printing a
            notice) for any other or missing content type.
        """
        # The header looks like "Content-Type: text/html; charset=utf-8".
        # It can be absent, and media types are case-insensitive, so
        # normalise before matching instead of testing against None.
        content_type = resp.info().get("Content-Type") or ""
        if "text/html" in content_type.lower():
            return ".html"
        print("不支持的类型")
        return None
    
    
    # 保存文件
    def save_file(resp):
        """
        Save the body of a supported response to a timestamped file.

        The file is created in the directory of this script, named with the
        current local time (``YYYYmmddHHMMSS``) plus the detected extension,
        and is always written as UTF-8 text.

        :param resp: response object exposing ``info()`` and ``read()``.
        :return: the message "不支持的文件类型" when the content type is not
            supported; ``None`` after the file has been written.
        """
        ext = get_extension(resp)
        if not ext:
            return "不支持的文件类型"
        filename = os.path.join(os.path.dirname(__file__), f"{time.strftime('%Y%m%d%H%M%S')}{ext}")
        raw = resp.read()
        # Decode with the charset declared in the response headers instead of
        # assuming UTF-8, so pages served in other encodings do not crash.
        get_charset = getattr(resp.info(), "get_content_charset", None)
        charset = (get_charset() if get_charset else None) or "utf-8"
        try:
            content = raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrongly declared charset: keep what can be decoded
            # rather than losing the whole page.
            content = raw.decode("utf-8", errors="replace")
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
    
    
    if __name__ == "__main__":
        # Demo run: search Baidu for a sample keyword and save the result page.
        host = "https://www.baidu.com/s"
        word = {"wd": "百草"}
        # The response holds an open network connection; the with-block
        # closes it once the page has been saved, even if saving fails.
        with get_req(get_url(host, word)) as res:
            save_file(res)
    
    

    相关文章

      网友评论

          本文标题:py爬虫4:练习之函数式编程

          本文链接:https://www.haomeiwen.com/subject/npvmirtx.html