import os
import time
from typing import Dict, AnyStr
from urllib import parse
from faker import Faker
from urllib.request import Request, urlopen
# 函数式修改程序
# 使得程序思路更清晰
# 拼接url
def get_url(base_url, param: Dict):
    """
    Build a request URL by appending URL-encoded query parameters.

    :param base_url: URL to extend; it may already carry a query string.
    :param param: mapping of query parameter names to values, encoded with
        ``urllib.parse.urlencode``. An empty or ``None`` value adds nothing.
    :return: ``base_url`` followed by the encoded query, or ``base_url``
        unchanged when there are no parameters to append.
    """
    query = parse.urlencode(param or {})
    if not query:
        # Nothing to append; avoid leaving a dangling "?" on the URL.
        return base_url
    if "?" not in base_url:
        separator = "?"
    elif base_url.endswith(("?", "&")):
        # The base URL is already positioned to accept another pair.
        separator = ""
    else:
        # Extend the existing query string instead of adding a second "?".
        separator = "&"
    return f"{base_url}{separator}{query}"
# 发送请求
def get_req(url: str, *, timeout: float = 10.0):
    """
    Send a request to ``url`` with a Faker-generated User-Agent header.

    :param url: absolute URL to request.
    :param timeout: seconds allowed for blocking socket operations before
        ``urlopen`` gives up, so an unresponsive server cannot hang the
        program indefinitely.
    :return: the open response object returned by ``urlopen``; the caller
        is responsible for reading and closing it.
    :raises urllib.error.URLError: propagated from ``urlopen`` when the
        request cannot be completed.
    """
    fake = Faker(locale="zh_CN")
    headers = {
        "User-agent": fake.user_agent(),
    }
    req = Request(url, headers=headers)
    return urlopen(req, timeout=timeout)
# 获取文件后缀
def get_extension(resp):
    """
    Map the response's Content-Type header to a file extension.

    :param resp: response object whose ``info()`` returns the response
        headers (for example the object returned by ``urlopen``).
    :return: ``".html"`` for HTML responses; ``None`` (after printing a
        notice) when the content type is missing or not supported.
    """
    # A missing header yields None; normalise it to "" so the membership
    # test below cannot raise TypeError.
    content_type = resp.info().get("Content-Type") or ""
    # Media type names are case-insensitive, so compare in lower case.
    if "text/html" in content_type.lower():
        return ".html"
    print("不支持的类型")
    return None
# 保存文件
def save_file(resp):
    """
    Save the response body as a timestamped file next to this script.

    :param resp: response object providing ``info()`` and ``read()``.
    :return: the string ``"不支持的文件类型"`` when the content type is not
        supported; ``None`` once the file has been written.
    """
    ext = get_extension(resp)
    if not ext:
        return "不支持的文件类型"
    filename = os.path.join(
        os.path.dirname(__file__),
        f"{time.strftime('%Y%m%d%H%M%S')}{ext}",
    )
    raw = resp.read()
    # Honour the charset declared in the headers instead of assuming UTF-8.
    charset = resp.info().get_content_charset() or "utf-8"
    try:
        content = raw.decode(charset)
    except (LookupError, UnicodeDecodeError):
        # Unknown or incorrect charset label: keep the page rather than
        # crash, replacing any bytes that cannot be decoded.
        content = raw.decode("utf-8", errors="replace")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return None
if __name__ == "__main__":
    host = "https://www.baidu.com/s"
    word = {"wd": "百草"}
    # Fetch the Baidu search page for the keyword and save it; the context
    # manager closes the HTTP response even if saving fails.
    with get_req(get_url(host, word)) as res:
        save_file(res)
# 网友评论 (web-page residue: "reader comments" heading; not part of the program)