class RotateUserAgentMiddleware(object):
    """Give each outgoing request a randomly chosen ``User-Agent`` header.

    NOTE(review): the ``process_request(request, spider)`` signature looks
    like the Scrapy downloader-middleware hook -- confirm that this class is
    registered in the project's ``DOWNLOADER_MIDDLEWARES`` setting.

    The User-Agent value comes from ``random_ua()``, which samples a locally
    stored copy of the fake_useragent browser data
    (https://fake-useragent.herokuapp.com/browsers/0.1.11) instead of calling
    ``fake_useragent.UserAgent().random`` on every request.

    Proxy rotation is intentionally not implemented here.  If it is needed
    later, a proxy URL such as ``"http://211.23.149.28:80"`` can be assigned
    to ``request.meta['proxy']`` inside ``process_request``.
    """

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` unless the request already carries one.

        Args:
            request: The outgoing request; only ``request.headers`` is
                touched.
            spider: The spider that issued the request (unused).

        Returns:
            None.  The request is modified in place.
        """
        # Same outcome as ``headers.setdefault(...)``: an existing header is
        # never overwritten.  The explicit membership test avoids calling
        # random_ua() when its result would be discarded.
        if 'User-Agent' not in request.headers:
            request.headers['User-Agent'] = random_ua()
# --- File: ua_random.py ---
# All User-Agent strings are downloaded ahead of time and read from a local JSON file.
# Default location of the downloaded User-Agent data (unchanged from the
# original hard-coded path).
_DEFAULT_UA_FILE = 'C:/WorkProgram/Philippine_news/Philippine_news/spiders/tools/user_agent.json'

# Browser families to sample from; each is a key under "browsers" in the file.
_BROWSER_FAMILIES = ('chrome', 'opera', 'firefox', 'internetexplorer', 'safari')

# Parsed User-Agent pools keyed by file path, so the JSON file is read and
# parsed once per path instead of once per call.
_UA_POOLS = {}


def _load_ua_pools(path):
    """Return the non-empty User-Agent lists stored in the JSON file at *path*."""
    import json

    if path not in _UA_POOLS:
        with open(path, encoding='utf-8') as ua_file:
            browsers = json.load(ua_file)['browsers']
        # Skip families that are absent or empty so that sampling cannot hit
        # ``None`` or an empty sequence.
        _UA_POOLS[path] = [
            browsers[family] for family in _BROWSER_FAMILIES if browsers.get(family)
        ]
    return _UA_POOLS[path]


def random_ua(path=_DEFAULT_UA_FILE):
    """Return a random User-Agent value from the JSON file at *path*.

    The file must contain a top-level ``"browsers"`` object that maps browser
    family names (``chrome``, ``opera``, ``firefox``, ``internetexplorer``,
    ``safari``) to lists of User-Agent values (presumably strings -- verify
    against the data file).  A family is picked at random first, then one
    entry from that family's list.

    Args:
        path: Location of the JSON file.  Defaults to the project's
            downloaded ``user_agent.json``.

    Returns:
        One entry from the selected family's list.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the file has no ``"browsers"`` key.
        ValueError: If none of the known browser families has any entries.
    """
    import random

    pools = _load_ua_pools(path)
    if not pools:
        raise ValueError(f'no User-Agent entries found in {path!r}')
    return random.choice(random.choice(pools))
# 网友评论 (reader comments -- leftover web page text, not part of the code)