fake_useragent 类库使用比较简单方便
安装
pip install fake_useragent
配置中间件
# Downloader middleware configuration: maps middleware import paths to their
# order value, or to None.
DOWNLOADER_MIDDLEWARES = {
    # The custom middleware that picks the User-Agent via fake_useragent.
    'down_code.middlewares.RandomUserAgentMiddleware': 543,
    # Apparently the official documentation says Scrapy's built-in
    # User-Agent middleware has to be written like this (set to None, which
    # disables it) so that it does not compete with the custom one.
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
}
在 down_code.middlewares 模块中添加以下 RandomUserAgentMiddleware 类的代码(文件开头需要先 from fake_useragent import UserAgent)
class RandomUserAgentMiddleware(object):
    """Downloader middleware that fills in the User-Agent header of requests.

    The header value is read from a ``UserAgent`` object; which attribute of
    that object is read is controlled by the ``RANDOM_UA_TYPE`` setting.

    NOTE: ``UserAgent`` must be in scope in the module that holds this class
    (``from fake_useragent import UserAgent``).
    """

    def __init__(self, crawler):
        """Create the ``UserAgent`` source and read the configured UA type.

        Args:
            crawler: Object exposing ``settings.get(name, default)``; used to
                look up ``RANDOM_UA_TYPE``.
        """
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()
        # Name of the attribute to read from ``self.ua`` for each request.
        # Falls back to "random" when the setting is absent.
        self.ua_type = crawler.settings.get("RANDOM_UA_TYPE", "random")

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from a crawler so it can read the settings.

        Args:
            crawler: Passed straight through to ``__init__``.

        Returns:
            A new ``RandomUserAgentMiddleware`` instance.
        """
        return cls(crawler)

    def process_request(self, request, spider):
        """Set the request's User-Agent header unless one is already present.

        Args:
            request: Object whose ``headers`` supports ``setdefault``.
            spider: Unused here.

        Returns:
            None.
        """
        # Look up the attribute named by ``ua_type`` on the UserAgent object
        # (e.g. ``self.ua.random``). ``setdefault`` keeps any User-Agent that
        # is already on the request.
        request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))
ok 大功告成
网友评论