在用requests写爬虫的时候,难免会有请求不到数据的时候,这时最好的办法是重复请求几次;如果重复几次仍然请求不到数据,就跳过这个url,并给这个url设置一个标记。本人平时一般会把获取到的url先标记为0,在请求详细数据的时候,请求成功的改为1,请求出错的改为-1,这样就能知道哪些成功了,哪些没有成功。
附上重复请求代码:
# get请求
def requests_get(self, url):
    """GET *url* with automatic retries.

    Returns the Response object on HTTP 200, ``None`` on a non-200
    response, or the string ``'-1'`` after every attempt raised
    (callers use ``'-1'`` to mark the URL as failed).
    """
    # One initial attempt plus up to 5 retries.  The original code
    # promised 5 retries but range(1, 5) only made 4, and — worse —
    # the retry requests were issued OUTSIDE any try/except, so a
    # second timeout raised an uncaught exception and crashed the
    # crawler.  Both defects are fixed here.
    for attempt in range(6):
        try:
            response = requests.get(url, headers=self.headers, timeout=5)
        except Exception as e:
            print(e)
            # Flag kept for backward compatibility with callers
            # that may inspect it after a failure.
            self.NETWORK_STATUS = False
            if attempt < 5:
                print('请求超时,第%s次重复请求' % (attempt + 1))
            continue
        if response.status_code == 200:
            return response
        # Non-200 status: the request itself succeeded, so retrying
        # is unlikely to help — mirror the original's implicit None.
        return None
    return '-1'
# post请求
def requests_post(self, url):
    """POST to *url* with automatic retries.

    Returns the Response object on HTTP 200, ``None`` on a non-200
    response, or the string ``'-1'`` after every attempt raised
    (callers use ``'-1'`` to mark the URL as failed).
    """
    # One initial attempt plus up to 5 retries.  The original retry
    # loop called requests.post with no try/except, so any exception
    # during a retry propagated uncaught; it also made only 4 retries
    # (range(1, 5)) despite the comment promising 5.  Fixed here.
    for attempt in range(6):
        try:
            response = requests.post(url, headers=self.headers, timeout=5)
        except Exception as e:
            print(e)
            # Flag kept for backward compatibility with callers
            # that may inspect it after a failure.
            self.NETWORK_STATUS = False
            if attempt < 5:
                print('请求超时,第%s次重复请求' % (attempt + 1))
            continue
        if response.status_code == 200:
            return response
        # Non-200 status: the request itself succeeded, so retrying
        # is unlikely to help — mirror the original's implicit None.
        return None
    return '-1'
网友评论