今天在用 requests 爬取网站数据时遇到了一个问题,代码和报错信息如下:
import re
import requests
class Handle_Lagou(object):
    """Small crawler for lagou.com built around one shared ``requests`` session."""

    def __init__(self):
        # Use a session so the cookies set by the site are kept between requests.
        self.lagou_session = requests.session()
        # Headers sent with every request.
        self.header = {
            'Connection': 'close',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # City names extracted by handle_city(); empty until it has run.
        self.city_list = []

    def handle_city(self):
        """Fetch the all-cities page, print its HTML and store the city names.

        The names are the link texts captured by the ``zhaopin/">...</a>``
        pattern; they are saved in ``self.city_list``.
        """
        city_search = re.compile(r'zhaopin/">(.*?)</a>')
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        print(city_result)
        self.city_list = city_search.findall(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send one request through the shared session and return the body text.

        Args:
            method: HTTP verb, either ``"GET"`` or ``"POST"``.
            url: Address to request.
            data: Payload sent with a POST request; ignored for GET.
            info: Unused; accepted so existing callers keep working.

        Returns:
            The response body as text (``response.text``).

        Raises:
            ValueError: If ``method`` is neither ``"GET"`` nor ``"POST"``.
        """
        if method == "GET":
            response = self.lagou_session.get(url=url, headers=self.header)
        elif method == "POST":
            response = self.lagou_session.post(
                url=url, headers=self.header, data=data
            )
        else:
            # Fail loudly instead of falling through without a response.
            raise ValueError("unsupported HTTP method: %r" % (method,))
        return response.text
if __name__ == '__main__':
    # Script entry point: build the crawler and request the city page once.
    lagou = Handle_Lagou()
    lagou.handle_city()
运行后出现了两条报错信息(实际上是同一个问题:SSL 证书校验失败,urllib3 抛出的 MaxRetryError 被 requests 包装成了 SSLError):
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.lagou.com', port=443): Max retries exceeded with url: /jobs/allCity.html (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.lagou.com', port=443): Max retries exceeded with url: /jobs/allCity.html (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
然后上网查了查,看了大部分相关问题的博客后,找到了两个解决办法(其实是一个)。下面采用的做法是在请求时设置 verify=False 跳过证书校验,并屏蔽由此产生的 InsecureRequestWarning 警告;注意这会失去对中间人攻击的防护,只适合临时抓取公开数据:
import re
import requests
# Silence the InsecureRequestWarning here; the request below is sent with
# verify=False, i.e. without certificate verification.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Alternative approach from another author's post, kept for reference only.
# The author of this article found it too complex and preferred the simpler
# call above.
# NOTE(review): the cipher strings below are appended without a leading ':'
# separator, so they would be glued onto the last existing cipher name --
# confirm against the original snippet before enabling.
# requests.packages.urllib3.disable_warnings()
# requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL'
# try:
#     requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL'
# except AttributeError:
#     # no pyopenssl support used / needed / available
#     pass
class Handle_Lagou(object):
    """Small crawler for lagou.com built around one shared ``requests`` session.

    Certificate verification is switched off by default, which is the
    workaround this listing demonstrates. Pass ``verify=True`` or the path
    of a CA bundle to turn verification back on.
    """

    def __init__(self, verify=False):
        # Use a session so the cookies set by the site are kept between requests.
        self.lagou_session = requests.session()
        # Headers sent with every request.
        self.header = {
            'Connection': 'close',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        # Passed as ``verify=`` on every request. False skips certificate
        # checks, so the peer is not authenticated; use it only for
        # throwaway scraping of public pages.
        self.verify = verify
        # City names extracted by handle_city(); empty until it has run.
        self.city_list = []

    def handle_city(self):
        """Fetch the all-cities page, print its HTML and store the city names.

        The names are the link texts captured by the ``zhaopin/">...</a>``
        pattern; they are saved in ``self.city_list``.
        """
        city_search = re.compile(r'zhaopin/">(.*?)</a>')
        city_url = "https://www.lagou.com/jobs/allCity.html"
        city_result = self.handle_request(method="GET", url=city_url)
        print(city_result)
        self.city_list = city_search.findall(city_result)

    def handle_request(self, method, url, data=None, info=None):
        """Send one request through the shared session and return the body text.

        Args:
            method: HTTP verb, either ``"GET"`` or ``"POST"``.
            url: Address to request.
            data: Payload sent with a POST request; ignored for GET.
            info: Unused; accepted so existing callers keep working.

        Returns:
            The response body as text (``response.text``).

        Raises:
            ValueError: If ``method`` is neither ``"GET"`` nor ``"POST"``.
        """
        if method == "GET":
            # verify is set per request (False by default, see __init__).
            response = self.lagou_session.get(
                url=url, headers=self.header, verify=self.verify
            )
        elif method == "POST":
            response = self.lagou_session.post(
                url=url, headers=self.header, data=data, verify=self.verify
            )
        else:
            # Fail loudly instead of falling through without a response.
            raise ValueError("unsupported HTTP method: %r" % (method,))
        return response.text
if __name__ == '__main__':
    # Script entry point: build the crawler and request the city page once.
    lagou = Handle_Lagou()
    lagou.handle_city()
网友评论