下面介绍一段 Python 爬虫代理池的实现代码：
def get_ip_list(headers=None):
    """Scrape the xicidaili proxy listing and return its entries.

    Fetches the page, locates the element with ``id="ip_list"`` and
    reads every table row after the first one.

    Args:
        headers: Optional mapping of HTTP headers forwarded to
            ``requests.get``. With ``None`` no custom headers are passed.

    Returns:
        A list of strings built as ``<second cell>:<third cell>`` for each
        row (presumably ``ip:port`` -- confirm against the page layout).

    Raises:
        AttributeError: If the page has no element with ``id="ip_list"``
            (e.g. the site returned an error or anti-bot page).
    """
    print("正在获取代理列表...")
    url = r'http://www.xicidaili.com/nn/'
    html = requests.get(url=url, headers=headers).text
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find(id='ip_list').find_all('tr')
    ip_list = []
    # The first row is skipped; it presumably holds the column headers.
    for row in rows[1:]:
        tds = row.find_all('td')
        # Rows without enough cells cannot yield an entry; skip them
        # instead of failing on the index lookup below.
        if len(tds) < 3:
            continue
        ip_list.append(tds[1].text + ":" + tds[2].text)
    print("代理列表抓取成功")
    return ip_list
def get_random_ip(ip_list):
    """Pick one proxy at random and wrap it in a proxies mapping.

    Args:
        ip_list: Iterable of address strings; each one is prefixed with
            ``http://`` to form a proxy URL.

    Returns:
        A dict ``{'http': <proxy url>}`` holding the randomly chosen proxy.

    Raises:
        IndexError: If ``ip_list`` yields no entries.
    """
    print("正在设置随机代理...")
    # Materialise the candidates first so any iterable (not only a
    # sequence) is accepted.
    proxy_list = ['http://' + ip for ip in ip_list]
    if not proxy_list:
        # Same exception type random.choice would raise, with a clearer message.
        raise IndexError("cannot choose a proxy from an empty ip_list")
    proxy_ip = random.choice(proxy_list)
    proxies = {'http': proxy_ip}
    print("代理设置成功")
    return proxies
网友评论