1. 首先新建一个proxy.txt文件,将批量提取的代理ip保存进去,一行一个,格式为 ip:端口,例如 122.96.51.9:8080
"""
该程序用来获取proxy.txt中可用的免费国外代理ip
"""
import os
import telnetlib
def create_files():
    """Make sure the input and output files exist.

    Creates an empty ``proxy.txt`` and an empty ``useful.txt`` in the current
    working directory when they are missing. Files that already exist are
    left untouched.

    :return: None
    """
    for filename in ('proxy.txt', 'useful.txt'):
        # Append mode creates a missing file but never truncates an existing
        # one, so there is no separate existence check and no window in which
        # a file created by someone else could be wiped.
        with open(filename, 'a', encoding='utf-8'):
            pass
def deal_datas():
    """Parse ``proxy.txt`` into a list of proxy records.

    Every useful line is expected to look like ``122.96.51.9:8080``. Blank
    lines and lines that lack either the host or the port are skipped rather
    than raising ``IndexError``.

    :return: list of dicts with the keys ``'ip'`` and ``'端口'`` (the port),
        both stored as stripped strings
    """
    ip_list = []
    # 'utf-8-sig' reads plain UTF-8 as well, and drops a leading BOM that
    # would otherwise end up glued to the first ip.
    with open('proxy.txt', 'r', encoding='utf-8-sig') as f:
        for line in f:
            parts = line.strip().split(':')
            if len(parts) < 2:
                # Blank line or no ':' separator at all.
                continue
            ip = parts[0].strip()
            port = parts[1].strip()
            if not ip or not port:
                continue
            ip_list.append({
                'ip': ip,
                '端口': port,
            })
    return ip_list
def get_useful_ip(ip_list):
    """Probe each proxy and record the ones that accept a TCP connection.

    A proxy counts as usable when a TCP connection to ``ip:port`` can be
    opened within 20 seconds. Each usable proxy is appended to ``useful.txt``
    straight away, so results found so far survive an interrupted run.

    :param ip_list: list of dicts with the keys ``'ip'`` and ``'端口'`` (port)
    :return: list of ``{'http': 'ip:port'}`` dicts for the usable proxies
    """
    # Imported locally so this function does not depend on telnetlib, which
    # is deprecated and removed from the standard library in Python 3.13.
    import socket

    print('正在筛选可用ip...')
    useful_ip = []
    for msg in ip_list:
        ip = msg['ip']
        port = msg['端口']
        message = {'http': '%s:%s' % (ip, port)}
        try:
            # A plain TCP connect is all the check needs; the `with` block
            # closes the socket again instead of leaking it.
            with socket.create_connection((ip, port), timeout=20):
                pass
        except (OSError, ValueError, OverflowError):
            # Connection/DNS/timeout failures (OSError) and malformed hosts
            # or ports. KeyboardInterrupt is deliberately NOT caught, so the
            # scan can still be aborted with Ctrl-C.
            print('不可用:%s' % str(message))
            continue
        print('可用:%s' % message)
        useful_ip.append(message)
        with open('useful.txt', 'a', encoding='utf-8') as f:
            f.write(str(message) + '\n')
    if not useful_ip:
        print('本次筛选无可用ip')
    else:
        print('筛选完成,本次共筛选%s个ip,%s个可用' % (str(len(ip_list)), str(len(useful_ip))))
    return useful_ip
def run():
    """Run the whole check: ensure the files exist, load proxies, probe them."""
    create_files()
    get_useful_ip(deal_datas())


if __name__ == '__main__':
    run()
2. 再来一个从快代理网站获取代理ip的程序(用正则表达式提取),并检测其可用性
import re
import requests
import time
import telnetlib
"""
该程序用来获取快代理的免费代理,并检测可用性
"""
def get_ip(pages=10, timeout=20):
    """Scrape free proxies from the kuaidaili listing pages.

    Pages ``1 .. pages`` of the listing are downloaded one after another,
    with a one-second pause after each request. A page that cannot be
    fetched is reported and skipped, so one failure does not discard the
    proxies already collected.

    :param pages: number of listing pages to crawl (default 10)
    :param timeout: per-request timeout in seconds passed to ``requests.get``
    :return: list of ``'ip:port'`` strings in the order they were found
    """
    base_url = 'https://www.kuaidaili.com/free/inha/'
    # Raw string so \d and \. reach the regex engine untouched (in a normal
    # string they are invalid escape sequences); compiled once, not per page.
    matcher = re.compile(
        r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>.*?<td.*?>(\d{1,5})</td>',
        re.S,
    )
    ip_list = []
    print('正在筛选...')
    for page in range(1, pages + 1):
        page_url = base_url + str(page)
        try:
            # Without a timeout a stalled server would hang the crawl forever.
            response = requests.get(url=page_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('获取失败:%s (%s)' % (page_url, exc))
        else:
            # Each match is an (ip, port) tuple.
            ip_list.extend(ip + ':' + port for ip, port in matcher.findall(response.text))
        time.sleep(1)
    print('共获取到%d个代理ip' % len(ip_list))
    print(ip_list)
    return ip_list
def ip_check(ip_list):
    """Return the proxies from *ip_list* that accept a TCP connection.

    A proxy counts as usable when a TCP connection to it can be opened
    within 20 seconds. The result of every probe is printed.

    :param ip_list: iterable of ``'ip:port'`` strings
    :return: list of the usable ``'ip:port'`` strings, in input order
    """
    # Imported locally so this function does not depend on telnetlib, which
    # is deprecated and removed from the standard library in Python 3.13.
    import socket

    usable_ip = []
    for i in ip_list:
        parts = i.split(':')
        ip = parts[0]
        port = parts[-1]
        try:
            # A plain TCP connect is all the check needs; the `with` block
            # closes the socket again instead of leaking it.
            with socket.create_connection((ip, port), timeout=20):
                pass
        except (OSError, ValueError, OverflowError):
            # Connection/DNS/timeout failures (OSError) and malformed hosts
            # or ports. KeyboardInterrupt is deliberately NOT caught, so the
            # scan can still be aborted with Ctrl-C.
            print(i + '不可用')
        else:
            print('可用ip:%s' % i)
            usable_ip.append(i)
    print('共%s个IP可用:' % len(usable_ip))
    print(usable_ip)
    return usable_ip


if __name__ == '__main__':
    ip_list = get_ip()
    ip_check(ip_list)
网友评论