美文网首页爬虫
如何验证西刺代理,选出好用的

如何验证西刺代理,选出好用的

作者: 木马音响积木 | 来源:发表于2016-12-16 08:04 被阅读0次

    注意:本程序在 Windows 环境下运行。程序运行完毕后,选出来的这些地址都不好用,
    请高手帮助,指导如何筛选出可用的代理地址。
    如果您能指出我的代码存在的问题,不胜感激。
    这 100 个 IP 是直接从西刺代理 API 取出来的,感谢西刺代理,我们同时也希望每个地址都是好用的.

    106.91.35.28:8998
    121.31.48.6:8123
    203.115.102.146:8080
    183.144.40.128:8998
    106.91.21.92:8998
    95.173.179.54:1881
    171.38.207.76:8123
    183.66.93.105:8998
    80.91.188.46:3128
    115.201.149.59:8998
    190.248.134.246:8080
    60.250.81.97:80

    
    #!C:\Python35\python.exe
    # coding=utf-8
    # encoding=utf8
    from bs4 import BeautifulSoup
    import urllib
    import requests
    import socket
    import traceback
    import sys
    import lxml
    
    # User_Agent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
    # header = {}
    # header['User-Agent'] = User_Agent
    # Pretend to be a desktop Chrome browser so the target sites serve us
    # the normal HTML instead of blocking the default requests UA.
    headers = {}
    headers['User-Agent'] = ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/54.0.2840.99 Safari/537.36')
    
    #获取所有代理IP地址
    
    def getProxyIp():
        """Scrape candidate proxies from the xicidaili.com listing pages.

        Walks pages 1..65 of http://www.xicidaili.com/nn/ and extracts the
        IP and port columns of each table row.

        Returns:
            list[str]: proxies as "host:port" strings; may be empty if the
            site is unreachable or its markup has changed.
        """
        proxies = []
        for page in range(1, 66):
            url = 'http://www.xicidaili.com/nn/' + str(page)
            try:
                # Explicit timeout: without it a dead page hangs the loop.
                resp = requests.get(url, headers=headers, timeout=10)
                soup = BeautifulSoup(resp.text, 'lxml')
                # The first <tr> is the table header row, so skip it.
                for row in soup.find_all('tr')[1:]:
                    cells = row.find_all('td')
                    if len(cells) > 2:  # guard against non-data rows
                        proxies.append(cells[1].get_text(strip=True) + ':' +
                                       cells[2].get_text(strip=True))
            except (requests.RequestException, AttributeError, IndexError):
                # Skip pages that fail to download or parse; keep scraping
                # the remaining pages instead of aborting everything.
                continue
        return proxies
    
    
    # 验证获得的代理IP地址是否可用
    
    
    def validateIp(proxy):
        """Probe each proxy and record the ones that answer a test request.

        Args:
            proxy: one big string of whitespace-separated "host:port"
                entries (e.g. the module-level ``proxylist``).

        Side effects:
            Writes each working proxy, one per line, to d:\\ip.txt and
            prints progress to stdout.
        """
        url = "http://ip.chinaz.com/getip.aspx"
        # Split ONCE. The original split inside the loop and iterated
        # range(len(proxy)) -- the length of the *string*, not the list --
        # so most iterations raised IndexError, silenced by a bare except.
        candidates = proxy.split()
        print(len(candidates))
        # NOTE: socket.setdefaulttimeout() does not affect requests, so a
        # per-request timeout is passed to requests.get() below instead.
        with open(r"d:\ip.txt", "w") as f:  # raw string: \i is fragile
            for host in candidates:
                # requests expects a full proxy URL including the scheme;
                # a bare "host:port" entry is not reliably honoured.
                proxy_temp = {"http": "http://" + host}
                print(proxy_temp)
                try:
                    wb_data = requests.get(url, headers=headers,
                                           proxies=proxy_temp, timeout=3)
                    soup = BeautifulSoup(wb_data.text, 'lxml')
                    print(soup)
                    f.write(host + '\n')
                except requests.RequestException:
                    # Dead/slow proxy: skip it and try the next one.
                    continue
    
    # Hard-coded sample of ~100 "host:port" proxies taken directly from the
    # xicidaili API, used as test input for validateIp() in __main__ below.
    # NOTE(review): these addresses were captured in 2016 and are almost
    # certainly dead by now -- replace with getProxyIp() output to retest.
    proxylist ='''
    183.144.36.48:8998
    114.104.51.245:8998
    106.91.35.28:8998
    222.188.88.10:8998
    119.53.129.180:8118
    183.66.84.249:8998
    106.91.34.54:8998
    27.18.130.134:8998
    183.144.51.19:8998
    113.250.102.245:8998
    113.251.175.198:8998
    180.251.72.9:8080
    115.225.197.22:8998
    183.144.35.127:8998
    113.251.176.62:8998
    113.251.158.228:8998
    58.217.184.48:8998
    121.31.48.6:8123
    183.66.75.28:8998
    183.66.82.7:8998
    203.115.102.146:8080
    183.144.52.196:8998
    58.217.79.117:8998
    122.244.7.127:8998
    183.144.46.154:8998
    114.106.179.118:8998
    114.106.86.70:8998
    180.242.113.29:8080
    106.91.114.21:8998
    123.97.16.94:8998
    111.79.244.182:8998
    119.85.176.197:8998
    106.91.42.81:8998
    36.56.231.53:8998
    171.38.197.200:8123
    183.144.39.7:8998
    183.144.40.128:8998
    109.224.39.75:8080
    183.66.74.170:8998
    106.91.21.92:8998
    180.136.105.220:8998
    183.140.84.115:3128
    95.173.179.54:1881
    121.31.139.113:8123
    115.225.112.233:8998
    115.225.70.205:8998
    106.91.17.203:8998
    219.223.42.160:8998
    121.31.177.236:8123
    106.91.45.37:8998
    171.38.207.76:8123
    27.18.184.241:8998
    183.66.93.105:8998
    171.13.58.98:8998
    210.101.131.231:8080
    183.66.82.193:8998
    106.91.43.131:8998
    125.85.183.9:8998
    115.200.118.163:8998
    121.61.96.7:8118
    36.68.243.120:8080
    120.1.34.49:8118
    80.91.188.46:3128
    114.106.46.229:8998
    106.91.31.146:8998
    122.244.193.210:8998
    114.106.190.50:8998
    123.97.8.121:8998
    125.126.126.244:8998
    36.68.247.17:8080
    183.66.73.61:8998
    106.91.33.69:8998
    183.185.25.227:9797
    106.91.20.225:8998
    119.129.116.112:9797
    106.91.43.6:8998
    118.123.45.228:8998
    123.97.21.185:8998
    183.66.53.186:8998
    183.66.91.143:8998
    183.140.82.77:3128
    125.108.166.42:8998
    183.66.91.123:8998
    106.91.30.87:8998
    183.144.196.163:8998
    116.225.250.226:63000
    115.201.149.59:8998
    111.78.128.25:8998
    190.248.134.246:8080
    27.21.205.246:8998
    60.250.81.97:80
    183.66.90.123:8998
    183.144.42.167:8998
    123.97.19.95:8998
    106.91.24.148:8998
    117.65.107.135:8998
    182.88.205.48:8123
    183.140.86.97:3128
    114.106.206.157:8998
    83.68.39.26:3128
    '''
    
    if __name__ == '__main__':
        # Validate the hard-coded sample list; swap in getProxyIp() here to
        # test freshly scraped proxies instead.
        validateIp(proxylist)
    
    

    相关文章

      网友评论

        本文标题:如何验证西刺代理,选出好用的

        本文链接:https://www.haomeiwen.com/subject/upqnmttx.html