美文网首页
简易ip池

简易ip池

作者: He | 来源:发表于2019-01-09 20:54 被阅读0次
    from selenium import webdriver
    from bs4 import BeautifulSoup
    from lxml import etree
    from fake_useragent import UserAgent
    import random
    import requests
    # Shared fake_useragent instance; GetIP.check_up reads UA.random from it
    # when building its request headers.
    UA = UserAgent()
    
    class GetIP:
        """Scrape candidate proxies from a listing page and probe which respond.

        A Chrome WebDriver session is started on construction. Call ``close()``
        once finished so the browser process does not linger.
        """

        def __init__(self):
            # Raw string: '\c' is not a valid escape sequence, so the non-raw
            # literal only produced the right path by accident and triggers a
            # warning on newer Python versions. The resulting path is unchanged.
            self.browser = webdriver.Chrome(r'd:\chromedriver.exe')
            self.url = 'http://www.xicidaili.com/nn'

        def close(self):
            """Shut down the WebDriver session started in ``__init__``."""
            self.browser.quit()

        def get_html(self):
            """Load ``self.url`` in the browser and extract the listed proxies.

            Returns:
                A list of ``"ip:port"`` strings built from the second and third
                cells of each row of the ``ip_list`` table. Rows where either
                cell has no text are skipped.
            """
            self.browser.get(self.url)
            html_tree = etree.HTML(self.browser.page_source)
            ip_cells = html_tree.xpath('//*[@id="ip_list"]/tbody/tr/td[2]')
            port_cells = html_tree.xpath('//*[@id="ip_list"]/tbody/tr/td[3]')
            # zip() pairs the cells row by row and cannot raise IndexError if
            # the two queries ever return a different number of nodes.
            return [
                ip_cell.text + ":" + port_cell.text
                for ip_cell, port_cell in zip(ip_cells, port_cells)
                if ip_cell.text and port_cell.text
            ]

        def check_up(self, timeout=5):
            """Probe every scraped proxy and report the ones that respond.

            Each proxy is used for a single GET request to a fixed test URL.
            Proxies answering with HTTP 200 are printed and collected; proxies
            that fail or answer with another status are skipped, and the scan
            continues with the next one.

            Args:
                timeout: Seconds to wait for each proxy before giving up on it.

            Returns:
                A list of the ``"ip:port"`` strings that answered with HTTP 200.
            """
            # 'User-Agent' is the actual HTTP header name; the previous key
            # 'UserAgent' was sent as an unrelated custom header.
            headers = {
                'User-Agent': UA.random
            }
            url = 'http://www.ip138.com/'
            working = []
            for ip in self.get_html():
                proxies = {
                    "http": "http://" + ip,
                    "https": "https://" + ip
                }
                try:
                    # Without a timeout a dead proxy can block the scan forever.
                    response = requests.get(
                        url, headers=headers, proxies=proxies, timeout=timeout
                    )
                except requests.RequestException as e:
                    print(e)
                    continue
                # A non-200 answer only disqualifies this proxy; keep checking
                # the rest instead of aborting the whole scan.
                if response.status_code == 200:
                    print(ip)
                    working.append(ip)
            return working
    
    
    if __name__ == '__main__':
        getIp = GetIP()
        try:
            getIp.check_up()
        finally:
            # Always shut Chrome down, even if the check fails, so no browser
            # or chromedriver process is left running.
            getIp.browser.quit()
    

    附上验证代码

    from selenium import webdriver
    chromeOptions = webdriver.ChromeOptions()

    # Configure the proxy.
    # NOTE: there must be no spaces around '='; this form does NOT work:
    #   --proxy-server = http://202.20.16.82:10152
    chromeOptions.add_argument("--proxy-server=http://119.101.117.130:9999")
    # `options=` is the current keyword; `chrome_options=` is deprecated.
    browser = webdriver.Chrome(r"D:\chromedriver.exe", options=chromeOptions)
    try:
        # Ask httpbin which IP it sees, to check whether the proxy is in effect.
        browser.get("http://httpbin.org/ip")
        print(browser.page_source)
    finally:
        # Release the browser and driver process once the check is done.
        browser.quit()
    

    相关文章

      网友评论

          本文标题:简易ip池

          本文链接:https://www.haomeiwen.com/subject/iahnrqtx.html