Proxy IPs

Author: 垃圾桶边的狗 | Published 2019-03-06 00:29

Mobile-emulation SSL warning/error reference: https://blog.csdn.net/zahuopuboss/article/details/52964809
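If Chrome complains about invalid certificates during the mobile-emulation run, a minimal sketch of suppressing the warning via ChromeOptions (the flag below is a standard Chrome switch; assuming this is the kind of SSL error the linked post refers to):

from selenium import webdriver

ops = webdriver.ChromeOptions()
# skip the invalid-certificate interstitial (assumption: this matches the SSL error described in the CSDN link)
ops.add_argument('--ignore-certificate-errors')
web = webdriver.Chrome(options=ops)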

Proxy IP API: http://h.zhimaruanjian.com/getapi/

User-Agent reference table: http://tools.jb51.net/table/useragent
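A minimal sketch of plugging a proxy fetched from such an API and a User-Agent from the table above into a plain requests call; the assumption that the extraction link returns plain-text ip:port lines, and the test URL, are illustrative, not from the original post:

import random

import requests

# assumption: the extraction link returns plain-text lines of "ip:port"
api_url = 'http://h.zhimaruanjian.com/getapi/'
proxy_list = requests.get(api_url, timeout=5).text.split()

ua_list = [
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
]

proxy = random.choice(proxy_list)
resp = requests.get(
    'https://httpbin.org/ip',                        # any test URL
    headers={'User-Agent': random.choice(ua_list)},  # random UA from the table above
    proxies={'http': 'http://' + proxy, 'https': 'http://' + proxy},
    timeout=5,
)
print(resp.text)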

Selenium with mobile emulation

import datetime
import os
import random
import re
import time

import requests
from lxml import etree
from selenium import webdriver

mUA = 'Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'
mWidth = 520
mHeight = 20431
PIXEL_RATIO = 3.0

mobileEmulation = {"deviceMetrics": {"width": mWidth, "height": mHeight, "pixelRatio": PIXEL_RATIO},
                   "userAgent": mUA}


def create_chrome():
    ops = webdriver.ChromeOptions()
    ops.add_experimental_option('mobileEmulation', mobileEmulation)
    # ops.add_argument('--headless')
    # ops.add_argument('--disable-gpu')

    web = webdriver.Chrome(chrome_options=ops)
    web.set_page_load_timeout(10)
    web.set_script_timeout(10)
    web.set_window_size(mWidth, mHeight)
    return web


driver = create_chrome()


def parse_url(url=None, func=None):
    driver.maximize_window()
    # url = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.af6e5cb21bM0ce&brand=30111&q=iphone+x&sort=s&style=g&from=.list.pc_1_searchbutton&type=pc'

    driver.get(url=url)
    time.sleep(2)
    # js = 'document.body.scrollTop=30000'
    # driver.execute_script(js)

    js = "var q=document.documentElement.scrollTop=100000"
    driver.execute_script(js)

    content = driver.page_source
    time.sleep(2)
    return func(content)
    # return first_page_parse(content)
    # return content


def first_page_parse(content):
    tree = etree.HTML(content)
    # time.sleep(100000)
    # price = tree.xpath('//p[@class="productPrice"]//em/text()')
    odiv = tree.xpath('//section[@id="J_srp"]//a[@class="tile_item"]')
    for a_div in odiv:
        item = {}
        # detail page link
        detail_href = 'https:' + str(a_div.xpath('./@href')[0])
        # product id
        mode = re.compile(r'\d+')
        product_id = mode.search(detail_href).group()
        # monthly sales
        yue_xiao = str(a_div.xpath('.//div[@class="tii_price"]//span[@class="tii_sold"]/text()')[0])
        # title
        title = str(a_div.xpath('.//div[@class="tii_title"]/h3/text()')[0]).strip()
        # price
        price = str(a_div.xpath('string(.//div[@class="tii_price"])')).replace('   \ue609  ', '').split('月销')[0].strip()
        # shop name
        Shop_name = str(a_div.xpath('./div[@class="j_shop_more shop_more"]/span[1]/text()')[0])

        item['detail_href'] = detail_href
        item['yue_xiao'] = yue_xiao
        item['title'] = title
        item['price'] = price
        item['Shop_name'] = Shop_name
        item['product_id'] = product_id

        # call parse_url to parse the detail page link
        parse_url(url=detail_href, func=detail_page)
        print(item)


def detail_page(content):
    # print(content)
    # if '查看全部' in content:
    # print('123456789'*80)
    # common = driver.find_element_by_link_text('查看全部').click()
    driver.find_element_by_class_name('mui-tagscloud-more').click()
    time.sleep(2)
    conmon = driver.page_source
    # print(type(conmon))
    time.sleep(2)
    js = "var q=document.documentElement.scrollTop=100000"
    driver.execute_script(js)
    time.sleep(2)
    tree = etree.HTML(conmon)
    all_li = tree.xpath('//*[@id="J_CommentsWrapper"]/ul[2]/li')
    for li in all_li:
        a1 = li.xpath('./text()')
        a = li.xpath('./li[@class="tag-product"]/text()')
        print(a)
    print('1234567890')


if __name__ == '__main__':
    url = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.af6e5cb21bM0ce&brand=30111&q=iphone+x&sort=s&style=g&from=.list.pc_1_searchbutton&type=pc'
    parse_url(url, first_page_parse)


Selenium login

import random

from selenium import webdriver
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import ActionChains

chrome_options = Options()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')

path = r'/usr/bin/chromedriver'
driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)
driver.maximize_window()
url = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.2c467adcD1Hb2U&s=120&q=%CA%B3%C6%B7&sort=s&style=g&smAreaId=110100&type=pc#J_Filter'
driver.get(url)
# switch from the QR-code login to the username/password form
itchat = driver.find_element_by_id('J_Quick2Static')
itchat.click()

# click the inputs and type the credentials
time.sleep(3)
coun = driver.find_element_by_id('TPL_username_1')
coun.send_keys('your_username')  # replace with your Taobao account
time.sleep(3)
pwd = driver.find_element_by_id('TPL_password_1')
pwd.click()
time.sleep(2)
pwd.send_keys('your_password')  # replace with your password

# slider captcha: press and hold the knob, drag it right in random steps, then release
dragger = driver.find_element_by_id('nc_1_n1z')
ActionChains(driver).click_and_hold(dragger).perform()
# ActionChains(driver).move_by_offset(180, 0).perform()
# ActionChains(driver).click_and_hold(dragger).move_by_offset(8000, 0).perform()

x = 0
while x < 90:
    step = random.randint(50, 90)
    x += step
    print(x)
    ActionChains(driver).move_by_offset(step, 0).perform()
    time.sleep(0.3)
ActionChains(driver).release().perform()
# login
login = driver.find_element_by_id('J_SubmitStatic')
login.click()
time.sleep(3)
print(driver.page_source)
driver.quit()

# time.sleep(3)
# search = driver.find_element_by_link_text('手机天猫')
# search.click()
# time.sleep(3)
# my_input = driver.find_element_by_id('mq')
# my_input.click()
# my_input.send_keys('shipin')
#
# time.sleep(3)
# search1 = driver.find_element_by_link_text('搜索')
# search1.click()
# driver.quit()


# Baidu demo
# url = 'http://www.baidu.com/'
# driver.get(url)
# # time.sleep(20)
# driver.save_screenshot('l1.png')
# driver.implicitly_wait(20)
#

# myinput = driver.find_element_by_id('kw')
#
# myinput.send_keys('hello')
# # myinput.send_keys(Keys.ENTER)
# time.sleep(3)
# driver.save_screenshot('l2.png')
#
#
# mybutton = driver.find_element_by_id('su')
# mybutton.click()
# time.sleep(4)
# driver.save_screenshot('l3.png')
# # find the target image and click it
# # image = driver.find_element_by_xpath('//div[@id="1"]/div[1]/a[1]')
# # image.click()
# # time.sleep(3)
#
#
# oa = driver.find_element_by_link_text('图片')
# oa.click()
# driver.save_screenshot('l4.png')


Scrapy item fields

import scrapy


class TmallProduct(scrapy.Item):
    name = scrapy.Field()
    url = scrapy.Field()
    sellerId = scrapy.Field()
    price = scrapy.Field()
    comment_num = scrapy.Field()
    sold_num = scrapy.Field()
    shop_name = scrapy.Field()
    brand_id = scrapy.Field()
    item_id = scrapy.Field()
    
class TmallComment(scrapy.Item):
    p_id = scrapy.Field()
    comment_id = scrapy.Field()
    comment_text = scrapy.Field()
    user_name = scrapy.Field()
    parames = scrapy.Field()
    vip_level = scrapy.Field()
    create_time = scrapy.Field()
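A minimal sketch of a spider that fills these items; the package name taobao, the start URL, and the XPath selectors (mirroring the tile layout used in the Selenium script above) are assumptions, not the original project's spider:

import re

import scrapy

from taobao.items import TmallProduct  # assumption: the items above live in taobao/items.py


class TmallSearchSpider(scrapy.Spider):
    name = 'tmall_search'
    start_urls = [
        'https://list.tmall.com/search_product.htm?q=iphone+x&type=pc',
    ]

    def parse(self, response):
        # hypothetical selectors: one <a> per product tile
        for a in response.xpath('//section[@id="J_srp"]//a[@class="tile_item"]'):
            item = TmallProduct()
            item['url'] = response.urljoin(a.xpath('./@href').get())
            item['name'] = a.xpath('.//div[@class="tii_title"]/h3/text()').get('').strip()
            item['sold_num'] = a.xpath('.//span[@class="tii_sold"]/text()').get()
            m = re.search(r'\d+', item['url'])
            item['item_id'] = m.group() if m else None
            yield item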

PC

import datetime
import os
import random
import time

import requests
from lxml import etree
from selenium import webdriver
# import config
import threading

# import numpy as np

mUA_list = [
    'Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0 Mobile/15C153 Safari/604.1',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)'
]

pcUA = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
mWidth = 1440
mHeight = 2000
PIXEL_RATIO = 3.0

mobileEmulation = {"deviceMetrics": {"width": mWidth, "height": mHeight, "pixelRatio": PIXEL_RATIO},
                   }  # "userAgent": pcUA


# def writelog(msg, log):
#     nt = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
#     text = "[%s] %s " % (nt, msg)
#     os.system("echo %s >> %s" % (text.encode('utf8'), log))


def create_chrome():
    ops = webdriver.ChromeOptions()
    ops.add_experimental_option('mobileEmulation', mobileEmulation)
    # ops.add_argument('--headless')
    # ops.add_argument('--disable-gpu')

    web = webdriver.Chrome(chrome_options=ops)
    web.set_page_load_timeout(10)
    web.set_script_timeout(10)
    web.set_window_size(mWidth, mHeight)
    return web


driver = create_chrome()
driver.maximize_window()
# print(driver.get_cookies())
# cook = 'cookie'
# value = 'cna=Tj0GFfQd7WoCAX0jSx7nusPu; enc=oRKYDCU5y1m1VFVbIWUMn2QyXwEqt7CA9BI5KpjgZeRE90sX%2FwrUsIKhOECrqEBmKlG%2B1ZTiHV%2BGjobUqVhjTw%3D%3D; _med=dw:375&dh:667&pw:750&ph:1334&ist:0; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; lid=%E6%8F%90%E6%96%AF%E6%8B%89%E7%88%B8%E7%88%B8; hng=CN%7Czh-CN%7CCNY%7C156; t=b07e9de440f1fb0e3ab2b66e17f7b37a; _tb_token_=53fb9ef3e81ee; cookie2=111d252490c66778480de2076c9fc47b; _uab_collina=155193672433852538332314; swfstore=92842; x5sec=7b22746d616c6c7365617263683b32223a226437353135306166616162383530643634326366326565323761303333366338434a4b72672b5146454a6938753671693538535247786f504d6a49774d44637a4d7a51784f4467344e547335227d; tt=login.tmall.com; cq=ccp%3D0; _m_h5_tk=49a86b858caee07c8da2f5b320428fd1_1551958233749; _m_h5_tk_enc=3472a233e78156946253b38bd2380ad3; res=scroll%3A1585*6302-client%3A1585*410-offset%3A1585*6302-screen%3A1600*900; pnm_cku822=098%23E1hv3vvUvbpvUvCkvvvvvjiPRLzy6j1HRF5h6jnEPmPOljnVPLMUAjlnPszZ1jYWRphvCvvvvvvPvpvhvv2MMQyCvhQpdCQvCsN6YPoxdB9aW4c6D704d5YVtnFZTEIOwZkQ0f0DW3CQog0HsXZpejD2AXcBlLyzOvxrtjc6%2BultE8AUDfyTh7QHYWLh0C%2B4Kphv8vvvvvtvpvvvvvv2UhCvCVIvvvW9phvWh9vvvACvpvQXvvv2UhCv2CeivpvUvvmvrCaXk5JEvpvVmvvCvcaVvphvC9v9vvCvp8wCvvpvvUmm; whl=-1%260%260%260; l=bBxv7lDcvBd4CvYzBOfNKdyzzWbOoIRb4sPP7GHipICP_25p7eR5WZ1YC_T9C3GVw1oDR3yzWgmLBeYBY1f..; isg=BPLyKFIq-o2B_8ZOqUcSY3XyQzG5ffDCA6l-SrzLn6WNT5JJpBDcLGHlP6vWP261; uc1=cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&cookie21=Vq8l%2BKCLiYYu&cookie15=VT5L2FSpMGV7TQ%3D%3D&existShop=false&pas=0&cookie14=UoTZ5boqII3%2F1A%3D%3D&tag=8&lng=zh_CN; uc3=vt3=F8dByEvxcemrgAbQKyI%3D&id2=UUphyu7opSokkbNd8Q%3D%3D&nk2=r7Qc2M7TAvy3RA%3D%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D; tracknick=%5Cu63D0%5Cu65AF%5Cu62C9%5Cu7238%5Cu7238; _l_g_=Ug%3D%3D; ck1=""; unb=2200733418885; lgc=%5Cu63D0%5Cu65AF%5Cu62C9%5Cu7238%5Cu7238; cookie1=W80vOuO9AY8m2yPvjGw2CQE%2B%2Bjh7a7z5PnzPvOgtEs0%3D; login=true; cookie17=UUphyu7opSokkbNd8Q%3D%3D; _nk_=%5Cu63D0%5Cu65AF%5Cu62C9%5Cu7238%5Cu7238; uss=""; csg=85c7d2ee; skt=30020c8f212a64b9'

# driver.get('https://list.tmall.com')
driver.get(
    'https://list.tmall.com/search_product.htm?q=iphone&type=p&spm=a220o.0.a2227oh.d100&from=.detail.pc_1_searchbutton')
# NOTE: these are HTTP headers captured from the browser, not cookies; Selenium's
# add_cookie() expects a dict with at least 'name' and 'value' keys.
cookies = {
    'access-control-allow-origin': '*',
    'age': '32161',
    'ali-swift-global-savetime': '1551888350',
    'cache-control': 'max-age=31104000,s-maxage=31104000',
    'content-encoding': 'gzip',
    'content-length': '49246',
    'content-md5': '6A/IvftxPNYVdO4F8j0X9A==',
    'content-type': 'application/javascript',
    'date': 'Wed, 06 Mar 2019 16:05:50 GMT',
    'eagleid': '77f9359615519205117826673e',
    'server': 'Tengine',
    'status': '200',
    'timing-allow-origin': '*',
    'vary': 'Accept-Encoding',
    'via': 'cache15.l2cn859[0,200-0,H], cache39.l2cn859[0,0], cache8.cn213[0,200-0,H], cache2.cn213[1,0]',
    'x-cache': 'HIT TCP_MEM_HIT dirn:12:366660859',
    'x-oss-hash-crc64ecma': '7642643058677396894',
    'x-oss-object-type': 'Normal',
    'x-oss-request-id': '5C7FEFDE1233EC5EC05D75D9',
    'x-oss-server-time': '1',
    'x-oss-storage-class': 'Standard',
    'x-source-scheme': 'https',
    'x-swift-cachetime': '31103968',
    'x-swift-savetime': 'Wed, 06 Mar 2019 16:06:22 GMT',

    'Origin': 'https://list.tmall.com',
    'Referer': 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.af6e5cb21bM0ce&brand=30111&q=iphone+x&sort=s&style=g&from=.list.pc_1_searchbutton&type=pc',
    'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',

}

# driver.add_cookie(cookie_dict=cookies)  # would raise an error: see the note above the dict
# url = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.af6e5cb21bM0ce&brand=30111&q=iphone+x&sort=s&style=g&from=.list.pc_1_searchbutton&type=pc'
# url = 'https://detail.tmall.com/item.htm?id=560905201223&skuId=3510680937618&pic=//img.alicdn.com/bao/uploaded/i3/TB1zK1TDOrpK1RjSZFhUNhSdXXa_043502.jpg_560x840Q50s50.jpg_.webp&itemTitle=Apple/%E8%8B%B9%E6%9E%9C%20iPhone%20X&price=6299.00&from=h5'
# driver.get(url=url)
time.sleep(2)




# js = 'document.body.scrollTop=30000'
# driver.execute_script(js)
js = "var q=document.documentElement.scrollTop=100000"
driver.execute_script(js)
time.sleep(4)
content = driver.page_source

# tree = etree.HTML(content)
# # price = tree.xpath('//p[@class="productPrice"]//em/text()')
# prices = tree.xpath('//*[@id="J_listPanel1"]/a//div[2]/div[3]/text()')
# # print(''.join(prices).strip(' '))
# title = tree.xpath('//div[@class="tii_title"]//h3/text()')
# # print(title)
# sales_volume = tree.xpath('//div[@class="ti_info"]//div[3]/span[2]/text()')
# # print(sales_volume)
#
# href_list = tree.xpath('//*[@id="J_listPanel1"]//a/@href')
# # print(href_list)
#
# driver.find_element_by_class_name('ti_img_wrap').click()
# # print(driver.current_url)
# id_str = str(driver.current_url)
# id = ''.join([i for i in id_str.split('=')[2] if i.isdigit()])
# # print(id)
# # driver.save_screenshot('1.png')
#
#
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
# }
#
#
# def request_handler(url):
#     # res = requests.get(url=url, headers=headers)
#     # print(res.text)
#     driver.get(url)
#     time.sleep(3)
#     detail_conteng_parse(driver.page_source)
#
#
# def detail_conteng_parse(content):
#     tree = etree.HTML(content)
#     sales_volume = tree.xpath('//span[@class="sales"]/text()')
#     print(sales_volume)
#     # // *[ @ id = "J_CommentsWrapper"] / ul[1] / li / text()
#
#
# for ul in href_list:
#     url = 'https:' + ul
#     request_handler(url)

Scrapy middlewares

# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
import random
import time

from scrapy import signals


class TaobaoSpiderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Response, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn’t have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class TaobaoDownloaderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called
        return None

    def process_response(self, request, response, spider):
        # Called with the response returned from the downloader.

        # Must either;
        # - return a Response object
        # - return a Request object
        # - or raise IgnoreRequest
        return response

    def process_exception(self, request, exception, spider):
        # Called when a download handler or a process_request()
        # (from other downloader middleware) raises an exception.

        # Must either:
        # - return None: continue processing this exception
        # - return a Response object: stops process_exception() chain
        # - return a Request object: stops process_exception() chain
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class RandomIPMiddleware(object):
    def __init__(self):
        self.ip_pool = self.getip()


    def getip(self):
        return [
            '218.60.8.99:3129',
            '113.200.56.13:8010',
            '140.143.96.216:80',
        ]

    def process_request(self, request, spider):
        # pick a random proxy IP from the pool
        self.ip = random.choice(self.ip_pool)
        print('*' * 100)
        print('Currently using proxy IP: %s' % self.ip)
        print('*' * 100)
        request.meta['proxy'] = 'http://' + self.ip
        request.meta['download_timeout'] = 5

    def process_exception(self, request, exception, spider):
        print('#' * 100)
        print(exception)
        print('#' * 100)
        # drop the unusable proxy from the pool
        self.ip_pool.remove(self.ip)
        if len(self.ip_pool) < 5:
            self.ip_pool = self.getip()
        # re-issue the request
        return request


class RandomUAMiddleware(object):
    def __init__(self):
        self.ua_list = [
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'
        ]

    # called for every outgoing request
    def process_request(self, request, spider):
        # pick a random UA from the list above
        ua = random.choice(self.ua_list)
        # print('*' * 100)
        # print('Currently using UA: %s' % ua)
        # print('*' * 100)

        # attach the header to the request
        request.headers.setdefault('User-Agent', ua)




# from lxml import etree
from pyvirtualdisplay import Display

from scrapy import signals
from selenium.common.exceptions import TimeoutException
from scrapy.http import HtmlResponse
from logging import getLogger
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import random
# from selenium.webdriver.common.by import By
# from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.support.ui import WebDriverWait

# from qq.ua import uas_dict
# from qq.utils import Mysql
chrome_n = no_se_n = 0

class Selenium_Chrome_Middleware():
    # display = Display(visible=0, size=(800, 800))
    # display.start()
    chrome_options = Options()
    # chrome_options.add_argument('--no-sandbox')
    # chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # path = r'/home/kou/soft/chromedriver'
    path = r'/usr/bin/chromedriver'
    # browser = webdriver.Chrome(chrome_options=chrome_options)
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)

    def __init__(self):
        self.logger = getLogger(__name__)
        # self.mysql = Mysql()


    def __del__(self):
        self.driver.close()

    def process_request(self, request, spider):
        """
        Fetch the page with headless Chrome.
        :param request:
        :param spider:
        :return: HtmlResponse
        """

        # if not request.meta.get('index'):

        global chrome_n
        chrome_n += 1

        self.logger.info('Launching headless Chrome... request #%d' % chrome_n)
        try:
            self.driver.get(request.url)
            # login = self.driver.find_element_by_class_name('forget-pwd').click()
            # login = self.driver.find_element_by_id('J_Static2Quick')
            # login.click()
            time.sleep(3)
            self.driver.save_screenshot('b0.png')
            login = self.driver.find_element_by_id('J_Quick2Static')
            # login = driver.find_element_by_class_name('forget-pwd J_Quick2Static')
            login.click()
            print('switched to the username/password login form')
            time.sleep(3)
            self.driver.save_screenshot('ta11212.png')
            time.sleep(3)
            # myinput = self.driver.find_element_by_id('kw')
            # type text into the input box
            # myinput.send_keys('气质美女')
            # time.sleep(3)
            # self.driver.save_screenshot('baidu1.png')

            # click the Baidu search button
            # mybutton = self.driver.find_element_by_id('su')
            # time.sleep(3)
            # mybutton.click()
            # time.sleep(3)
            # self.driver.implicitly_wait(10)
            # self.driver.save_screenshot('baidu2.png')
            js = 'document.body.scrollTop=10000'
            self.driver.execute_script(js)
            time.sleep(5)
            self.driver.save_screenshot('douban2.png')
            # time.sleep(4)
            # print('6767'*100)

            return HtmlResponse(url=request.url, body=self.driver.page_source, request=request, encoding='utf-8',status=200)

        except TimeoutException:
            print('page load timed out: %s' % request.url)
            return HtmlResponse(url=request.url, status=500, request=request)
        # else:
        #
        #     global no_se_n
        #     no_se_n += 1
        #
        #     self.logger.info('不走selenium中间件...%d' % no_se_n)
        #     return None



formdata={'data': '{"pageCode":"mallIndex","ua":"Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1","params":"{"url":"https://www.tmall.com/#J_Filter","referrer":"","oneId":null,"fid":"c8kEFncYgv6"}"}'}
