美文网首页
python爬虫之selenium知乎小Demo

python爬虫之selenium知乎小Demo

作者: Pickupthesmokes | 来源:发表于2018-12-30 01:03 被阅读0次
    from selenium import webdriver
    import time
    from PIL import Image
    from io import BytesIO
    import base64
    from zheye import zheye
    from pytesseract import image_to_string
    from selenium.webdriver import ActionChains
    from selenium.common.exceptions import NoSuchElementException
    # Demo script: log in to Zhihu with Selenium and deal with the captcha that
    # may appear after submitting the form. Two captcha kinds are handled:
    #   * the "inverted Chinese character" captcha, solved with zheye by
    #     clicking on every character it reports;
    #   * the plain English-letter captcha, which is only OCR'd and printed.
    #
    # NOTE(review): the driver path and the account credentials are hard-coded;
    # move them to configuration/environment before using this outside a demo.
    driver = webdriver.Chrome(executable_path='/home/ljh/桌面/driver/chromedriver')
    # Firefox alternative:
    # driver = webdriver.Firefox(executable_path='/home/ljh/桌面/driver/geckodriver')
    driver.get('https://www.zhihu.com/signup?next=%2F')

    # Presumably toggles the sign-up page over to the login form -- confirm
    # against the live page. The class name had stray whitespace inside it
    # ("SignContainer-    switch"), which could never match; restored here.
    driver.find_element_by_xpath('//div[@class="SignContainer-switch"]/span').click()
    time.sleep(3)

    driver.find_element_by_xpath('//input[@name="username"]').send_keys('18518753265')
    driver.find_element_by_xpath('//input[@name="password"]').send_keys('ljh12345678')

    # The same submit button is clicked before and after solving the captcha.
    submit_xpath = '//button[@class="Button SignFlow-submitButton Button--primary Button--blue"]'
    driver.find_element_by_xpath(submit_xpath).click()
    time.sleep(3)

    # Keep the try body down to the single lookup that decides which captcha
    # is shown, so an unrelated missing element is not mistaken for
    # "no inverted-character captcha".
    try:
        image_element = driver.find_element_by_xpath('//img[@class="Captcha-chineseImg"]')
    except NoSuchElementException:
        print('不是倒立验证码,是正常的英文字母验证码')
        # Plain image captcha: decode the inline base64 image and save it.
        image_element = driver.find_element_by_xpath('//img[@class="Captcha-englishImg"]')
        image_data = image_element.get_attribute('src').replace('data:image/jpg;base64,', '').replace('%0A', '\n')
        image_data = base64.b64decode(image_data)
        with open('englishImg.gif', 'wb') as image_file:
            image_file.write(image_data)
        image = Image.open('englishImg.gif')
        image_str = image_to_string(image)
        print(image_str)
        # A captcha-solving service would be used here for a reliable result.
    else:
        # The captcha is embedded as a base64 data URI; strip the prefix,
        # decode it and save it to disk so zheye can read it.
        image_data = image_element.get_attribute('src').replace('data:image/jpg;base64,', '').replace('%0A', '\n')
        chinese_img = base64.b64decode(image_data)
        with open('chineseImg.gif', 'wb') as image_file:
            image_file.write(chinese_img)

        # Recognise the inverted characters.
        z = zheye()
        # Collect the recognition result (one entry per inverted character).
        positions = z.Recognize('chineseImg.gif')
        print('zheye', positions)
        for position in positions:
            print(position)
            # Index 1 is used as the x offset and index 0 as the y offset.
            # The halving presumably compensates for the image being displayed
            # at half its pixel size -- TODO confirm.
            ActionChains(driver).move_to_element_with_offset(
                image_element, int(position[1]) / 2, int(position[0]) / 2
            ).click().perform()
            time.sleep(1)
        time.sleep(5)
        driver.find_element_by_xpath(submit_xpath).click()

    相关文章

      网友评论

          本文标题:python爬虫之selenium知乎小Demo

          本文链接:https://www.haomeiwen.com/subject/evmmlqtx.html