美文网首页
Python 爬虫 - 验证码识别

Python 爬虫 - 验证码识别

作者: 莫名ypc | 来源:发表于2019-01-08 16:37 被阅读0次
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from chaojiying import main1

# Chrome is started with a visible window; uncomment the argument below to
# run it headless instead.
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
# `options=` is the supported keyword; `chrome_options=` was deprecated and
# is rejected by current Selenium releases.
browser = webdriver.Chrome(options=chrome_options)

# Requested browser window size.
screen_width = 1400
screen_height = 700

browser.set_window_size(screen_width, screen_height)
# Explicit wait: block on a specific element condition for up to 5 seconds.
wait = WebDriverWait(browser, 5)


# 取浏览器窗口内全图
def get_big_image():
    """Return a screenshot of the current browser window as a PIL image.

    The PNG bytes produced by the module-level ``browser`` are decoded in
    memory; nothing is written to disk here.
    """
    png_bytes = browser.get_screenshot_as_png()
    return Image.open(BytesIO(png_bytes))


def get_captha_position():
    """Locate the ``#captchaImg`` element and return its bounding box.

    Waits (via the module-level ``wait``) until the element is present, then
    combines its reported location and size.

    Returns:
        A tuple ``(x1, y1, x2, y2)``: the top-left and bottom-right corners
        as reported by the element's ``location`` and ``size``.
    """
    locator = (By.CSS_SELECTOR, '#captchaImg')
    element = wait.until(EC.presence_of_element_located(locator))

    origin = element.location
    extent = element.size
    left, top = origin['x'], origin['y']
    width, height = extent['width'], extent['height']
    right = left + width
    bottom = top + height

    # Debug output: the box corners, then the element dimensions.
    print(left, top, right, bottom)
    print(width, height)
    return (left, top, right, bottom)


def get_page(username='ypc624@qq.com', password='ypc426069'):
    """Open the 10086 login page, fill in the form and submit it.

    The captcha is cut out of a full-window screenshot, saved to
    ``mobile_captcha.png`` and passed to ``main1`` for recognition; the
    returned text is typed into the captcha field before the form is
    submitted.

    Args:
        username: Account name typed into the ``#e_name`` field.
        password: Password typed into the ``#e_pwd`` field.

    Returns:
        None. The function acts only through the browser and the two image
        files it writes (``mobile_full_image.png``, ``mobile_captcha.png``).

    Raises:
        selenium.common.exceptions.TimeoutException: if any awaited element
            does not satisfy its condition within the ``wait`` timeout.
    """
    # NOTE(review): the default credentials are kept only for backward
    # compatibility; real credentials should come from the caller or from
    # configuration, not from source control.
    url = 'https://login.10086.cn/html/login/login.html?channelID=12002&backUrl=https%3A%2F%2Fshop.10086.cn%2Fmall_280_280.html%3Fforcelogin%3D1#'
    browser.get(url)

    # Presumably switches the form to its e-mail login tab -- confirm
    # against the page markup.
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#mail_login_2')))
    button.click()

    input_username = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#e_name')))
    input_password = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#e_pwd')))
    input_username.clear()
    input_username.send_keys(username)
    input_password.clear()
    input_password.send_keys(password)
    # Fixed pause before the screenshot -- presumably to let the captcha
    # image finish rendering.
    time.sleep(3)

    full_image = get_big_image()
    print(full_image.width, full_image.height)
    full_image.save('mobile_full_image.png')

    # Element coordinates are CSS pixels, while on a HiDPI display the
    # screenshot is rendered at a multiple of that size. The scale is the
    # same on both axes, so x AND y must be multiplied by it. Deriving it
    # from the window size is unreliable because the window dimensions
    # include browser chrome; ask the page for its pixel ratio instead.
    pixel_ratio = browser.execute_script('return window.devicePixelRatio') or 1
    print(pixel_ratio)

    # Top-left and bottom-right corners of the captcha, converted to
    # whole screenshot pixels.
    x1, y1, x2, y2 = (
        int(round(value * pixel_ratio)) for value in get_captha_position()
    )
    print(x1, y1, x2, y2)

    captha_img = full_image.crop((x1, y1, x2, y2))
    captha_img.save('mobile_captcha.png')
    captha_str = main1('mobile_captcha.png')
    print(captha_str)

    input_code = wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#inputCode')))
    input_code.clear()
    input_code.send_keys(captha_str)

    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#submit_bt')))
    button.click()




def main():
    """Run the login flow once."""
    # get_page() has no return value, so there is nothing to bind here.
    get_page()


# Run the login flow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

相关文章

网友评论

      本文标题:Python 爬虫 - 验证码识别

      本文链接:https://www.haomeiwen.com/subject/cxiyrqtx.html