Python 爬虫 - 验证码识别

作者: 莫名ypc | 来源:发表于2019-01-08 16:37 被阅读0次

python利用Tesseract识别验证码的方法
python验证码识别模块
爬虫-3 网站验证码识别&破解
Python 爬虫 - 验证码识别
python图像识别
Python+OCR图片文字识别，验证码识别，银行卡识别
Python爬虫学习--Python爬虫模拟登录带验证码网站
爬虫：9. 验证码识别
反爬必修课之----(3)极验滑动验证码识别
反爬必修课之----(4)点触验证码识别

import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from chaojiying import main1

# Chrome launch options; uncomment the add_argument line below to run
# without a visible browser window.
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
# Pass the options through `options=`: the older `chrome_options=` keyword is
# deprecated and is rejected by current Selenium releases.
browser = webdriver.Chrome(options=chrome_options)

# Browser window size requested from the driver, in pixels.
screen_width = 1400
screen_height = 700

browser.set_window_size(screen_width, screen_height)
# Explicit wait: blocks on a condition for one specific element, for at most
# 5 seconds per call.
wait = WebDriverWait(browser, 5)


def get_big_image():
    """Take a screenshot of the browser window contents.

    Returns:
        PIL.Image.Image: the screenshot decoded from the PNG bytes returned
        by the driver.
    """
    png_bytes = browser.get_screenshot_as_png()
    return Image.open(BytesIO(png_bytes))


def get_captha_position():
    """Locate the captcha image element and return its bounding box.

    Waits until the ``#captchaImg`` element is present, then builds the box
    from the location and size reported by the driver. The box and the
    element's width/height are also printed for debugging.

    Returns:
        tuple: ``(x1, y1, x2, y2)`` -- the top-left and bottom-right corners.
    """
    locator = (By.CSS_SELECTOR, '#captchaImg')
    element = wait.until(EC.presence_of_element_located(locator))
    origin, dims = element.location, element.size
    left, top = origin['x'], origin['y']
    box_width, box_height = dims['width'], dims['height']
    right, bottom = left + box_width, top + box_height
    print(left, top, right, bottom)
    print(box_width, box_height)
    return (left, top, right, bottom)


def get_page(username='ypc624@qq.com', password='ypc426069'):
    """Fill in and submit the 10086 login form, solving its image captcha.

    Opens the login page, clicks the ``#mail_login_2`` control (presumably
    the e-mail login tab), types the credentials, crops the captcha out of a
    full screenshot, sends the crop to the recognition helper ``main1`` and
    submits the form with the recognised text.

    Args:
        username: Text typed into the ``#e_name`` field.
        password: Text typed into the ``#e_pwd`` field.

    Returns:
        None.

    Raises:
        selenium.common.exceptions.TimeoutException: if an awaited element
            does not satisfy its condition within the wait timeout.

    Side effects:
        Writes ``mobile_full_image.png`` and ``mobile_captcha.png`` to the
        current working directory and prints debugging values.
    """
    # NOTE(review): the default credentials are kept only so existing
    # zero-argument callers keep working; real credentials should be passed
    # in by the caller rather than committed to source control.
    url = 'https://login.10086.cn/html/login/login.html?channelID=12002&backUrl=https%3A%2F%2Fshop.10086.cn%2Fmall_280_280.html%3Fforcelogin%3D1#'
    browser.get(url)

    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#mail_login_2')))
    button.click()

    input_username = wait.until(EC.presence_of_element_located
                                ((By.CSS_SELECTOR, '#e_name')))
    input_password = wait.until(EC.presence_of_element_located
                                ((By.CSS_SELECTOR, '#e_pwd')))
    input_username.clear()
    input_username.send_keys(username)
    input_password.clear()
    input_password.send_keys(password)
    # Fixed pause before the screenshot; presumably gives the captcha image
    # time to finish rendering.
    time.sleep(3)

    full_image = get_big_image()
    print(full_image.width, full_image.height)
    full_image.save('mobile_full_image.png')

    # Element coordinates are reported in CSS pixels, while the screenshot is
    # in device pixels. The two differ by the device pixel ratio on HiDPI or
    # scaled displays, and the ratio is the same on both axes, so BOTH x and y
    # must be scaled. Comparing the image size with the window size is only an
    # approximation, because the window size includes the browser's own frame
    # and toolbars.
    pixel_ratio = browser.execute_script('return window.devicePixelRatio') or 1
    print(pixel_ratio)

    # Top-left and bottom-right corners of the captcha, in screenshot pixels.
    x1, y1, x2, y2 = (round(coord * pixel_ratio)
                      for coord in get_captha_position())
    print(x1, y1, x2, y2)

    captha_img = full_image.crop((x1, y1, x2, y2))
    captha_img.save('mobile_captcha.png')
    captha_str = main1('mobile_captcha.png')
    print(captha_str)

    input_code = wait.until(EC.presence_of_element_located
                            ((By.CSS_SELECTOR, '#inputCode')))
    input_code.clear()
    input_code.send_keys(captha_str)

    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#submit_bt')))
    button.click()




def main():
    """Script entry point: run the login flow once."""
    # get_page() has no return value, so its result is not bound to a name.
    get_page()


# Start the login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()