美文网首页
day70-验证码识别

day70-验证码识别

作者: barriers | 来源:发表于2019-02-22 17:22 被阅读0次

    在图像验证码识别中,以前经常用到tesseract;但它的识别率较低,故现在已不常用。

    pip install pytesseract  # 安装三方库(Python封装库;还需另行安装tesseract-ocr识别引擎)
    

    现在识别普通图像验证码,主要是将pillow图像处理库与第三方打码平台超级鹰联合使用:先用pillow截取验证码图片,再交给超级鹰识别。

    pip install pillow
    

    1. 考生之家验证码识别

    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from urllib.parse import quote
    from lxml import etree
    from PIL import Image
    from io import BytesIO
    from chaojiying import main1
    import time
    
    # Configure and launch the Chrome session shared by every helper below.
    chrome_options = webdriver.ChromeOptions()
    # Uncomment to run headless: no browser window is opened, but everything
    # still executes normally.
    # chrome_options.add_argument('--headless')
    # Pass the options through `options=`; the legacy `chrome_options=` keyword
    # is deprecated and no longer accepted by recent Selenium 4 releases.
    browser = webdriver.Chrome(options=chrome_options)

    browser.set_window_size(1400, 700)
    # Explicit wait: block for up to 10 seconds on a specific node.
    wait = WebDriverWait(browser, 10)
    def get_page():
        """Open the registration page in the shared browser and return its HTML source."""
        browser.get('http://bm.e21cn.com/log/reg.aspx')
        return browser.page_source
    def get_big_image():
        """Scroll the page to y=300 and return a screenshot of the browser window.

        The PNG bytes returned by the driver are wrapped in a PIL ``Image`` so
        the caller can save or crop it.
        """
        browser.execute_script('window.scrollTo(0, 300)')
        png_bytes = browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))
    def get_position():
        """Return the captcha's crop box ``(x1, y1, x2, y2)`` inside the window screenshot.

        Waits for the ``#imgCheckCode`` element, prints its location and size,
        and returns the top-left and bottom-right corners of the image.

        The vertical coordinate is corrected by the page's scroll distance so
        that it lines up with the screenshot of the visible window. The offset
        is read from the browser instead of being hard-coded to 300: the
        browser clamps ``window.scrollTo`` to the document height, so the page
        may have scrolled less than requested, which would shift the crop box.
        """
        img = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#imgCheckCode')))
        loc = img.location
        size = img.size
        print(loc)
        print(size)
        # Actual vertical scroll of the page, rounded to whole pixels.
        scroll_y = browser.execute_script('return Math.round(window.pageYOffset)')
        x1 = loc['x']
        y1 = loc['y'] - scroll_y
        x2 = x1 + size['width']
        y2 = y1 + size['height']
        return (x1, y1, x2, y2)
    def parse_html(html):
        """Recognise the captcha, then fill in and submit the registration form.

        Saves the window screenshot as ``full_screen.png`` and the cropped
        captcha as ``crop.png``, passes the crop to ``main1`` for recognition,
        prints the result, types the form values and clicks the submit button.

        ``html`` is accepted for the caller's convenience but is not used.
        """
        page_shot = get_big_image()
        page_shot.save('full_screen.png')

        captcha_path = 'crop.png'
        page_shot.crop(get_position()).save(captcha_path)
        captcha_text = main1(captcha_path)
        print(captcha_text)

        # (CSS selector, text to type) for each input, in fill order.
        form_values = [
            ('input#username', '剑圣'),
            ('input#pwd', '123456'),
            ('input#pwd_Q', '123456'),
            ('input#tel', '18362537333'),
            ('input#CheckCode', captcha_text),
        ]
        # Locate every input first, then the submit button, before typing anything.
        fields = [
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
            for selector, _ in form_values
        ]
        submit_button = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'input#btn_login')))

        for field, (_, text) in zip(fields, form_values):
            field.send_keys(text)
        # Short pause before submitting, as in the original flow.
        time.sleep(2)
        submit_button.click()
    def main():
        """Load the registration page, then fill in and submit its form."""
        parse_html(get_page())

    # Run the flow only when executed as a script.
    if __name__ == '__main__':
        main()
    

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless') # 无头浏览器,如果使用,将不打开浏览器,但功能都会正常执行
    browser = webdriver.Chrome(chrome_options=chrome_options)
    browser.execute_script('window.scrollTo(0, 300)')  # 输入框及验证码框在屏幕上不能全部展示,故将页面向下滚动300像素。
    screenshot = browser.get_screenshot_as_png()
    screenshot = Image.open(BytesIO(screenshot))  # 对页面进行截图
    y1 = loc['y'] - 300  # 定位验证码时,由于页面向下滚动了300像素,需要将其减掉,才能得到验证码在截图中的准确位置。

    2. 移动登录网页验证码获取

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from PIL import Image
    from io import BytesIO
    from chaojiying import main1
    import time
        
    # Launch the Chrome session shared by every helper below.
    chrome_options = webdriver.ChromeOptions()
    # Pass the options through `options=`; the legacy `chrome_options=` keyword
    # is deprecated and no longer accepted by recent Selenium 4 releases.
    browser = webdriver.Chrome(options=chrome_options)
    # Set the window size and the explicit-wait timeout (3 seconds).
    browser.set_window_size(1366, 768)
    wait = WebDriverWait(browser, 3)
    def get_page():
        """Open the registration page in the shared browser and return its HTML source."""
        browser.get('https://login.10086.cn/html/register/register.html')
        return browser.page_source
    def get_big_image():
        """Return a screenshot of the browser window as a PIL ``Image``.

        The page is not scrolled before the screenshot is taken.
        """
        png_bytes = browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))
    def get_position():
        """Return the captcha's crop box ``(x1, y1, x2, y2)``.

        Waits for the ``#captchaImg`` element, prints its location and size,
        and returns the top-left and bottom-right corners of the image.
        No scroll offset is subtracted: the element's location is used as-is.
        """
        captcha = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#captchaImg')))
        origin, extent = captcha.location, captcha.size
        print(origin)
        print(extent)
        left, top = origin['x'], origin['y']
        return (left, top, left + extent['width'], top + extent['height'])
    def parse_html(html):
        """Recognise the captcha, then fill in and submit the registration form.

        Saves the window screenshot as ``full_screen.png`` and the cropped
        captcha as ``crop.png``, passes the crop to ``main1`` (the Chaojiying
        client) for recognition, prints the result, types the form values and
        clicks the submit button.

        ``html`` is accepted for the caller's convenience but is not used.
        """
        def locate(selector):
            # Wait until the node matching `selector` is present in the DOM.
            return wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector)))

        # Full-window screenshot, kept on disk for inspection.
        page_shot = get_big_image()
        page_shot.save('full_screen.png')

        # Cut out the captcha and hand the file to the recognition service.
        captcha_path = 'crop.png'
        page_shot.crop(get_position()).save(captcha_path)
        captcha_text = main1(captcha_path)

        account = '1382@cdn.com'
        secret = '123456'
        print(captcha_text)

        # Locate all inputs and the submit button before typing anything.
        name_box = locate('input#loginName')
        password_box = locate('input#newPassword')
        repeat_box = locate('input#newPasswordRepeat')
        code_box = locate('input#inputCode')
        submit_button = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'input#regSub')))

        name_box.send_keys(account)
        password_box.send_keys(secret)
        repeat_box.send_keys(secret)
        code_box.send_keys(captcha_text)
        # Short pause before submitting, as in the original flow.
        time.sleep(2)
        submit_button.click()
    def main():
        """Load the registration page, then fill in and submit its form."""
        parse_html(get_page())

    # Run the flow only when executed as a script.
    if __name__ == '__main__':
        main()
    

    screenshot = get_big_image()  # 获取屏幕截图
    screenshot.save('full_screen.png')  # 保存屏幕截图
    # 获取验证码坐标
    x1, y1, x2, y2 = get_position()
    crop_image = screenshot.crop((x1, y1, x2, y2))  # 截小图并保存
    file_name = 'crop.png'
    crop_image.save(file_name)
    captha_str = main1(file_name)  # 传入小图并调用超级鹰解析验证码

    相关文章

      网友评论

          本文标题:day70-验证码识别

          本文链接:https://www.haomeiwen.com/subject/ujweyqtx.html