美文网首页
爬虫验证码之--微博宫格验证码的识别

爬虫验证码之--微博宫格验证码的识别

作者: strive鱼 | 来源:发表于2018-07-04 20:14 被阅读0次

    本文要识别的验证码类型为宫格验证码,主要以微博为例:多次登录微博(https://passport.weibo.cn/signin/login)后就会出现该类验证码

    本文的重点仍然是 selenium 的使用

    import os
    import time
    from io import BytesIO
    from PIL import Image
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from os import listdir
    
    # Weibo account credentials; CrackWeiboSlide copies them in __init__ and types
    # them into the login form. The values here are placeholders.
    USERNAME = 'xxxxxx'
    PASSWORD = 'xxxxxxx'
    
    # Directory that detect_image() lists for template images. The part of each file
    # name before the first '.' is parsed digit by digit as the drag order.
    TEMPLATES_FOLDER = 'templates/'
    
    
    class CrackWeiboSlide():
        """Log in to mobile Weibo with Selenium and solve its dot-pattern captcha.

        The captcha is recognised by comparing a cropped screenshot against the
        template images in ``TEMPLATES_FOLDER``; the digits of a template's file
        name give the order in which the dots have to be connected.
        """

        def __init__(self):
            """Start a Chrome session and store the login URL and credentials."""
            self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
            self.browser = webdriver.Chrome()
            self.wait = WebDriverWait(self.browser, 20)
            self.username = USERNAME
            self.password = PASSWORD

        def __del__(self):
            """Shut the browser session down when the object is collected."""
            # ``browser`` is missing when webdriver.Chrome() raised inside __init__.
            browser = getattr(self, 'browser', None)
            if browser is not None:
                # quit() ends the whole WebDriver session, not just the current window.
                browser.quit()

        def open(self):
            """
            Open the login page, type the user name and password, and submit.
            :return: None
            """
            self.browser.get(self.url)
            username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
            password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
            submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
            username.send_keys(self.username)
            password.send_keys(self.password)
            submit.click()

        def get_position(self, retries=3):
            """
            Locate the captcha element, logging in again while it does not appear.
            :param retries: how many times to re-submit the login form after a timeout
            :return: tuple (top, bottom, left, right) of the captcha in page coordinates
            :raises TimeoutException: if the captcha never shows up
            """
            attempts_left = retries
            while True:
                try:
                    img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'patt-shadow')))
                    break
                except TimeoutException:
                    print('未出现验证码')
                    if attempts_left <= 0:
                        raise
                    attempts_left -= 1
                    # Logging in again is what makes the captcha appear.
                    self.open()
            # Give the captcha a moment to finish rendering before measuring it.
            time.sleep(2)
            location = img.location
            size = img.size
            top = location['y']
            bottom = location['y'] + size['height']
            left = location['x']
            right = location['x'] + size['width']
            return (top, bottom, left, right)

        def get_screenshot(self):
            """
            Take a screenshot of the current page.
            :return: the screenshot as a PIL image
            """
            screenshot = self.browser.get_screenshot_as_png()
            screenshot = Image.open(BytesIO(screenshot))
            return screenshot

        def get_image(self, name='captcha.png'):
            """
            Crop the captcha out of a page screenshot and save it to disk.
            :param name: file name the cropped captcha is saved under
            :return: the cropped captcha as a PIL image
            """
            # NOTE(review): the crop uses element coordinates directly, which assumes
            # screenshot pixels map 1:1 to page pixels - confirm on HiDPI displays.
            top, bottom, left, right = self.get_position()
            print('验证码位置', top, bottom, left, right)
            screenshot = self.get_screenshot()
            captcha = screenshot.crop((left, top, right, bottom))
            captcha.save(name)
            return captcha

        @staticmethod
        def _pixels_close(pixel1, pixel2, threshold=20):
            """Return True if the first three channels each differ by less than threshold."""
            return all(abs(pixel1[channel] - pixel2[channel]) < threshold for channel in range(3))

        def is_pixel_equal(self, image1, image2, x, y):
            """
            Compare one pixel of two images with a per-channel tolerance.
            :param image1: first image
            :param image2: second image
            :param x: pixel column
            :param y: pixel row
            :return: True if the pixel at (x, y) is close enough in both images
            """
            return self._pixels_close(image1.load()[x, y], image2.load()[x, y])

        def same_image(self, image, template):
            """
            Decide whether a captured captcha matches a stored template.
            :param image: captcha to recognise
            :param template: stored template image
            :return: True if more than 99% of the pixels are close, else False
            """
            width, height = image.width, image.height
            # Differently sized (or empty) images cannot match; bail out instead of
            # indexing past the template's bounds or dividing by zero.
            if (width, height) != (template.width, template.height) or width * height == 0:
                return False
            threshold = 0.99
            # Fetch the pixel accessors once rather than once per pixel.
            pixels = image.load()
            template_pixels = template.load()
            matched = sum(
                1
                for x in range(width)
                for y in range(height)
                if self._pixels_close(pixels[x, y], template_pixels[x, y])
            )
            result = float(matched) / (width * height)
            if result > threshold:
                print('成功匹配')
                return True
            return False

        def detect_image(self, image):
            """
            Find the template that matches the captcha.
            :param image: captcha to recognise
            :return: drag order as a list of ints, or None if no template matches
            """
            for template_name in listdir(TEMPLATES_FOLDER):
                stem = template_name.split('.')[0]
                # Only files named after a digit sequence are templates; this skips
                # stray entries such as hidden files that would break int() below.
                if not stem.isdecimal():
                    continue
                print('正在匹配', template_name)
                # The context manager releases the file handle after the comparison.
                with Image.open(os.path.join(TEMPLATES_FOLDER, template_name)) as template:
                    matched = self.same_image(image, template)
                if matched:
                    numbers = [int(number) for number in stem]
                    print('拖动顺序', numbers)
                    return numbers
            return None

        def _drag_by(self, dx, dy, times=30):
            """Move the held pointer by (dx, dy) in ``times`` small steps."""
            moved_x = moved_y = 0
            for step in range(1, times + 1):
                # Whole-pixel steps that add up exactly to (dx, dy), so the path
                # cannot fall short if the driver truncates fractional offsets.
                target_x = round(dx * step / times)
                target_y = round(dy * step / times)
                ActionChains(self.browser).move_by_offset(target_x - moved_x, target_y - moved_y).perform()
                moved_x, moved_y = target_x, target_y
                time.sleep(1 / times)

        def move(self, numbers):
            """
            Drag the pointer across the captcha dots in the given order.
            :param numbers: 1-based dot indices in the order they must be connected
            :return: None
            """
            circles = self.browser.find_elements(By.CSS_SELECTOR, '.patt-wrap .patt-circ')
            last = len(numbers) - 1
            dx = dy = 0
            for index, number in enumerate(numbers):
                circle = circles[number - 1]
                if index == 0:
                    # Press down on the centre of the first dot. move_to_element
                    # targets the element centre, so no explicit offset is needed.
                    ActionChains(self.browser).move_to_element(circle).click_and_hold().perform()
                else:
                    # Drag from the previous dot using the offset computed last round.
                    self._drag_by(dx, dy)
                if index == last:
                    # Release the mouse on the final dot.
                    ActionChains(self.browser).release().perform()
                else:
                    # Offset from the current dot to the next one.
                    following = circles[numbers[index + 1] - 1]
                    dx = following.location['x'] - circle.location['x']
                    dy = following.location['y'] - circle.location['y']

        def crack(self):
            """
            Entry point: log in, capture the captcha, recognise it and drag the pattern.
            :return: None
            """
            self.open()
            # Capture the captcha image.
            image = self.get_image('captcha.png')
            numbers = self.detect_image(image)
            if numbers is None:
                # No template matched, so there is no pattern to drag.
                print('未找到匹配的模板')
                return
            self.move(numbers)
            time.sleep(10)
            print('识别结束')

        def main(self):
            """
            Collect captcha samples: log in and save the captcha as 0.png, 1.png, ...
            Loops until interrupted.
            :return: None
            """
            count = 0
            while True:
                self.open()
                self.get_image(str(count) + '.png')
                count += 1
                
                
    # Script entry point: build the solver and run one login-and-solve pass.
    if __name__ == '__main__':
        solver = CrackWeiboSlide()
        solver.crack()
    

    相关文章

      网友评论

          本文标题:爬虫验证码之--微博宫格验证码的识别

          本文链接:https://www.haomeiwen.com/subject/yckyuftx.html