美文网首页
验证码破解 | Selenium模拟登陆微博

验证码破解 | Selenium模拟登陆微博

作者: 生信师姐 | 来源:发表于2020-05-16 07:44 被阅读0次

    模拟登录微博相对来说并不难。验证码是常规的5个随机数字、字母的组合,识别起来也比较容易。主要用到Selenium中的许多知识,如定位标签、输入信息、点击等。

    破解微博登陆的思路:
    (1)使用webdriver打开微博网页;
    (2)输入用户名和密码,点击登录;
    (3)对第二步的结果进行判断,分为以下四种情况:

    • 情况一:用户名或者密码错误
    • 情况二:登录成功
    • 情况三:出现验证码图片,需识别
    • 情况四:其他错误
      (4)本例在登录成功后,还增加了获取cookies的步骤。
    import requests
    from requests import RequestException
    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException, TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from chaojiying import Chaojiying
    
    
    # Chaojiying (captcha recognition service) account settings.
    # The three credentials are deliberately left as empty placeholders: fill in
    # your own user name, password and software ID before running the script.
    CHAOJIYING_USERNAME = ''
    CHAOJIYING_PASSWORD = ''
    CHAOJIYING_SOFT_ID = ''
    # Captcha type code handed to Chaojiying.post_pic().
    # NOTE(review): presumably selects the short alphanumeric captcha category;
    # confirm against the Chaojiying type list.
    CHAOJIYING_KIND = 1006
    
    
    class LoginWeibo():
        """
        Log in to weibo.com through a Selenium-driven Chrome browser.

        The flow is: open the home page, type the credentials, click the login
        button, then inspect the page to tell apart a wrong password, a
        successful login and a captcha challenge. Captcha images are sent to
        the Chaojiying recognition service.

        NOTE: nothing in this class closes the browser; call
        ``self.browser.quit()`` yourself once the cookies are no longer needed.
        """

        def __init__(self, username, password,
                     driver_path=r'D:\download\pythonRelated\chromedriver.exe'):
            """
            Start Chrome and remember the account to log in with.

            :param username: Weibo login name
            :param password: Weibo password
            :param driver_path: location of the chromedriver executable; the
                default is the path hard-coded by the original script (written
                as a raw string so the backslashes are never read as escapes)
            """
            self.url = 'https://www.weibo.com'
            self.browser = webdriver.Chrome(executable_path=driver_path)
            # Shared explicit wait (20 s) used for the main page elements.
            self.wait = WebDriverWait(self.browser, 20)
            self.username = username
            self.password = password
            self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

        def open(self):
            """
            Load the home page and type the user name and password into the form.

            :return: None
            """
            self.browser.get(self.url)
            username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginname')))
            password = self.wait.until(EC.presence_of_element_located((By.NAME, 'password')))
            username.send_keys(self.username)
            password.send_keys(self.password)

        def get_click_button(self):
            """
            Locate the login button once it is clickable.

            :return: the button element
            """
            # Markup being matched:
            # <a class="W_btn_a btn_32px " action-type="btn_submit" node-type="submitBtn">
            button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'W_btn_a')))
            return button

        def login_successfully(self):
            """
            Tell whether the login succeeded.

            :return: True when the mail icon shows up within 5 seconds, else False
            """
            # The icon below is only rendered for a logged-in user:
            # <em class="W_ficon ficon_mail S_ficon">I</em>
            try:
                return bool(
                    WebDriverWait(self.browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.ficon_mail')))
                )
            except TimeoutException:
                return False

        def get_click_image(self, name='captcha.png'):
            """
            Download the captcha image shown on the login form.

            :param name: file the raw image bytes are also written to
            :return: the image bytes, or None when no captcha could be fetched
            """
            # Markup being matched:
            # <img action-type="btn_change_verifycode" node-type="verifycode_image" src="https://login.sina.com.cn/cgi/pin.php?...">
            try:
                element = self.wait.until(
                    EC.presence_of_element_located((By.XPATH, '//img[@action-type="btn_change_verifycode"]')))
            except (NoSuchElementException, TimeoutException):
                # wait.until() reports a missing element as TimeoutException.
                print('未找到验证码图片')
                return None

            # NOTE(review): the image is re-downloaded outside the browser
            # session; confirm the server returns the same captcha the browser
            # is displaying.
            response = get_html(element.get_attribute('src'))
            if response is None:
                print('验证码图片下载失败')
                return None

            image = response.content
            # Inside a method body `open` is the builtin, not self.open().
            with open(name, 'wb') as f:
                f.write(image)
            return image

        def password_error(self):
            """
            Tell whether the page reports a wrong user name or password.

            :return: True when the error popup carries that message, else False
            """
            try:
                element = WebDriverWait(self.browser, 5).until(
                    EC.presence_of_element_located((By.XPATH, '//div[@class="W_layer W_layer_pop"]/div/p/span[2]')))
            except TimeoutException:
                return False
            message = element.text
            print(message)
            return message == '用户名或密码错误。'

        def get_cookies(self):
            """
            Fetch the cookies of the current browser session.

            :return: whatever ``self.browser.get_cookies()`` returns
            """
            cookies = self.browser.get_cookies()
            print(cookies)
            return cookies

        def _success_result(self):
            """Build the status-1 result carrying the session cookies."""
            print('登录成功')
            return {
                'status': 1,
                'content': self.get_cookies()
            }

        def _attempt_login(self):
            """
            Run one complete login attempt.

            :return: a result dict when the attempt is conclusive (wrong
                password or success), or None when it should be retried
            """
            # 1. Open the page and type the credentials.
            self.open()

            # 2. Click the login button.
            self.get_click_button().click()

            if self.password_error():
                print('用户名或密码错误')
                return {
                    'status': 2,
                    'content': '用户名或密码错误'
                }
            if self.login_successfully():
                return self._success_result()

            # Neither an error nor a success: a captcha is probably required.
            image = self.get_click_image()
            if image is None:
                return None

            # Have the captcha recognised.
            result = self.chaojiying.post_pic(image, CHAOJIYING_KIND)
            print(result)

            # Type the recognised text into the captcha field:
            # <input type="text" class="W_input " name="verifycode" node-type="verifycode">
            verifycode = self.wait.until(EC.presence_of_element_located((By.NAME, 'verifycode')))
            verifycode.send_keys(result['pic_str'])

            # Submit again.
            self.get_click_button().click()
            if self.login_successfully():
                return self._success_result()

            # The recognised text was rejected: report it to the service.
            self.chaojiying.report_error(result['pic_id'])
            return None

        def login(self, max_retries=3):
            """
            Log in, retrying when a captcha attempt fails.

            :param max_retries: how many extra attempts to make after the first
                one fails inconclusively
            :return: dict with 'status' 1 and the cookies on success, 'status' 2
                on wrong credentials, or 'status' 3 when every attempt failed
            """
            attempts = max(1, max_retries + 1)
            for _ in range(attempts):
                outcome = self._attempt_login()
                if outcome is not None:
                    return outcome
            return {
                'status': 3,
                'content': '登录失败'
            }
    
    
    def get_html(url, timeout=10):
        """
        Fetch a URL while presenting a desktop-browser User-Agent.

        :param url: address to request
        :param timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever
        :return: the requests Response when the status code is 200; None for any
            other status or when a RequestException (timeouts included) is raised
        """
        # Put a User-Agent in the headers so the request looks like a browser.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except RequestException:
            return None
        if response.status_code != 200:
            return None
        response.encoding = response.apparent_encoding
        return response
    
    
    if __name__ == '__main__':
        # Script entry point: build the login helper with placeholder
        # credentials, then run the login flow and keep its result.
        weibo = LoginWeibo('username', 'password')
        result = weibo.login()
    

    本篇博文仅供学习交流相关的爬虫知识,请勿过度使用,如有任何纠纷,与本人无关。(瑟瑟发抖)

    相关文章

      网友评论

          本文标题:验证码破解 | Selenium模拟登陆微博

          本文链接:https://www.haomeiwen.com/subject/yemwihtx.html