美文网首页
知乎登录脚本

知乎登录脚本

作者: 异同 | 来源:发表于2019-11-14 15:03 被阅读0次
    import time
    from PIL import Image
    from numpy import argmax
    from selenium import webdriver
    from selenium.webdriver import ActionChains
    from selenium.webdriver.common.keys import Keys
    
    
    class Zhihu:
        """Interactive Selenium helper that logs in to Zhihu with a password.

        Creating an instance starts Chrome, runs the login flow (the credentials
        and any captcha answer are typed on the console), then keeps the browser
        open until it is closed before quitting the driver.
        """

        # Horizontal step in pixels between two characters of the Chinese captcha.
        _CAPTCHA_SLOT = 200 // 8
        # Vertical distance in pixels from the top of the captcha image to the click.
        _CAPTCHA_CLICK_Y = 22
        # Seconds between two checks of whether the browser is still open.
        _POLL_INTERVAL = 1.0

        def __init__(self,
                     url='https://www.zhihu.com/',
                     exec_path="c:/users/python_package/chromedriver.exe"
                     ):
            """Start Chrome, run the login flow, then wait for the browser to close.

            :param url: page opened to start the login flow
            :param exec_path: path of the chromedriver executable
            """
            self._login_failed = 1  # flag: stays 1 until the page URL changes after a login attempt
            self._sig_err = 0  # flag: 1 while the last reported error was a captcha error
            self._init_webdriver(exec_path)
            try:
                self._login(url)
            finally:
                # Whatever happened during login, leave the browser to the user
                # and release the driver once they are done with it.
                try:
                    self._wait_until_closed()
                finally:
                    self.browser.quit()

        def _wait_until_closed(self):
            """Block until the browser reports no window or stops answering.

            The window list is polled every ``_POLL_INTERVAL`` seconds instead of
            in a tight loop. Any driver error is taken as "the browser is gone";
            Ctrl-C also ends the wait so that the caller can still quit the driver.
            """
            try:
                while self.browser.window_handles:
                    time.sleep(self._POLL_INTERVAL)
            except (Exception, KeyboardInterrupt):
                pass

        def _init_webdriver(self, exec_path, other_options=None):
            """Create the Chrome driver and store it in ``self.browser``.

            :param exec_path: path of the chromedriver executable
            :param other_options: extra Chrome argument(s): one string, or a list
                or tuple of strings; any other value is ignored
            """
            options = webdriver.ChromeOptions()
            # Add the other webdriver options requested by the caller.
            if isinstance(other_options, str):
                other_options = [other_options]
            if isinstance(other_options, (list, tuple)):
                for argument in other_options:
                    options.add_argument(argument)
            # Hide window.navigator.webdriver (Chromedriver).
            options.add_experimental_option('excludeSwitches', ['enable-automation'])
            # options.add_argument('--headless')  # would hide the browser window
            # Instantiate the Chrome object.
            self.browser = webdriver.Chrome(executable_path=exec_path, options=options)

        def _login(self, url):
            """Open ``url`` and repeat login attempts until the page URL changes.

            :param url: page that shows the sign-in form
            """
            self.browser.get(url)
            # Switch to the password login tab.
            self.browser.find_element_by_xpath('//div[@class="SignFlow-tabs"]/div[2]').click()

            while self._login_failed:
                current_page = self.browser.current_url
                # After a captcha error the credentials already typed are kept.
                if self._sig_err == 0:
                    self._input()
                self._click_login_button()
                err = self._err_catch()
                print(err)
                if '验证码' in err or '倒立的文字' in err:
                    # Captcha requested or rejected: answer it, then submit again.
                    self._sig_err = 1
                    self._sign_process()
                elif err:
                    # Any other message: ask for the credentials again.
                    self._sig_err = 0
                    print('请重新输入账号密码.\n')
                    continue
                # A URL change during this attempt is taken as a successful login.
                if current_page != self.browser.current_url:
                    self._login_failed = 0
            print('Redirecting..')

        @staticmethod
        def _parse_positions(raw):
            """Turn the typed captcha answer into a list of 1-based positions.

            Values may be separated by an ASCII comma or by a full-width comma
            (U+FF0C); empty items such as a trailing separator are skipped.

            :param raw: text typed by the user, e.g. ``"3"`` or ``"1,3"``
            :return: list of int positions, in the order typed
            :raises ValueError: if an item is not an integer or no item was given
            """
            # The full-width comma is written as an escape so that it cannot be
            # confused with (or normalised into) the ASCII one.
            items = raw.replace('\uff0c', ',').split(',')
            positions = [int(item) for item in items if item.strip()]
            if not positions:
                raise ValueError('no captcha position given')
            return positions

        @classmethod
        def _pointer_moves(cls, positions, origin_x, origin_y):
            """Compute the relative pointer moves that click every position.

            ``ActionChains.move_by_offset`` is relative to where the pointer
            currently is, so the first move leads to the first character, every
            further move is measured from the previously clicked character, and a
            final move undoes the whole displacement so that the next captcha
            attempt starts from the same place again.

            :param positions: non-empty list of 1-based character positions
            :param origin_x: x coordinate of the captcha image
            :param origin_y: y coordinate of the captcha image
            :return: list of ``(dx, dy)`` tuples: one per position to click,
                followed by one last move that is not clicked
            """
            slot = cls._CAPTCHA_SLOT
            first = positions[0]
            # NOTE(review): the "- 1 / slot" term is kept from the original
            # geometry; its purpose is not evident from the code.
            start_x = origin_x + slot * first - 1 / slot
            start_y = origin_y + cls._CAPTCHA_CLICK_Y
            moves = [(start_x, start_y)]
            x_total = start_x
            previous = first
            for position in positions[1:]:
                step = slot * (position - previous)
                moves.append((step, 0))
                x_total += step
                previous = position
            moves.append((-x_total, -start_y))
            return moves

        def _sign_process(self):
            """Show the captcha to the user and enter the answer typed on the console.

            An English captcha is typed into the captcha input; for any other
            captcha class the upside-down characters named by the user are
            clicked. Every failure is reported with one console message and is
            not raised.
            """
            # time.sleep(1)  # sleep 1s to wait for the captcha to load
            try:
                # Raises if the captcha image is not on the page.
                signimg = self.browser.find_element_by_xpath('//img[@alt="图形验证码"]')
                ce = signimg.get_attribute("class")  # captcha type
                self.browser.save_screenshot('capture.png')  # screenshot of the login page
                origin_x = signimg.location['x']
                origin_y = signimg.location['y']
                # Crop and display the captcha so that the user can read it.
                self.capture_sign(origin_x, origin_y, ce)
                if ce == 'Captcha-englishImg':  # type the English captcha
                    signinput = self.browser.find_element_by_xpath('//div[@class="SignFlowInput"]//input[@name="captcha"]')
                    answer = input('输入验证码:\n')
                    signinput.send_keys(answer)
                else:  # Captcha-chineseImg: click the upside-down characters
                    answer = input('输入倒立文字序号(base1),多个则用逗号分隔\n')
                    positions = self._parse_positions(answer)
                    # assumes the pointer starts at the page origin and the page
                    # is not scrolled - TODO confirm
                    *clicks, way_back = self._pointer_moves(positions, origin_x, origin_y)
                    for dx, dy in clicks:
                        ActionChains(self.browser).move_by_offset(dx, dy).click().perform()
                    ActionChains(self.browser).move_by_offset(*way_back).perform()
            except Exception:
                print('未能成功加载验证码.')

        def _input(self):
            """Ask for the credentials on the console and type them into the form."""
            usr = input("username: ")
            pwd = input("password: ")
            u = self.browser.find_element_by_xpath('//input[@name="username"]')
            u.send_keys(Keys.CONTROL, "a")  # select the current content ...
            u.send_keys(Keys.DELETE)  # ... and delete it to clear the field
            u.send_keys(usr)
            p = self.browser.find_element_by_xpath('//input[@name="password"]')
            p.send_keys(Keys.CONTROL, "a")  # clear the field the same way
            p.send_keys(Keys.DELETE)
            p.send_keys(pwd)

        def sig_cv(self):
            """Placeholder; currently does nothing."""
            pass

        def _click_login_button(self):
            """Click the submit button of the sign-in form."""
            self.browser.find_element_by_xpath('//button[contains(@class,"SignFlow-submitButton")]').click()

        def _err_catch(self):
            """Return the error message currently shown by the sign-in form.

            Three kinds of elements are inspected, one second apart.

            :return: the longest message found (the first one on a tie), or ``''``
                when no message is displayed
            """
            def _try_find(tag, class_name):
                # Return the first non-empty text among the matching elements.
                time.sleep(1)  # presumably leaves the page time to show its message
                xpath_str = '//{}[contains(@class,"{}")]'.format(tag, class_name)
                try:
                    for element in self.browser.find_elements_by_xpath(xpath_str):
                        if element.text:
                            return element.text
                except Exception:
                    # A failed lookup counts as "no message".
                    pass
                return ''

            input_err = _try_find('div', 'SignFlowInput-errorMask')  # wrong account or password
            sign_err_e = _try_find('div', 'Captcha-errorMessage')  # wrong English captcha
            sign_err_c = _try_find('span', 'Captcha-error')  # wrong Chinese captcha
            errs = [input_err, sign_err_e, sign_err_c]
            return max(errs, key=len)

        def capture_sign(self, x, y, sign_type, path='capture.png'):
            """Crop the captcha out of a screenshot, save it as sign.png and display it.

            :param x: x coordinate of the captcha in the screenshot
            :param y: y coordinate of the captcha in the screenshot
            :param sign_type: class of the captcha image; 'Captcha-englishImg'
                (typed answer) is cropped to 75x30, anything else is treated as
                the Chinese captcha (click the upside-down characters), 200x44
            :param path: screenshot file to crop
            :return: None
            """
            if sign_type == 'Captcha-englishImg':
                width, height = 75, 30
            else:  # Captcha-chineseImg
                width, height = 200, 44
            box = (x, y, x + width, y + height)  # left, upper, right, lower
            # The context manager closes the screenshot file once the crop has
            # been saved and handed to the viewer.
            with Image.open(path) as img:
                region = img.crop(box)
                region.save('sign.png')
                region.show()
    
    # Script entry point: constructing Zhihu starts Chrome and runs the
    # interactive login flow straight away.
    a = Zhihu()
    

    相关文章

      网友评论

          本文标题:知乎登录脚本

          本文链接:https://www.haomeiwen.com/subject/obyrictx.html