import time
from PIL import Image
from numpy import argmax
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
class Zhihu:
    """Interactive, console-driven Selenium login helper for zhihu.com.

    Constructing an instance launches Chrome, opens the login page and
    prompts on the console for the username, the password and, when the
    page reports a captcha error, the captcha answer.  Once the login loop
    ends (or raises), the constructor blocks until the browser window is
    closed and then quits the driver.
    """

    # Horizontal pixels per character slot; the code treats the Chinese
    # captcha image as 200px wide (see ``capture_sign``).
    _CHAR_SPACE = 200 // 8
    # Vertical click offset inside the 44px-high Chinese captcha crop.
    _CLICK_Y_OFFSET = 22

    def __init__(self,
                 url='https://www.zhihu.com/',
                 exec_path="c:/users/python_package/chromedriver.exe"):
        """Start Chrome, run the login flow, then wait for the window to close.

        :param url: page to open; the password-login tab is clicked on it.
        :param exec_path: path to the chromedriver executable.
        """
        self._login_failed = 1  # flag: stays 1 until the URL changes after a login click
        self._sig_err = 0  # flag: 1 while the most recent error was a captcha error
        self._init_webdriver(exec_path)
        try:
            self._login(url)
        finally:
            self._wait_until_closed()

    def _wait_until_closed(self, poll_interval=0.5):
        """Block until the browser is gone, then quit the driver.

        The driver is polled through ``window_handles``; an exception from
        that call, or an empty handle list, is taken to mean the browser was
        closed.  Sleeping between polls avoids hammering the driver in a
        tight loop, and ``quit()`` runs even if the wait is interrupted.

        :param poll_interval: seconds to sleep between polls.
        """
        try:
            while True:
                try:
                    if not self.browser.window_handles:
                        break
                except Exception:
                    break
                time.sleep(poll_interval)
        finally:
            self.browser.quit()

    def _init_webdriver(self, exec_path, other_options=None):
        """Create ``self.browser`` as a Chrome driver.

        :param exec_path: path to the chromedriver executable.
        :param other_options: one extra Chrome argument (str) or a
            list/tuple of them; any other value is ignored.
        """
        options = webdriver.ChromeOptions()
        # Add any extra webdriver arguments supplied by the caller.
        if isinstance(other_options, str):
            other_options = [other_options]
        if isinstance(other_options, (list, tuple)):
            for each in other_options:
                options.add_argument(each)
        # Hide window.navigator.webdriver (Chromedriver automation switch).
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        # options.add_argument('--headless')  # would hide the browser UI
        self.browser = webdriver.Chrome(executable_path=exec_path, options=options)

    def _login(self, url):
        """Run the prompt / submit / check-error loop until the URL changes.

        :param url: login page to open.
        """
        self.browser.get(url)
        # Switch to the password-login tab.
        self.browser.find_element_by_xpath('//div[@class="SignFlow-tabs"]/div[2]').click()
        while self._login_failed:
            current_page = self.browser.current_url
            # Credentials are only re-entered when the last failure was not
            # a captcha error.
            if self._sig_err == 0:
                self._input()
            self._click_login_button()
            err = self._err_catch()
            print(err)
            if '验证码' in err or '倒立的文字' in err:
                self._sig_err = 1
                self._sign_process()
            elif err:
                self._sig_err = 0
                print('请重新输入账号密码.\n')
                continue
            # A changed URL is treated as a successful login.
            if current_page != self.browser.current_url:
                self._login_failed = 0
                print('Redirecting..')

    @staticmethod
    def _parse_indices(answer):
        """Parse 1-based character indices typed on the console.

        Accepts a single number or several separated by ASCII or full-width
        commas; blank entries (e.g. from a trailing comma) are skipped.

        :param answer: raw console input.
        :return: list of ints in the order typed.
        :raises ValueError: if an entry is not an integer or none was given.
        """
        parts = answer.replace('\uff0c', ',').split(',')
        indices = [int(part) for part in parts if part.strip()]
        if not indices:
            raise ValueError('no character index given')
        return indices

    @classmethod
    def _click_moves(cls, indices, origin_x, origin_y):
        """Return the relative pointer moves that click each index in turn.

        The first move goes from the pointer's start position to the first
        character; every later move is relative to the previously clicked
        character, because ``move_by_offset`` is relative to the current
        pointer position.

        :param indices: 1-based character indices, in click order.
        :param origin_x: x coordinate of the captcha image.
        :param origin_y: y coordinate of the captcha image.
        :return: list of ``(dx, dy)`` tuples.
        """
        space = cls._CHAR_SPACE
        # NOTE(review): the ``- 1 / space`` term is kept from the original
        # formula; it shifts the click by only 0.04px -- confirm whether a
        # half-slot offset was intended.
        moves = [(origin_x + space * indices[0] - 1 / space,
                  origin_y + cls._CLICK_Y_OFFSET)]
        for previous, current in zip(indices, indices[1:]):
            moves.append((space * (current - previous), 0))
        return moves

    def _sign_process(self):
        """Show the captcha to the user and enter the answer they type.

        A screenshot is saved and the captcha region is cropped and
        displayed.  For the English captcha the typed text is sent to the
        captcha input; otherwise the typed indices are clicked on the image.
        Failures are reported on the console instead of being raised.
        """
        try:
            # Check that the captcha image is present.
            signimg = self.browser.find_element_by_xpath('//img[@alt="图形验证码"]')
            ce = signimg.get_attribute("class")  # captcha type
            location = signimg.location
            self.browser.save_screenshot('capture.png')  # screenshot of the login page
            # Crop the captcha region so the user can look at it.
            self.capture_sign(location['x'], location['y'], ce)
            if ce == 'Captcha-englishImg':  # type the English captcha
                signinput = self.browser.find_element_by_xpath(
                    '//div[@class="SignFlowInput"]//input[@name="captcha"]')
                signinput.send_keys(input('输入验证码:\n'))
                return
            # Captcha-chineseImg: click the upside-down characters.
            answer = input('输入倒立文字序号(base1),多个则用逗号分隔\n')
            try:
                indices = self._parse_indices(answer)
            except ValueError:
                # A typo is not a captcha-loading failure; say so and let the
                # login loop ask again.
                print('序号输入无效.')
                return
            moves = self._click_moves(indices, location['x'], location['y'])
            for dx, dy in moves:
                ActionChains(self.browser).move_by_offset(dx, dy).click().perform()
            # Undo the accumulated displacement so a retry starts from the
            # same pointer position as this attempt did.
            x_total = sum(dx for dx, _ in moves)
            y_total = sum(dy for _, dy in moves)
            ActionChains(self.browser).move_by_offset(-x_total, -y_total).perform()
        except Exception:
            print('未能成功加载验证码.')

    def _fill(self, xpath, value):
        """Replace the content of the input located by *xpath* with *value*."""
        field = self.browser.find_element_by_xpath(xpath)
        field.send_keys(Keys.CONTROL, "a")  # select existing text so it can be removed
        field.send_keys(Keys.DELETE)
        field.send_keys(value)

    def _input(self):
        """Prompt for username and password and type them into the form."""
        # Imported here so the method stays self-contained; getpass keeps the
        # password from being echoed to the terminal.
        from getpass import getpass

        usr = input("username: ")
        pwd = getpass("password: ")
        self._fill('//input[@name="username"]', usr)
        self._fill('//input[@name="password"]', pwd)

    def sig_cv(self):
        """Placeholder; currently does nothing."""
        pass

    def _click_login_button(self):
        """Click the form's submit button."""
        self.browser.find_element_by_xpath(
            '//button[contains(@class,"SignFlow-submitButton")]').click()

    def _err_catch(self, delay=1):
        """Return the longest error message currently shown, or ``''``.

        Three places are checked: the input error mask, the English captcha
        error and the Chinese captcha error.  On a tie in length the earlier
        one in that order wins.

        :param delay: seconds to sleep before each of the three lookups.
        :return: the message text, or an empty string if none is shown.
        """
        def _try_find(tag, cname):
            time.sleep(delay)
            try:
                xpath_str = '//{}[contains(@class,"{}")]'.format(tag, cname)
                for each in self.browser.find_elements_by_xpath(xpath_str):
                    if each.text:
                        return each.text
            except Exception:
                # Best effort: any lookup failure counts as "no message".
                return ''
            return ''

        errs = [
            _try_find('div', 'SignFlowInput-errorMask'),  # wrong account / password
            _try_find('div', 'Captcha-errorMessage'),  # wrong English captcha
            _try_find('span', 'Captcha-error'),  # wrong Chinese captcha
        ]
        return max(errs, key=len)

    def capture_sign(self, x, y, sign_type, path='capture.png'):
        """Crop the captcha out of a page screenshot, save it and display it.

        The crop is written to ``sign.png`` and opened in the default image
        viewer.

        :param x: captcha x coordinate within the screenshot.
        :param y: captcha y coordinate within the screenshot.
        :param sign_type: ``'Captcha-englishImg'`` selects a 75x30 crop; any
            other value (the Chinese click captcha) selects 200x44.
        :param path: screenshot file to crop from.
        :return: None
        """
        if sign_type == 'Captcha-englishImg':
            width, height = 75, 30
        else:  # Captcha-chineseImg
            width, height = 200, 44
        # The context manager closes the screenshot file when done.
        with Image.open(path) as img:
            region = img.crop((x, y, x + width, y + height))  # left, top, right, bottom
            region.save('sign.png')
            region.show()
# Script entry: constructing Zhihu launches Chrome and runs the interactive login right away.
a = Zhihu()
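# 网友评论 ("User comments") -- leftover heading from the web page this script was copied from; not part of the program.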