该类验证码则以12306最为典型,TouClick(https://www.touclick.com/)提供了该类验证码,下面的例子就以该网站为准
该类验证码的破解还是基于selenium/webdriver
破解的思路是:将验证码图片提交给打码平台(崔庆才的书中推荐超级鹰,价格与题型说明见 https://www.chaojiying.com/price.html),平台会返回识别结果在图片中的坐标位置,然后解析这些坐标并模拟点击,最终完成登录
第一步:先注册超级鹰账号并申请软件ID(https://www.chaojiying.com/user/reg/),再充值一些题分
第二步:下载对应的python API,链接为https://www.chaojiying.com/api-14.html,对其进行修改,修改后的代码如下:
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service."""

    def __init__(self, username, password, soft_id):
        """Store the credentials sent with every request.

        username: Chaojiying account name.
        password: plain-text account password; only its MD5 hex digest is kept.
        soft_id: software ID registered for the account.
        """
        self.username = username
        # str has no ``encoding`` method; ``encode`` yields the bytes md5 needs.
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Fields merged into the form data of every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        im: image bytes.
        codetype: captcha type code, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the decoded JSON response.

        im_id: image ID of the captcha being reported.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
        )
        return r.json()
if __name__ == '__main__':
    # Demo: send a local image to Chaojiying and print the JSON it returns.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰用户名的密码', '96001')
    # Read the image inside a context manager so the file handle is closed.
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # print() as a function, matching the rest of the file (Python 3).
    print(chaojiying.PostPic(im, 1902))
在此之前需要稍微了解一下selenium 中ActionChains的使用方法
click(on_element=None) ——单击鼠标左键
click_and_hold(on_element=None) ——点击鼠标左键,不松开
context_click(on_element=None) ——点击鼠标右键
double_click(on_element=None) ——双击鼠标左键
drag_and_drop(source, target) ——拖拽到某个元素然后松开
drag_and_drop_by_offset(source, xoffset, yoffset) ——拖拽到某个坐标然后松开
key_down(value, element=None) ——按下某个键盘上的键
key_up(value, element=None) ——松开某个键
move_by_offset(xoffset, yoffset) ——鼠标从当前位置移动到某个坐标
move_to_element(to_element) ——鼠标移动到某个元素
move_to_element_with_offset(to_element, xoffset, yoffset) ——移动到距某个元素指定偏移量的位置(Selenium 3 以元素左上角为原点;Selenium 4 起改为以元素中心为原点,使用前请确认所用版本)
perform() ——执行链中的所有动作
release(on_element=None) ——在某个元素位置松开鼠标左键
send_keys(*keys_to_send) ——发送某个键到当前焦点的元素
send_keys_to_element(element, *keys_to_send) ——发送某个键到指定元素
下面为代码的实现和注释(声明:以下爬虫代码均为学习崔庆才爬虫书籍后的个人总结,并非原创)
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying
# Login credentials for the TouClick admin page.
# The name must be EMAIL: cracktouclick.__init__ reads EMAIL, not EEMAIL.
EMAIL = 'xxxxx'
PASSWORD = 'xxxx'
# Chaojiying account credentials.
CHAOJIYING_USERNAME = 'xxxx'
CHAOJIYING_PASSWORD = 'xxxxx'
CHAOJIYING_SOFT_ID = 896781  # software ID applied for on the Chaojiying site
CHAOJIYING_KIND = 9102  # Chaojiying type code for the click-the-words captcha
class cracktouclick(object):
    """Drive a Chrome session through the TouClick login page and its captcha.

    The captcha image is cropped out of a page screenshot, sent to the
    Chaojiying client for recognition, and the returned coordinates are
    clicked with ActionChains before the login form is submitted.
    """

    def __init__(self):
        """Start Chrome and store the login and Chaojiying settings."""
        self.url = 'http://admin.touclick.com/login.html'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD
        # NOTE(review): Chaojiying comes from the local ``chaojiying`` module and
        # is expected to expose ``post_pic`` — confirm against that module.
        self.chaojiying = Chaojiying(
            CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID
        )

    def __del__(self):
        """Close the browser window if one was ever opened."""
        # __init__ may fail before ``browser`` is assigned; do not raise here.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            browser.close()

    def open(self):
        """Load the login page and fill in the e-mail and password fields."""
        self.browser.get(self.url)
        # Wait for both input boxes before typing into them.
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_touclick_button(self):
        """Return the button that opens the captcha, once it is clickable."""
        button = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-hod-wrap'))
        )
        return button

    def get_touclick_element(self):
        """Return the element that holds the popped-up captcha image."""
        element = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'touclick-pub-content'))
        )
        return element

    def get_position(self):
        """Return the captcha element's edges as (top, bottom, left, right)."""
        element = self.get_touclick_element()
        # Fixed pause before measuring; presumably lets the image finish rendering.
        time.sleep(2)
        location = element.location
        size = element.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """Return a screenshot of the whole page as a PIL image."""
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_touclick_image(self, name='captcha.png'):
        """Crop the captcha out of a page screenshot, save it and return it.

        name: file path the cropped captcha image is saved to.
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置:', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL's crop box is ordered (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    # The recognition result carries the click positions as a string such as
    # 'pic_str': '132,127|56,77'; it is parsed below and then clicked.

    def get_points(self, captcha_result):
        """Parse the recognition result into a list of [x, y] pairs.

        captcha_result: mapping whose 'pic_str' value looks like '132,127|56,77'.
        Returns an empty list when 'pic_str' is missing or empty.
        """
        pic_str = captcha_result.get('pic_str') or ''
        locations = [
            [int(number) for number in group.split(',')]
            for group in pic_str.split('|')
            if group  # skip empty segments instead of failing on int('')
        ]
        return locations

    def touch_click_words(self, locations):
        """Click each [x, y] offset inside the captcha element, one per second."""
        for location in locations:
            print(location)
            # NOTE(review): Selenium 4 measures these offsets from the element's
            # centre rather than its top-left corner — confirm the installed version.
            ActionChains(self.browser).move_to_element_with_offset(
                self.get_touclick_element(), location[0], location[1]
            ).click().perform()
            time.sleep(1)

    def touch_click_verify(self):
        """Click the captcha's submit button once it is clickable."""
        button = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'touclick-pub-submit'))
        )
        button.click()

    def login(self):
        """Click the login button, wait ten seconds and print a success message."""
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, '_submit')))
        submit.click()
        time.sleep(10)
        print('登录成功')

    def crack(self):
        """Run the whole flow: open the page, solve the captcha, then log in."""
        self.open()
        button = self.get_touclick_button()
        button.click()
        image = self.get_touclick_image()
        # Send the captcha as PNG bytes to Chaojiying together with the type code.
        bytes_array = BytesIO()
        image.save(bytes_array, format='PNG')
        result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
        print(result)
        locations = self.get_points(result)
        self.touch_click_words(locations)
        self.touch_click_verify()
        # NOTE(review): WebDriverWait.until raises TimeoutException when the text
        # never appears, so the retry branch below may be unreachable — confirm.
        success = self.wait.until(
            EC.text_to_be_present_in_element(
                (By.CLASS_NAME, 'touclick-hod-note'), '验证成功'
            )
        )
        print(success)
        if not success:
            self.crack()
        else:
            self.login()
if __name__ == '__main__':
    # Script entry point: build the cracker and run the full login flow once.
    cracker = cracktouclick()
    cracker.crack()
```
网友评论