最近看到httpx这个库, 它比requests更方便地支持异步, 就想拿现有业务代码试试手(现在用的是threading)
任务:
同时请求一汽大众, 盖世汽车, 凯励程, 汽车之家, 易捷加油登录API, 确定我的手机号是否注册过这些网站, 贴出大众的代码, 剩下的截图, 意思差不多.
import httpx
import asyncio
class DazhongCrawl:
    """Checker for 一汽大众 (FAW-VW).

    Probes the mall.faw-vw.com login API with a dummy password to determine
    whether ``phone`` is registered there: the endpoint answers with a
    distinct message for "account does not exist" vs "wrong password", and a
    wrong-password reply implies the account exists.
    """

    def __init__(self, phone, site):
        self.phone = phone  # phone number to probe
        self.site = site    # human-readable site name, echoed back in the result

    async def crawl(self, client):
        """Probe the login endpoint once.

        Args:
            client: shared ``httpx.AsyncClient`` used for the POST.

        Returns:
            dict: ``{'phone': ..., 'site': ..., 'result': bool}`` where
            ``result`` is True when the phone appears to be registered and
            False otherwise (including on any request/parse error).
        """
        headers = {
            'Host': 'mall.faw-vw.com',
            'Referer': 'https://mall.faw-vw.com/views/personal-center/login/index.html?u=https%3A%2F%2Fmall.faw-vw.com%2Fviews%2Fpersonal-center%2Flogin%2Findex.html',
        }
        data = f'account={self.phone}&password=kkk123456&code='
        try:
            response = await client.post(
                'https://mall.faw-vw.com/User/Login/login',
                headers=headers,
                data=data,
            )
            # Known replies:
            # {'code': 405, 'lockout': False, 'msg': '用户账户不存在'}
            # {'code': 405, 'lockout': False, 'msg': '密码错误'}
            msg = response.json()['msg']
            registered = '密码错误' in msg
        except Exception:
            # Bug fix: the original bare `except:` also swallowed
            # BaseException subclasses such as asyncio.CancelledError and
            # KeyboardInterrupt, breaking task cancellation under gather().
            # `except Exception` keeps the best-effort "treat errors as
            # not-registered" behavior without hijacking cancellation.
            registered = False
        return {'phone': self.phone, 'site': self.site, 'result': registered}




新建一个.py文件
import asyncio
import httpx
from spiders_httpx.car.dazhong_crawl import DazhongCrawl
from spiders_httpx.car.gaishiqiche_crawl import GaishiqicheCrawl
from spiders_httpx.car.kailicheng_crawl import KailichengCrawl
from spiders_httpx.car.qichezhijia_crawl import QichezhijiaCrawl
from spiders_httpx.car.yijie_crawl import YijieCrawl
def get_sites(phone,):
    '''
    Build one crawler instance per supported platform for *phone*.

    Returns a list of crawler objects, each exposing an async ``crawl``
    method, in a fixed platform order.
    '''
    platforms = (
        (DazhongCrawl, "一汽大众"),
        (GaishiqicheCrawl, "盖世汽车"),
        (KailichengCrawl, "凯励程"),
        (QichezhijiaCrawl, "汽车之家"),
        (YijieCrawl, "易捷加油"),
    )
    return [cls(phone=phone, site=name) for cls, name in platforms]
async def run(phone):
    '''
    Check *phone* against every supported platform concurrently.

    All requests share a single AsyncClient; results come back in the
    same order as get_sites().
    '''
    crawlers = get_sites(phone,)
    async with httpx.AsyncClient() as client:
        # One task per platform; gather() awaits them all concurrently
        # and preserves input order in its result list.
        tasks = [asyncio.create_task(c.crawl(client)) for c in crawlers]
        results = await asyncio.gather(*tasks)
        print(results)
        return results
if __name__ == '__main__':
    import time  # bug fix: time.time() was used below but 'time' was never imported

    # Time the fully-async run of all platform checks for one phone number.
    s_t = time.time()
    asyncio.run(run('153xxxx1316'))
    e_t = time.time()
    print('httpx异步耗时: ', e_t - s_t)
# 结果, 除了大众, 剩下的我都注册了
[{'phone': '153xxxx1316', 'site': '一汽大众', 'result': False}, {'phone': '153xxxx1316', 'site': '盖世汽车', 'result': True}, {'phone': '153xxxx1316', 'site': '凯励程', 'result': True}, {'phone': '153xxxx1316', 'site': '汽车之家', 'result': True}, {'phone': '153xxxx1316', 'site': '易捷加油', 'result': True}]
httpx异步耗时: 0.512589693069458
我测试了for循环的requests结果, 当然请求量只有5个, 对比不是很明显(等我把这些平台(将近200个)都改完再来测试):
if __name__ == '__main__':
    import time  # bug fix: time.time() was used below but 'time' was never imported

    # Sequential requests-based baseline, timed for comparison with the
    # async httpx version above.
    # NOTE(review): 'crawl' here is presumably the requests-era factory that
    # returns one crawler object per platform (analogous to get_sites) —
    # confirm against its defining module.
    crawlers = crawl('153xxxx1316')
    l_result = []
    s_t = time.time()
    for crawler in crawlers:
        # Blocking call: each platform is checked one after another.
        l_result.append(crawler.crawl())
    e_t = time.time()
    print('requests for循环耗时: ', e_t - s_t)
#结果:
requests for循环耗时: 1.3432562351226807
网友评论