批量下载系统中多个文件
import asyncio
import http.cookiejar
import urllib.parse
import urllib.request

import aiohttp
# 1. Log in to the system and capture the session cookies.
# The CookieJar stores the cookies, HTTPCookieProcessor is the handler that
# manages them, and build_opener() produces an opener using that handler.
cookiejar = http.cookiejar.CookieJar()
handler = urllib.request.HTTPCookieProcessor(cookiejar)
opener = urllib.request.build_opener(handler)
# Headers sent with the download requests; the Cookie entry is added below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}
url_login = 'http://xxx.xxx.xx.xx/xxxx/login'
FormData = {'username':'xxxxx', 'password':'xxxxx'}  # dict of form fields
postdata = urllib.parse.urlencode(FormData).encode()  # urlencoded bytes for the POST body
request = urllib.request.Request(url_login, postdata)
# Opening the login URL stores the cookies it returns in cookiejar.
response = opener.open(request)
# Only the cookies are needed, so release the connection right away.
response.close()
# Send every cookie, not just the last one: the Cookie header carries all
# pairs joined by '; '.
Cookie = '; '.join('%s=%s' % (item.name, item.value) for item in cookiejar)
# Without a Cookie header the system treats the user as not logged in; skip the
# header entirely when the login returned no cookies instead of sending an
# empty value.
if Cookie:
    headers['Cookie'] = Cookie
# File URLs to download.
# The last entry previously pointed at 'downloadTemptale' (typo) and the list
# ended with an empty-string placeholder, which is not a valid URL.
URL = [
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=userTemplate.xlsx',
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=ProductTemplate.xlsx',
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=ProductGroup.xlsx',
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=MaterialTemplate.xlsx',
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=exceptionTemplate.xlsx',
    'http://xxx.xxx.xx.xx/xxxx/downloadTemplate?templateName=saleOrderTemplate.xlsx',
]
# Number of files requested.
count = len(URL)
async def job(session, url, headers=headers):
    """Download one file and save it in the local templates directory.

    Args:
        session: Client session used to issue the GET request.
        url: File URL; the text after its last '=' is used as the file name.
        headers: Request headers; defaults to the module-level headers dict.

    Returns:
        The URL that was downloaded, as a string.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a 4xx/5xx
            status, so an error page is never saved as a template file.
    """
    # The file name is whatever follows the last '=' in the URL.
    name = url.split('=')[-1]
    # async with releases the connection even if the status check or the
    # read fails; awaiting here lets other downloads run in the meantime.
    async with session.get(url, headers=headers) as response:
        response.raise_for_status()
        filecode = await response.read()
    # Write the downloaded bytes into the target directory.
    with open('D:/xx/TemplateFiles/Excels/' + str(name), 'wb') as f:
        f.write(filecode)
    return str(url)
async def main(loop, URL):
    """Download every URL concurrently and print a summary.

    Args:
        loop: Event loop used to schedule one download task per URL.
        URL: Iterable of file URLs; each one is handed to job().

    A failed download is reported and left out of the downloaded count
    instead of aborting the summary with the first exception.
    """
    async with aiohttp.ClientSession() as session:
        # Build one task per URL actually passed in, rather than indexing
        # the argument by the module-level count.
        tasks = [loop.create_task(job(session, url)) for url in URL]
        all_results = []
        # asyncio.wait() raises ValueError when given an empty collection.
        if tasks:
            # Suspend here until every task has finished.
            finished, _ = await asyncio.wait(tasks)
            for task in finished:
                if task.cancelled():
                    continue
                error = task.exception()
                if error is None:
                    all_results.append(task.result())
                else:
                    print("下载失败: ", repr(error))
        print("请求下载数", len(tasks), "已下载数: ", len(all_results))
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# when no loop is running.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main(loop, URL))
finally:
    # Close the loop even when main() raises.
    loop.close()
网友评论