线程池 concurrent.futures 使用心得
from concurrent.futures import ThreadPoolExecutor, as_completed
# 下载图片
def save_image(self,
               img_url,
               meida_path,
               ):
    """Resolve the local path for an image and download it there.

    :param img_url: URL of the image to fetch.
    :param meida_path: media directory handed on to ``get_img_path``
        (the misspelled name is kept so keyword callers keep working).
    :return: ``(file_path, whole_path)`` exactly as reported by
        ``get_img_path``; no download is attempted when either is falsy.
    """
    # get_img_path takes ``self``, so it has to be reached through the
    # instance rather than called as a bare name.
    whole_path, file_path = self.get_img_path(meida_path, img_url)
    if whole_path and file_path:
        self.download_img(img_url, whole_path)
    return file_path, whole_path
# 对拿到的url 进行分解,创建本地下载的路径
def get_img_path(self,
                 media_path,
                 img_url,
                 ):
    """
    Build the local storage path for an image, mirroring the URL's own path.

    :param media_path: media directory, joined under ``base_dir``
    :param img_url: URL of the image, e.g.
        https://***/doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
    :return: ``(whole_path, file_path)``
        whole_path: full local path of the image
        file_path: the path component of the URL, e.g.
            /doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
        ``(None, None)`` is returned when the file already exists locally
        or when its directory cannot be created, so callers can always
        unpack the result.
    """
    parse_path = urlparse(img_url).path
    url_dir, file_name = os.path.split(parse_path)
    # Strip every leading "/" so os.path.join never sees an absolute
    # component, which would make it discard base_dir and media_path.
    # NOTE(review): ".." segments in the URL path are not neutralised here;
    # confirm the URLs come from a trusted source.
    target_dir = os.path.join(base_dir, media_path, url_dir.lstrip("/"))
    whole_path = os.path.join(target_dir, file_name)
    # An existing file is treated as already downloaded (it may still be
    # missing from the database).
    if os.path.exists(whole_path):
        # TODO: check the URL earlier, before reaching this point
        return None, None
    try:
        os.makedirs(target_dir, exist_ok=True)
    except (OSError, ValueError) as e:
        print(e)
        return None, None
    return whole_path, parse_path
# 拿到路径进行本地下载
def download_img(self,
                 img_url,
                 whole_path,
                 timeout=30):
    """
    Download an image to ``whole_path`` on a best-effort basis.

    :param img_url: URL of the image to fetch
    :param whole_path: local file path to write; if it already exists the
        download is skipped
    :param timeout: seconds passed to ``requests.get`` so a stalled server
        cannot block a worker thread indefinitely
    :return: None. Request and file errors are printed, not raised.
    """
    if os.path.exists(whole_path):
        return
    try:
        # stream=True lets iter_content pull the body chunk by chunk
        # instead of loading the whole response into memory first.
        with requests.get(img_url, stream=True, timeout=timeout) as res:
            # Fail on 4xx/5xx so an error page is never saved as an image.
            res.raise_for_status()
            with open(whole_path, 'wb') as f:
                for chunk in res.iter_content(chunk_size=512):
                    f.write(chunk)
    except (requests.RequestException, OSError) as e:
        print('download_exception:%s' % img_url, e)
        # Remove any partial file; otherwise the exists() check above
        # would skip this image on every later attempt.
        try:
            os.remove(whole_path)
        except OSError:
            pass
# 利用线程池进行多线程下载
def no_name_insert_media(self,
                         url_lst,
                         sql_colums,
                         ):
    """
    Download every URL in ``url_lst`` through a thread pool and collect
    one row per image that was processed.

    ``max_workers`` and ``media_path`` are not parameters; they are read
    from the surrounding scope, which is not shown in this snippet.

    :param url_lst: iterable of image URLs
    :param sql_colums: not used by the code shown here
    :return: list of ``[file_path, uid, md5sum]`` rows, in completion
        order (not input order)
    """
    col_data = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit everything first so the downloads overlap; save_image is
        # a method, so it is submitted bound to this instance.
        all_task = [executor.submit(self.save_image, url, media_path)
                    for url in url_lst]
        for future in as_completed(all_task):
            file_path, whole_path = future.result()
            # save_image reports skipped images with empty paths.
            if not file_path or not whole_path:
                continue
            uid, md5sum = self.md5_uuid(whole_path)
            col_data.append([file_path, uid, md5sum])
    # The rows were previously built and then dropped; hand them back so
    # the caller can use them.
    return col_data
PS:
1,在创建线程池的时候,尽量用 with 语句(退出 with 代码块时线程池会自动关闭):
with ThreadPoolExecutor(max_workers=max_workers) as executor
2,调用带参数的函数时,把参数依次写在 submit 的函数名后面;有多个参数就按顺序继续往后写:
executor.submit(save_image, url, media_path)
这里的url和media_path都是参数
3,有时候用了多线程,却感觉效率没有提升,那多半是写法有问题
all_task = [executor.submit(save_image, url, media_path) for url in url_lst]
col_data = []
for future in as_completed(all_task):
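上面这样写,是先把所有任务一次性提交给线程池,再用 as_completed 逐个取结果,真正利用了线程池。
下面这么写就没有利用线程池:每次 submit 之后立刻调用 result(),而 result() 会一直阻塞到该任务完成才返回,下一个任务要等它结束后才会被提交,所以任务实际上是一个接一个串行执行的。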
for item in url_lst:
img_url = item.get('img_url') if item.get('img_url') else item.get('img')
if not img_url or check_url(img_url): continue
file_path, whole_path = executor.submit(self.save_image, img_url, media_path).result()
网友评论