线程池

作者: forjie | 来源:发表于2019-06-04 18:06 被阅读0次

    线程池 concurrent.futures 使用心得

    from concurrent.futures import ThreadPoolExecutor, as_completed
    
    # 下载图片
    def save_image(self,
                   img_url,
                   meida_path,
                   ):
        """Resolve the local target path for ``img_url`` and download the image.

        :param img_url: URL of the image to fetch.
        :param meida_path: media directory, forwarded to ``get_img_path`` as
            its ``media_path`` argument (the parameter name is kept as-is so
            existing keyword callers keep working).
        :return: ``(file_path, whole_path)`` as produced by ``get_img_path``;
            ``(None, None)`` when ``get_img_path`` reports nothing to
            download or returns no result at all.
        """
        # get_img_path takes ``self`` in this file, so it must be called
        # through the instance, the same way download_img is below.
        paths = self.get_img_path(meida_path, img_url)
        if not paths:
            # Guard against a falsy/None result so the unpack below cannot
            # raise TypeError inside a worker thread.
            return None, None

        whole_path, file_path = paths
        if whole_path and file_path:
            self.download_img(img_url, whole_path)
        return file_path, whole_path
    
    # 对拿到的url 进行分解,创建本地下载的路径
    def get_img_path(self,
                     media_path,
                     img_url,
                     ):
        """Build the local storage path for an image URL and create its directory.

        The path component of the URL is reused as the relative layout below
        ``os.path.join(base_dir, media_path)``.

        :param media_path: media directory, joined onto ``base_dir``
        :param img_url:    URL of the image
               eg: https://***/doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
        :return: ``(whole_path, file_path)``
            whole_path: full local path of the target file
            file_path:  path part of the URL,
                        eg: /doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
            ``(None, None)`` is returned when the file already exists locally
            or when the target directory cannot be created.
        """
        parse_path = urlparse(img_url).path
        h, file_name = os.path.split(parse_path)
        # Strip every leading "/" so os.path.join never sees an absolute
        # component (which would silently discard base_dir and media_path).
        basename = os.path.join(base_dir, media_path, h.lstrip('/'))
        whole_path = os.path.join(basename, file_name)
        # Already on disk: skip the download.
        # PS: the file may exist without having been recorded in the database.
        if os.path.exists(whole_path):
            # TODO: check the URL earlier, before reaching this point
            return None, None
        try:
            # exist_ok=True makes concurrent workers creating the same
            # directory harmless.
            os.makedirs(basename, exist_ok=True)
        except (OSError, ValueError) as e:
            print(e)
            # Always hand back a 2-tuple so callers can unpack the result.
            return None, None
        return whole_path, parse_path
    
    # 拿到路径进行本地下载
    def download_img(self,
                     img_url,
                     whole_path,
                     timeout=10):
        """Download ``img_url`` to ``whole_path`` on a best-effort basis.

        Nothing is done when ``whole_path`` already exists. Network errors,
        non-2xx responses and write errors are reported with ``print`` and
        swallowed; a partially written file is removed so that a later call
        is not short-circuited by the existence check.

        :param img_url: URL of the image to fetch.
        :param whole_path: full local path of the file to write.
        :param timeout: seconds passed to ``requests.get`` so a stalled
            server cannot block a worker thread forever.
        :return: None
        """
        if os.path.exists(whole_path):
            return
        try:
            # stream=True lets iter_content read the body in chunks rather
            # than loading the whole image into memory first.
            with requests.get(img_url, stream=True, timeout=timeout) as res:
                # Do not store an HTTP error page as if it were the image.
                res.raise_for_status()
                with open(whole_path, 'wb') as f:
                    for chunk in res.iter_content(chunk_size=512):
                        f.write(chunk)
        except (requests.RequestException, OSError) as e:
            print('download_exception:%s (%s)' % (img_url, e))
            # Drop any partial file; otherwise the exists() guard above
            # would prevent every future retry.
            try:
                os.remove(whole_path)
            except OSError:
                pass
    
    # 利用线程池进行多线程下载
    def no_name_insert_media(self,
                             url_lst,
                             sql_colums,
                             ):
        """Download every URL in ``url_lst`` through a thread pool.

        All downloads are submitted up front and the results are consumed
        with ``as_completed``, so the downloads can overlap.

        :param url_lst: iterable of image URLs.
        :param sql_colums: not used by the code visible here.
        :return: list of ``[file_path, uid, md5sum]`` rows, one per image
            that ``save_image`` reported as handled, in completion order.
        """
        col_data = []
        # NOTE(review): max_workers and media_path are not parameters of this
        # method; they are resolved from an enclosing scope - confirm where
        # they are defined.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # save_image is a method, so the bound method is submitted.
            all_task = [executor.submit(self.save_image, url, media_path) for url in url_lst]
            for future in as_completed(all_task):
                try:
                    file_path, whole_path = future.result()
                except Exception as e:
                    # One failing download must not abort the whole batch.
                    print('save_image_exception:%s' % e)
                    continue
                if not file_path or not whole_path:
                    continue
                uid, md5sum = self.md5_uuid(whole_path)
                col_data.append([file_path, uid, md5sum])
        return col_data
    

    PS:
    1,在创建线程池的时候,尽量使用 with 语句(上下文管理器),这样代码块结束时会自动等待任务完成并关闭线程池:

    with ThreadPoolExecutor(max_workers=max_workers) as executor
    

    2,提交带参数的函数时,把参数依次写在 submit 中函数名的后面;有多个参数就按顺序继续往后写:

    executor.submit(save_image, url, media_path)
    

    这里的url和media_path都是参数
    3,有时候虽然用了多线程,却感觉效率没有提升,那很可能是写法有问题

    all_task = [executor.submit(save_image, url, media_path) for url in url_lst]
                col_data = []
                for future in as_completed(all_task):
    

    上面这样写,是利用了线程池的:先把所有任务一次性提交,再用 as_completed 逐个取结果。
    下面这么写就没有利用到线程池:submit() 之后立刻调用 result() 会阻塞到该任务完成,下一个任务要等它结束后才会被提交,实际上变成了串行执行。

      # Anti-pattern: each task is submitted and then waited on immediately.
      # Future.result() blocks until that task finishes, so the next URL is
      # only submitted afterwards and the downloads run one at a time.
      for item in url_lst:
              # Prefer the 'img_url' key and fall back to 'img'.
              img_url = item.get('img_url') if item.get('img_url') else item.get('img')
              if not img_url or check_url(img_url): continue
              file_path, whole_path = executor.submit(self.save_image, img_url, media_path).result()
    

    相关文章

      网友评论

          本文标题:线程池

          本文链接:https://www.haomeiwen.com/subject/casrxctx.html