美文网首页
download pics with multithreads

download pics with multithreads

作者: satyrs_sh | 来源:发表于2017-12-05 12:05 被阅读0次

    Python 3.4.2
    最简单

    show me the code

    • download.py
    import json
    import logging
    import os
    from pathlib import Path
    from urllib.request import urlopen, Request
    
    # Module-level logger, named after this module via __name__.
    logger = logging.getLogger(__name__)
    
    def get_links(client_id):
        """Fetch the Imgur gallery listing and return the link of every item.

        Sends a GET request to ``https://api.imgur.com/3/gallery/`` carrying an
        ``Authorization: Client-ID <client_id>`` header, decodes the response
        body as UTF-8 JSON and returns a lazy iterator over ``item['link']``
        for each entry of the payload's ``data`` list.

        :param client_id: value placed after ``Client-ID`` in the header.
        :return: iterator of link values, in the order the API returned them.
        """
        headers = {'Authorization': 'Client-ID {}'.format(client_id)}
        req = Request('https://api.imgur.com/3/gallery/', headers=headers, method='GET')
        with urlopen(req) as resp:
            # read() is the documented way to obtain the whole body; readall()
            # is not part of the documented response API and is missing on
            # newer Python 3 releases.
            data = json.loads(resp.read().decode('utf-8'))
        return map(lambda item: item['link'], data['data'])
    
    def download_link(directory, link):
        """Download ``link`` into ``directory``, named after the URL's last segment.

        :param directory: ``pathlib.Path`` of the target directory (it is
            combined with the file name using the ``/`` operator).
        :param link: URL to fetch; ``os.path.basename(link)`` becomes the
            name of the file that is written.
        """
        logger.info('Downloading %s', link)
        download_path = directory / os.path.basename(link)
        with urlopen(link) as image, download_path.open('wb') as f:
            # read() works for every response object urlopen can return;
            # readall() is undocumented there and absent on newer Python 3.
            f.write(image.read())
    
    def setup_download_dir():
        """Return the ``images`` directory path, creating the directory if absent.

        The path is relative, so it resolves against the current working
        directory.
        """
        target = Path('images')
        if target.exists():
            # Already there: nothing to create.
            return target
        target.mkdir()
        return target
    
    • single.py
    from download import setup_download_dir, get_links, download_link
    
    import logging
    import os
    from time import time

    # Logging setup: DEBUG level with timestamp, logger name and level in each
    # record; the 'requests' logger is limited to CRITICAL.
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logging.getLogger('requests').setLevel(logging.CRITICAL)
    logger = logging.getLogger(__name__)


    def main():
        """Download every gallery link ending in ``.jpg``, one after another.

        Reads the client id from the ``IMGUR_CLIENT_ID`` environment variable,
        prepares the download directory, downloads each matching link
        sequentially and prints the elapsed time.

        :raises Exception: if ``IMGUR_CLIENT_ID`` is unset or empty.
        """
        ts = time()
        client_id = os.getenv('IMGUR_CLIENT_ID')
        if not client_id:
            raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
        download_dir = setup_download_dir()
        links = [l for l in get_links(client_id) if l.endswith('.jpg')]
        for link in links:
            download_link(download_dir, link)
        print('Took {}s'.format(time() - ts))


    if __name__ == '__main__':
        main()
    
    • using threads
    from queue import Queue
    from threading import Thread
    
    # Each iteration takes one (directory, link) pair from the queue; get()
    # blocks while the queue is empty. Once download_link has finished, the
    # worker thread tells the queue that the task is done. Until every queued
    # task has been marked done, queue.join() keeps the main thread blocked.
    class DownloadWorker(Thread):
        """Thread that endlessly pulls download jobs from a shared queue.

        :param queue: queue whose items are ``(directory, link)`` tuples.
        """

        def __init__(self, queue):
            Thread.__init__(self)
            self.queue = queue

        def run(self):
            """Loop forever: fetch a job, download it, mark it as done."""
            import logging

            log = logging.getLogger(__name__)
            while True:
                directory, link = self.queue.get()
                try:
                    download_link(directory, link)
                except Exception:
                    # A failed download must not kill the worker thread;
                    # record it and move on to the next job.
                    log.exception('Failed to download %s', link)
                finally:
                    # Always acknowledge the job; otherwise queue.join()
                    # would wait forever on a task that raised.
                    self.queue.task_done()
    
    
    
    def main(num_workers=8):
        """Download every gallery link ending in ``.jpg`` using worker threads.

        Reads the client id from the ``IMGUR_CLIENT_ID`` environment variable,
        starts ``num_workers`` daemon ``DownloadWorker`` threads that share one
        queue, enqueues a ``(download_dir, link)`` job per link, waits for the
        queue to be fully processed and prints the elapsed time.

        :param num_workers: number of worker threads to start (default 8).
        :raises Exception: if ``IMGUR_CLIENT_ID`` is unset or empty.
        """
        ts = time()
        client_id = os.getenv('IMGUR_CLIENT_ID')
        if not client_id:
            raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
        download_dir = setup_download_dir()
        links = [l for l in get_links(client_id) if l.endswith('.jpg')]
        # Queue used to hand jobs to all worker threads.
        queue = Queue()
        # The workers share this single queue and take their jobs from it.
        for _ in range(num_workers):
            worker = DownloadWorker(queue)
            # Daemon threads do not keep the process alive, so the program can
            # exit even though the workers are blocked waiting on the queue.
            worker.daemon = True
            worker.start()
        # Put the jobs on the queue.
        for link in links:
            # Lazy %-style arguments: formatted only if the record is emitted.
            logger.info('Queueing %s', link)
            queue.put((download_dir, link))
        # Block the main thread until every queued job has been marked done.
        queue.join()
        print('Took {}'.format(time() - ts))
    

    在 8 核机器上,多线程版本比单线程版本快了 4 倍。

    相关文章

      网友评论

          本文标题:download pics with multithreads

          本文链接:https://www.haomeiwen.com/subject/wiktixtx.html