python 3.4.2
The simplest approach — show me the code:
- download.py
import json
import logging
import os
from pathlib import Path
from urllib.request import urlopen, Request
logger = logging.getLogger(__name__)  # module-level logger, named after this module
def get_links(client_id):
    """Fetch the Imgur front-page gallery and return an iterator of image links.

    Args:
        client_id: Imgur API client ID, sent in the Authorization header.

    Returns:
        An iterator over the 'link' field of each gallery item.
    """
    headers = {'Authorization': 'Client-ID {}'.format(client_id)}
    req = Request('https://api.imgur.com/3/gallery/', headers=headers, method='GET')
    with urlopen(req) as resp:
        # BUG FIX: HTTPResponse has no readall() method; read() returns the
        # entire response body.
        data = json.loads(resp.read().decode('utf-8'))
    return map(lambda item: item['link'], data['data'])
def download_link(directory, link):
    """Download one image URL into *directory*, named after the URL's basename.

    Args:
        directory: pathlib.Path of the destination directory.
        link: URL of the image to download.
    """
    logger.info('Downloading %s', link)
    download_path = directory / os.path.basename(link)
    with urlopen(link) as image, download_path.open('wb') as f:
        # BUG FIX: HTTPResponse has no readall() method; read() returns the
        # entire response body.
        f.write(image.read())
def setup_download_dir():
    """Ensure the 'images' download directory exists and return its Path."""
    target = Path('images')
    if target.exists():
        return target
    target.mkdir()
    return target
- single.py
# single.py — sequential (single-threaded) downloader.
# BUG FIX: the snippet used logging, os.getenv and time() without importing them.
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

# Logging setup: timestamped records; silence the noisy 'requests' logger.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.getLogger('requests').setLevel(logging.CRITICAL)
logger = logging.getLogger(__name__)

ts = time()
client_id = os.getenv('IMGUR_CLIENT_ID')
if not client_id:
    raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
download_dir = setup_download_dir()
links = [l for l in get_links(client_id) if l.endswith('.jpg')]
# Download every link one after another — this is the baseline to beat.
for link in links:
    download_link(download_dir, link)
print('Took {}s'.format(time() - ts))
- using threads
from queue import Queue
from threading import Thread
# Each iteration a worker thread pulls a URL from the queue (blocking while the
# queue is empty), runs download_link, then acknowledges completion via
# task_done(). queue.join() blocks the main thread until all tasks are done.
class DownloadWorker(Thread):
    """Worker thread that consumes (directory, link) tasks from a shared queue."""

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue  # thread-safe task queue shared with the producer

    def run(self):
        # Loop forever: fetch a task (blocks while the queue is empty),
        # download it, and always acknowledge it.
        while True:
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            finally:
                # BUG FIX: without the try/finally, an exception raised by
                # download_link would skip task_done() and queue.join()
                # would then block the main thread forever.
                self.queue.task_done()
def main():
    """Queue every .jpg gallery link and download them on 8 worker threads.

    Raises:
        Exception: if the IMGUR_CLIENT_ID environment variable is not set.
    """
    # NOTE(review): relies on time(), os, logger and the download.py helpers
    # being imported at the top of this module — confirm in the full file.
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    # Queue shared with every worker thread; queue.Queue is thread-safe.
    queue = Queue()
    # 8 workers pull tasks from the shared queue.
    for _ in range(8):
        worker = DownloadWorker(queue)
        # Daemon threads let the process exit even while workers are blocked
        # waiting on an empty queue.
        worker.daemon = True
        worker.start()
    # Hand every task to the workers.
    for link in links:
        # Lazy %-style args: the message is only formatted if actually logged.
        logger.info('Queueing %s', link)
        queue.put((download_dir, link))
    # Block until every queued task has been acknowledged via task_done().
    queue.join()
    # CONSISTENCY FIX: the single-threaded version prints 'Took {}s'.
    print('Took {}s'.format(time() - ts))
On an 8-core machine this ran roughly 4x faster than the single-threaded version.
Reader comments