美文网首页Python
Python并发:线程、进程、协程的抓取速度

Python并发:线程、进程、协程的抓取速度

作者: SeanCheney | 来源:发表于2020-10-26 16:09 被阅读0次

    用一段代码测试线程、进程、协程的抓取速度:

    # -*- coding: utf-8 -*-
    
    import time
    import asyncio
    import aiohttp
    import requests
    import threading
    import multiprocessing
    from multiprocessing import Process
    from multiprocessing.dummy import Pool as ThreadPool
    
    
    # Benchmark switches, one per concurrency strategy; all start disabled (0).
    # The script entry point iterates these keys in this order.
    OPTION = dict.fromkeys(
        (
            "COROUTINE",
            "SINGLE_THREAD",
            "MULTI_THREADS",
            "MULTI_THREADS_COROUTINE",
            "MULTI_PROCESSES",
            "MUTL_PROCESSES_COROUTINE",
        ),
        0,
    )

    # Shared work list of URLs to fetch; filled in by the script entry point.
    urls = list()
    
    def getsource(url, timeout=10):
        """Fetch ``url`` with a blocking GET request and discard the response.

        Args:
            url: Address to request.
            timeout: Seconds to wait on the server before giving up. Without a
                timeout ``requests`` waits indefinitely, so a single stalled
                connection would hang the whole benchmark run.

        Raises:
            requests.exceptions.RequestException: If the request cannot be
                completed or times out.
        """
        requests.get(url, timeout=timeout)
    
    async def agetsource(url):
        """Fetch ``url`` asynchronously via aiohttp and discard the decoded body."""
        async with aiohttp.request("GET", url) as resp:
            # Read the whole body so the request runs to completion.
            _ = await resp.text()
    
    def singleThread():
        """Fetch every entry of ``urls`` sequentially in the calling thread."""
        for target in urls:
            getsource(target)
    
    def multithreads():
        """Fetch every entry of ``urls`` using a pool of four worker threads."""
        workers = ThreadPool(4)
        # map() blocks until getsource has been applied to every URL.
        workers.map(getsource, urls)
        workers.close()
        workers.join()
    
    def multiprocesses():
        """Fetch every entry of ``urls`` using a pool of four worker processes."""
        workers = multiprocessing.Pool(processes=4)
        # Queue one asynchronous job per URL; the results are not collected.
        for target in urls:
            workers.apply_async(getsource, args=(target,))
        # No more jobs will be submitted; wait for the workers to finish.
        workers.close()
        workers.join()
    
    async def amain(index, pool_size):
        """Concurrently fetch one worker's share of ``urls``.

        ``urls`` is divided into ``pool_size`` contiguous slices and the slice
        numbered ``index`` is fetched with one ``agetsource`` call per URL.

        Args:
            index: Zero-based number of the slice to handle.
            pool_size: Total number of slices (cooperating workers).
        """
        total = len(urls)
        # Integer boundaries spread any remainder across the slices, so every
        # URL lands in exactly one slice even when ``total`` is not a multiple
        # of ``pool_size`` (flooring the slice length would drop the leftovers).
        start_index = index * total // pool_size
        end_index = (index + 1) * total // pool_size

        # gather() resumes as soon as the last request finishes, rather than
        # polling on a timer. return_exceptions=True keeps one failed request
        # from aborting the rest of the batch.
        await asyncio.gather(
            *(agetsource(url) for url in urls[start_index:end_index]),
            return_exceptions=True,
        )
    
    def main(index, pool_size):
        """Run ``amain(index, pool_size)`` to completion on a fresh event loop.

        A new loop is created and installed as the current loop of the calling
        thread before the coroutine is run.

        Args:
            index: Zero-based slice number forwarded to ``amain``.
            pool_size: Total number of slices forwarded to ``amain``.
        """
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(amain(index, pool_size))
        finally:
            # Each call creates its own loop; close it so its resources are
            # released instead of leaking one loop per call.
            loop.close()
    
    def mutithreads_coroutine():
        """Run ``main(i, 4)`` for i in 0..3, each in its own thread, and wait for all."""
        workers = [
            threading.Thread(target=main, args=(slot, 4))
            for slot in range(4)
        ]
        # Start every thread before joining any, so they run concurrently.
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    
    def _run_slice_in_child(index, pool_size, url_list):
        """Child-process entry point: install the URL list, then run ``main``."""
        # Under the "spawn" start method the child re-imports this module
        # without running the __main__ guard, so its ``urls`` would otherwise
        # be empty and the child would fetch nothing.
        urls[:] = url_list
        main(index, pool_size)

    def multiprocesses_coroutine():
        """Run ``main(i, 4)`` for i in 0..3, each in its own process, and wait for all.

        The current contents of ``urls`` are passed to every child explicitly,
        so the children see the same work list whichever start method
        ("fork" or "spawn") the platform uses.
        """
        processes = [
            Process(target=_run_slice_in_child, args=(index, 4, list(urls)))
            for index in range(4)
        ]

        # Start every process before joining any, so they run concurrently.
        for process in processes:
            process.start()

        for process in processes:
            process.join()
    
    if __name__ == "__main__":
        # Every strategy fetches the same fixed list, so build it once and
        # outside the timed region. The list is mutated in place because the
        # worker functions read the module-level ``urls``.
        urls.clear()
        urls.extend('http://www.baidu.com/' for _ in range(50))

        # Maps each OPTION key to the callable that runs that strategy.
        runners = {
            # single thread
            "SINGLE_THREAD": singleThread,
            # multiple threads
            "MULTI_THREADS": multithreads,
            # multiple processes
            "MULTI_PROCESSES": multiprocesses,
            # single thread + coroutines
            "COROUTINE": lambda: main(0, 1),
            # multiple threads + coroutines
            "MULTI_THREADS_COROUTINE": mutithreads_coroutine,
            # multiple processes + coroutines
            "MUTL_PROCESSES_COROUTINE": multiprocesses_coroutine,
        }

        # Run the strategies one at a time, in OPTION's key order.
        for option in OPTION:
            # perf_counter() is monotonic and high-resolution, so the measured
            # interval cannot be skewed by wall-clock adjustments.
            start_time = time.perf_counter()
            runners[option]()
            elapsed = time.perf_counter() - start_time
            print(f"Time consuming for option <{option}> = {elapsed}")
    

    我的15款MacBook Pro跑出来的结果,办公室的网一般。注意:三项协程相关的结果都约为 8.0 秒,这是因为测得这些数字时 amain 每隔 2 秒才检查一次任务是否全部结束,耗时被向上取整到了 2 秒的倍数,并不是精确的抓取耗时:

    Time consuming for option <COROUTINE> = 8.015891075134277
    Time consuming for option <SINGLE_THREAD> = 35.00409913063049
    Time consuming for option <MULTI_THREADS> = 10.310127973556519
    Time consuming for option <MULTI_THREADS_COROUTINE> = 8.017142057418823
    Time consuming for option <MULTI_PROCESSES> = 9.180757999420166
    Time consuming for option <MUTL_PROCESSES_COROUTINE> = 8.016705989837646
    

    相关文章

      网友评论

        本文标题:Python并发:线程、进程、协程的抓取速度

        本文链接:https://www.haomeiwen.com/subject/eerrpktx.html