美文网首页工作生活
processing threading downloading

processing threading downloading

作者: 狼无雨雪 | 来源:发表于2019-07-05 12:56 被阅读0次

多进程多线程爬取图片

import multiprocessing
import os
import random
import threading
import time
import urllib
# `import urllib` alone does not bind the `request` submodule in Python 3;
# the explicit import below is required for urllib.request.urlretrieve.
import urllib.request
import warnings
from multiprocessing import Lock

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
warnings.filterwarnings("ignore")
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'

# chrome_options = Options()
# chrome_options.add_argument("--disable-gpu")
# chrome_options.add_argument("--headless")

images_all = set()
# browser = webdriver.Chrome(chrome_options = chrome_options)
# browser = webdriver.Chrome()
# browser = webdriver.PhantomJS(executable_path="phantomjs.exe")

origial_urls = []
original_urls_temp = []

Threads_number = 40
Processes_number = 6
num_cpu=multiprocessing.cpu_count()
path = "huaban_big_images"
if not os.path.exists(path):
    os.makedirs(path)

# print("numbers of Threads: ",Threads_number)
# print("numbers of Processes: ",Processes_number)
# print("numbers of cpu: ",num_cpu)

with open("huaban_images_all.txt",'r',encoding="utf8") as read_file:
        lines = read_file.readlines()
        for index, line in enumerate(lines):
            url = line.strip()
            original_urls_temp.append(url)
            if (index + 1) % Threads_number == 0 or (index + 1) == len(lines):
                origial_urls.append(original_urls_temp)
                original_urls_temp = []
# origial_urls


# def start_thread():
#     print("fuck")
def get_image_url(index,index2, url, epoch, batch, index3):
    try:
        print("index: %d, epoch: %d, batch: %d, index3: %d, index2: %d, line: %s"%(index, epoch, batch, index3, index2, "start" + url))
        
        try:
            lock = Lock()
            lock.acquire()
            time.sleep(random.randint(1,5))
            filename = path +"/" + str("index%depoch%dbatch%dindex3%dindex2%d"%(index, epoch, batch, index3, index2)) + str(url.split("/")[-1]) + ".png"
            print("now is loading %s"%url)
            urllib.request.urlretrieve(url, filename = filename)
        except Exception as e:
            print("failt to fetch : %s"%url)
            print(e)
        finally:
            lock.release()
            
        print("index: %d, epoch: %d, batch: %d, index3: %d, index2: %d, line: %s"%(index, epoch, batch, index3, index2, "end" + url))
        
    except Exception as e:
        pass
    finally:
        pass
        
def running_processing(urls, index, epoch, batch, index3):
    threads = []
    print("start process %d number %d"%(batch, index3))
    for index2, url in enumerate(urls) :
        t = threading.Thread(target= get_image_url, args=(index, index2, url, epoch, batch, index3))
        threads.append(t)
    for index_i, thread in enumerate(threads):
#         thread.setDaemon(True)
        thread.start()
    for index_j, thread in enumerate(threads):
        thread.join()
    

    

if __name__ == '__main__': 
    epoch = 0
    batch = 0
    len_original_urls = len(origial_urls)
    temp_urls_set = []
    for index, urls in enumerate(origial_urls):
        temp_urls_set.append(urls)
        epoch += 1
        if (index + 1) % Processes_number == 0 or (index + 1) == len_original_urls:
            batch += 1
            multiThreads = []
            for index3, urls in enumerate(temp_urls_set):
                mt = multiprocessing.Process(target=running_processing,args=(urls, index, epoch, batch, index3))
                mt.start()
                multiThreads.append(mt)

            for index_j, mthread in enumerate(multiThreads):
                mthread.join()
            temp_urls_set = []

    print('program end:%s' %time.ctime())

相关文章

网友评论

    本文标题:processing threading downloading

    本文链接:https://www.haomeiwen.com/subject/tttdhctx.html