import time
import requests
import concurrent
from concurrent import futures
import threading
from multiprocessing import Pool
import pandas as pd
# 定义装饰器
def get_time(fun):
    """Decorate ``fun`` so each call prints a banner and its elapsed time.

    Args:
        fun: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``fun``, prints how long the call took, and returns whatever
        ``fun`` returned. The wrapper keeps ``fun``'s name and docstring.
    """
    import functools  # local import so this block does not depend on new file-level imports

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        print('*' * 50)
        print(fun.__name__, 'start.........')
        # perf_counter is monotonic, so the interval cannot go negative
        # if the system clock is adjusted mid-run.
        start_time = time.perf_counter()
        # Keep the result: the original wrapper silently discarded it.
        result = fun(*args, **kwargs)
        spend_time = time.perf_counter() - start_time
        print(fun.__name__, '函数结束了,花费了%ss的时间' % spend_time)
        print('*' * 50)
        return result

    return wrapper
# 定义装饰器
# 从文件中取出地址
def get_urls_from_file(n, path='TestUrls.csv'):
    """Return the first ``n`` values of the ``url`` column of a CSV file.

    Args:
        n: Upper slice bound applied to the ``url`` column; if the file
            has fewer rows, all of them are returned.
        path: CSV file to read. Defaults to ``'TestUrls.csv'``, the file
            name that used to be hard-coded here.

    Returns:
        list: The selected ``url`` values, in file order.

    Raises:
        KeyError: If the CSV has no ``url`` column.
    """
    df = pd.read_csv(path)
    # .iloc makes the positional slice explicit.
    return df['url'].iloc[:n].tolist()
# 从文件中取出地址
# 请求数据并分析数据
def get_data(url, retries=3, timeout=10):
    """Download ``url`` and return the response body as text.

    Server errors (HTTP 5xx) are retried up to ``retries`` additional
    times. If the last attempt is still a 5xx, that response's body is
    returned.

    Args:
        url: Address to request.
        retries: How many extra attempts to make after a 5xx response.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        str | None: The body of the final response, or ``None`` when the
        request itself failed (connection error, timeout, invalid URL...).
    """
    headers_value = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
    while True:
        try:
            html = requests.get(url, headers=headers_value, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # Previously this path fell through to `html.text` on None and
            # crashed with AttributeError; report the failure instead.
            print('下载错误:', e)
            return None
        print('访问的网址:', url, ':访问的代码:', html.status_code)
        if 500 <= html.status_code < 600 and retries > 0:
            print('服务器错误!,正在重试!')
            retries -= 1
            # Loop so the body returned is the one from the retry, not the
            # stale failed response.
            continue
        return html.text
# 请求数据并分析数据
# 串行
@get_time
def Mynormal(urls):
    """Serial baseline: request every entry of ``urls`` one after another.

    Each address is handed to ``get_data``; the downloaded bodies are not
    kept.
    """
    for address in urls:
        get_data(address)
# 串行
#进程池
def MyprocessPool(num=10, url_list=None):
    """Fetch URLs in parallel with a pool of worker processes.

    Args:
        num: Number of worker processes in the pool.
        url_list: URLs to fetch. When omitted, the module-level ``urls``
            set by the script entry point is used, as before.

    Returns:
        list: One ``get_data`` result per URL, in input order.
    """
    if url_list is None:
        # Backward compatibility with the original zero-argument usage.
        url_list = urls
    # The context manager releases the workers even if map() raises;
    # map() itself blocks until every task has finished.
    with Pool(num) as pool:
        return pool.map(get_data, url_list)
#进程池
#多线程
@get_time
def My_multi_thread(max_thread=10, url_list=None):
    """Fetch URLs with hand-managed worker threads.

    Up to ``max_thread`` threads are started; each keeps taking a URL off
    a shared work list and calling ``get_data`` on it until the list is
    empty.

    Args:
        max_thread: Maximum number of worker threads to start.
        url_list: URLs to fetch. When omitted, the module-level ``urls``
            set by the script entry point is used, as before.
    """
    source = urls if url_list is None else url_list
    # Work on a private copy so the caller's list is not emptied.
    pending = list(source)

    def url_process():
        # Worker loop: stop once no URL is left to take.
        while True:
            try:
                url = pending.pop()
            except IndexError:
                break
            get_data(url, retries=3)

    worker_count = min(max_thread, len(pending))
    # Pass the function itself as the target. The original passed
    # `url_process()`, which ran all the work in the calling thread and
    # gave the Thread a target of None.
    threads = [threading.Thread(target=url_process) for _ in range(worker_count)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
#多线程
#线程池
@get_time
def Myfuter(num_of_max=10, url_list=None):
    """Fetch URLs in parallel with a ``ThreadPoolExecutor``.

    Args:
        num_of_max: Maximum number of worker threads.
        url_list: URLs to fetch. When omitted, the module-level ``urls``
            set by the script entry point is used, as before.

    Returns:
        list: One ``get_data`` result per URL, in input order.
    """
    if url_list is None:
        # Backward compatibility with the original zero-argument usage.
        url_list = urls
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_of_max) as executor:
        # Consume the lazy iterator: this collects the results and lets
        # an exception raised in a worker surface here instead of being
        # silently dropped.
        return list(executor.map(get_data, url_list))
#线程池
# 并行函数运用
if __name__ == '__main__':
    # The more URLs are fetched, the more visible the benefit of running in parallel.
    urls = get_urls_from_file(10)
    # Serial baseline first.
    Mynormal(urls)
    # Then process pool, thread pool and hand-rolled threads, in the same order as before.
    for parallel_runner in (MyprocessPool, Myfuter, My_multi_thread):
        parallel_runner(10)
# 并行函数运用
运行结果:
image.png
这里还使用了装饰器来统计各个函数的运行时间,具体实现请看代码。
网友评论