很多人可能都没想过，用 Selenium 也能做多线程爬虫。
其实实现起来同样简单。
导入相关库
from selenium import webdriver
from multiprocessing.dummy import Pool
访问
def spider(url):
    """Load ``url`` in a new Chrome session and return the rendered HTML.

    Every call starts its own browser session and shuts it down before
    returning, whether or not the page load succeeded.

    Args:
        url: Address of the page to load.

    Returns:
        The page source reported by the browser once ``get`` has returned.
        The same text is also printed to stdout.
    """
    # NOTE(review): passing the driver path positionally depends on the
    # installed Selenium version -- confirm against the project's pin.
    driver = webdriver.Chrome('chromedriver')
    try:
        driver.get(url)
        html = driver.page_source
        print(html)
        # Returning the HTML lets callers that collect results (for example
        # via ``pool.map``) receive the pages instead of a list of ``None``.
        return html
    finally:
        # quit() closes every window and ends the session, so a separate
        # close() is not needed; running it in ``finally`` keeps a failed
        # page load from leaving a browser process behind.
        driver.quit()
控制
def run(x):
    """Fetch the first ``x`` Baidu result pages using a pool of 5 threads.

    Args:
        x: Number of result pages to fetch. The ``pn`` query offset
            advances by 10 per page, starting at 0.

    Returns:
        A list holding one ``spider`` return value per page URL, in the
        same order as the generated URLs.
    """
    url = 'https://www.baidu.com/s?wd=今天有哪些美女&pn={}'
    pages = [url.format(offset) for offset in range(0, x * 10, 10)]
    print(pages)
    pool = Pool(5)
    try:
        result = pool.map(spider, pages)
    finally:
        # pool.map re-raises the first worker exception; always close and
        # join so the worker threads are released even in that case.
        pool.close()
        pool.join()
    return result
试一下
if __name__ == "__main__":
    # Script entry point: crawl 10 result pages as a quick demonstration.
    run(x=10)
网友评论