from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import os
from bs4 import BeautifulSoup
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'
# Headless-Chrome scraper: reads huaban pin paths (one per line) from
# "huaban_pin_asserts_all.txt", visits each pin page, collects the image
# src URLs, and writes the de-duplicated URLs to "huaban_images_all.txt".
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")

images_all = set()  # unique image src URLs collected across all pages
browser = webdriver.Chrome(chrome_options=chrome_options)
# browser = webdriver.PhantomJS()

# Both timeouts must be set for the page-load limit to take effect;
# they are loop-invariant, so set them once up front.
browser.set_page_load_timeout(10000)
browser.set_script_timeout(10000)

try:
    with open("huaban_pin_asserts_all.txt", 'r', encoding="utf8") as read_file:
        for index, line in enumerate(read_file):
            url = "http://huaban.com" + line.strip()
            browser.get(url)
            time.sleep(1)  # give the page a moment to render
            print(index, url)
            # The image may be wrapped in an anchor or appear bare; try
            # both XPaths and skip whichever is absent on this page.
            for xpath in ('//*[@id="baidu_image_holder"]/a/img',
                          '//*[@id="baidu_image_holder"]/img'):
                try:
                    img = browser.find_element_by_xpath(xpath)
                    if img is not None:
                        images_all.add(img.get_attribute('src'))
                except Exception:
                    # NoSuchElementException for this variant — not an error.
                    pass
            time.sleep(1)

    with open("huaban_images_all.txt", 'w', encoding="utf8") as write_file:
        for src in images_all:
            write_file.write(str(src) + "\n")
finally:
    # Always shut down the driver — the original leaked the browser on the
    # success path (close() ran only in except) and swallowed the exception;
    # quit() ends the whole driver process, and errors now propagate.
    browser.quit()