美文网首页工作生活
fetch huaban big image urls

fetch huaban big image urls

作者: 狼无雨雪 | 来源:发表于2019-07-05 12:54 被阅读0次
import os
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options

# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'

# Scrape each huaban pin page listed in huaban_pin_asserts_all.txt and
# collect the "big image" src URLs into huaban_images_all.txt (de-duplicated).

# Run Chrome headless so the scrape works without a display.
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")

images_all = set()  # de-duplicated image URLs collected across all pins
browser = webdriver.Chrome(chrome_options=chrome_options)
# browser = webdriver.PhantomJS()

# Both timeouts must be set for either to take effect. Set them once,
# BEFORE any navigation — the original re-set them on every iteration,
# after get(), where they could not guard the navigation they follow.
browser.set_page_load_timeout(10000)
browser.set_script_timeout(10000)

try:
    with open("huaban_pin_asserts_all.txt", 'r', encoding="utf8") as read_file:
        # Iterate the file object directly instead of readlines():
        # same lines, no need to hold the whole file in memory.
        for index, line in enumerate(read_file):
            url = "http://huaban.com" + line.strip()
            browser.get(url)
            time.sleep(1)  # give the page's JS a moment to render
            print(index, url)

            # The big image appears either wrapped in an <a> or directly
            # under the holder div, depending on the page; try both.
            # find_element_by_xpath raises rather than returning None,
            # so the original `!= None` checks were dead code.
            for xpath in ('//*[@id="baidu_image_holder"]/a/img',
                          '//*[@id="baidu_image_holder"]/img'):
                try:
                    img = browser.find_element_by_xpath(xpath)
                    images_all.add(img.get_attribute('src'))
                except NoSuchElementException:
                    pass  # this layout variant is absent on this page

            time.sleep(1)  # throttle requests to be polite to the server

    with open("huaban_images_all.txt", 'w', encoding="utf8") as write_file:
        for src in images_all:
            write_file.write(str(src) + "\n")
finally:
    # Always release the driver session, on success as well as failure.
    # The original only close()d on exception (leaking Chrome on the
    # success path) and silently swallowed the exception itself; quit()
    # ends the whole session, and errors now propagate visibly.
    browser.quit()

相关文章

网友评论

    本文标题:fetch huaban big image urls

    本文链接:https://www.haomeiwen.com/subject/pbzrhctx.html