美文网首页工作生活
Downloading image URLs from WikiArt

Downloading image URLs from WikiArt

作者: 狼无雨雪 | 来源:发表于2019-07-05 12:56 被阅读0次
"""
Script actually used to fetch image URLs from WikiArt.
"""
from selenium import webdriver
import time
import os
import re
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'
# Collect the image URLs shown on a WikiArt style listing page.
#
# The script opens the listing in Chrome, repeatedly parses the rendered page,
# gathers every <img> "src" (with the thumbnail size suffixes stripped), clicks
# the element at a fixed XPath to request more results, and finally writes the
# unique URLs to "wikiart_ink_and_wash_images_urls.txt", one per line.
#
# NOTE(review): `chrome_options=` and `find_element_by_xpath` are the spellings
# the original script used; confirm they exist in the installed Selenium version.
chrome_options = Options()
# chrome_options.add_argument('--headless')
browser = webdriver.Chrome(chrome_options = chrome_options)

# Unique image URLs seen so far.
asserts_all = set()

# Number of consecutive polls during which no new URL was discovered.
mark_time = 0
# Size of `asserts_all` after the previous poll.
last_value = 0

try:
    browser.get('https://www.wikiart.org/en/paintings-by-style/ink-and-wash-painting?select=featured#!#filterName:featured,viewType:masonry')
    # Stop once more than five polls in a row have produced nothing new.
    while mark_time <= 5:
        pageSource = browser.page_source
        soup = BeautifulSoup(pageSource,'lxml')
        asserts = soup.find_all('img')
        for assert_value in asserts:
            src = assert_value.get("src")
            # Skip <img> tags with a missing or empty "src" attribute.
            if src:
                # Drop the size suffixes so that only the base image URL is stored.
                asserts_all.add(str(src).replace("!Large.jpg","").replace("!PinterestSmall.jpg",""))
        now_value = len(asserts_all)
        print(now_value)
        if last_value == now_value:
            mark_time += 1
        else:
            # Progress was made: reset the stagnation counter. (The original
            # line was the no-op comparison `mark_time == 0`, so the counter
            # never reset and the crawl could stop early.)
            mark_time = 0
        try:
            # Best effort: click the element at this XPath to request more
            # results; a failure is reported and the loop simply polls again.
            browser.find_element_by_xpath('/html/body/div[2]/div[1]/section/main/div[4]/div/div/div[3]/a/span[3]').click()
        except Exception as e:
            print(e)
        last_value = now_value
        # Pause before the next poll so newly requested content can render.
        time.sleep(4)
    with open("wikiart_ink_and_wash_images_urls.txt",'w',encoding="utf8") as write_file:
        for line in asserts_all:
            write_file.write(line+"\n")
except Exception as e:
    print("global",e)
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window.
    browser.quit()

相关文章

网友评论

    本文标题:Downloading image URLs from WikiArt

    本文链接:https://www.haomeiwen.com/subject/entdhctx.html