Downloading image URLs from WikiArt

作者: 狼无雨雪 | 来源:发表于2019-07-05 12:56 被阅读0次

downloading images urls from wik
downloading images from particul
downloading image urls from baid
downloading video urls from yout
scrapy downloading images
Convert FIU trace to Disksim tra
fetch huaban images pin urls and
django 2.0 重定向
node-sass ESOCKETTIMEDOUT 无法安装
TCGA数据下载(2)：Downloading Dataset

"""
Script actually used to fetch image URLs from WikiArt.
"""
from selenium import webdriver
import time
import os
import re
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
# os.environ["PATH"] += os.pathsep + 'D:\google-art-downloader-master'
# Scrape the <img> URLs from one WikiArt style listing and save them to a text
# file, one URL per line. The page is polled repeatedly; after each poll the
# script tries to click a page element (presumably the "load more" control --
# confirm against the live page) so that further images are rendered.
chrome_options = Options()
# chrome_options.add_argument('--headless')
# `options=` is the supported keyword; `chrome_options=` is deprecated.
browser = webdriver.Chrome(options=chrome_options)

# Every distinct image URL collected so far.
asserts_all = set()

# mark_time: number of consecutive polls that found no new URL.
# last_value: size of asserts_all after the previous poll.
mark_time = 0
last_value = 0

try:
    browser.get('https://www.wikiart.org/en/paintings-by-style/ink-and-wash-painting?select=featured#!#filterName:featured,viewType:masonry')
    # Give up once six polls in a row have added nothing new.
    while mark_time <= 5:
        pageSource = browser.page_source
        soup = BeautifulSoup(pageSource, 'lxml')
        asserts = soup.find_all('img')
        for assert_value in asserts:
            src = assert_value.get("src")
            # Skip <img> tags whose src is missing or empty.
            if src:
                # Drop the "!Large.jpg" / "!PinterestSmall.jpg" suffixes
                # (presumably size variants of the same image -- confirm).
                asserts_all.add(str(src).replace("!Large.jpg", "").replace("!PinterestSmall.jpg", ""))
        now_value = len(asserts_all)
        print(now_value)
        if last_value == now_value:
            mark_time += 1
        else:
            # Progress was made, so restart the stall counter. The original
            # line was `mark_time == 0`, a no-op comparison that never reset it.
            mark_time = 0
        try:
            # 'xpath' is the locator-strategy string behind By.XPATH; the
            # generic find_element() is used because the find_element_by_*
            # helpers are not available in newer Selenium releases.
            browser.find_element('xpath', '/html/body/div[2]/div[1]/section/main/div[4]/div/div/div[3]/a/span[3]').click()
        except Exception as e:
            # Best effort: the element may be absent or not clickable; report
            # it and keep polling until the stall counter ends the loop.
            print(e)
        last_value = now_value
        # Wait before the next poll so newly requested images can render.
        time.sleep(4)
    with open("wikiart_ink_and_wash_images_urls.txt", 'w', encoding="utf8") as write_file:
        write_file.writelines(line + "\n" for line in asserts_all)
except Exception as e:
    print("global", e)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    browser.quit()