from selenium import webdriver
import time
# Crawl a serialized novel page by page: load a page, read the element with
# class "content", follow a pager link to the next page, and append the text
# to a local UTF-8 file.
driver = webdriver.Chrome()
url = 'https://www.soxscc.com/JueDaiShenZhu/643701.html'
try:
    # Open the output file once (append mode) instead of once per page.
    with open('绝代神主.txt', 'a', encoding='utf-8') as fp:
        for _ in range(10):
            driver.get(url)
            # find_element(by, value) with string locators works on both
            # Selenium 3 and 4; the find_element_by_* helpers were removed
            # in Selenium 4.3.
            content = driver.find_element('class name', 'content')
            # Look the pager link up once and reuse it for text and href.
            # NOTE(review): `title` is this link's text, as in the original;
            # confirm that is the heading you want stored after each page.
            next_link = driver.find_element(
                'xpath', '//*[@id]/div[3]/div[7]/a[3]')
            title = next_link.text
            nextpage = next_link.get_attribute('href')
            fp.write('\n' + content.text + '\n\n' + title)
            if not nextpage:
                # No href to follow; stop rather than call driver.get(None).
                break
            url = nextpage
finally:
    # Always close the browser, even if a lookup or page load fails.
    driver.quit()
selenium 翻页爬取小说
selenium可以节省时间
下面是用 selenium 爬取斗图啦图片的代码
import ssl
import time
from urllib import request
from selenium import webdriver
# Download the images from a paginated search-result listing: for each of
# 10 pages, save every matched <img> to the current directory, scroll down,
# then click the "下一页" (next page) link.

# NOTE(security): this disables HTTPS certificate verification for urllib
# requests made through the default context in this process. Kept because
# the original script relies on it; remove it if the image host presents a
# valid certificate.
ssl._create_default_https_context = ssl._create_unverified_context
driver = webdriver.Chrome()
url = 'https://www.fabiaoqing.com/search/bqb/keyword/%E6%96%97%E5%9B%BE%E5%95%A6'
try:
    driver.get(url)
    for _ in range(10):
        # find_elements(by, value) with string locators works on both
        # Selenium 3 and 4; the find_elements_by_* helpers were removed
        # in Selenium 4.3.
        images = driver.find_elements(
            'xpath', '//*[@id="bqb"]/div/div/a/img')
        for img in images:
            href = img.get_attribute('src')
            if not href:
                # An <img> without a src has nothing to download.
                continue
            # Use the last path segment of the URL as the local file name.
            name = href.split('/')[-1]
            request.urlretrieve(href, name)
        # Scroll down before looking for the pager link.
        js = 'window.scrollBy(0,8000)'
        driver.execute_script(js)
        nextpage = driver.find_element('partial link text', '下一页')
        nextpage.click()
        # Short pause after the click before querying the page again.
        time.sleep(.5)
finally:
    # Always close the browser, even if a download or lookup fails.
    driver.quit()
爬取了两百多张图片
很方便
网友评论