1. 观察网页的页面规律
2. 缺点是爬取速度比较慢；如果需要高速爬取，可以学习 Scrapy 异步爬虫框架
3. 在上一个文档的基础上增加了翻页功能
import re

from selenium import webdriver
from selenium.webdriver.common.by import By

# Scrape post titles and summaries from a paginated cnblogs listing using
# headless Chrome, following the "next page" link until it can no longer
# be clicked.

# Captures two groups per post: the text of the "postTitle2" anchor (after one
# leading whitespace character) and the contents of the following
# "c_b_p_desc" div. re.S lets ".*?" span line breaks in the page source.
patter = re.compile(r'<a class="postTitle2" href=".*?">\s(.*?)</a>'
                    r'.*?<div class="c_b_p_desc">(.*?)</div>', re.S)

# Run Chrome without opening a visible browser window.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
# NOTE(review): passing the driver path positionally is the Selenium 3 call
# style; newer Selenium releases expect a Service object instead -- confirm
# which Selenium version this script targets.
driver = webdriver.Chrome('C:\\chromedriver.exe', options=chrome_options)

try:
    driver.get("http://www.cnblogs.com/yoyoketang/default.html?page=1")
    while True:
        try:
            page = driver.page_source
            # findall returns a list of (title, description) tuples.
            subtitle = patter.findall(page)
            print(subtitle)
            # "下一页" is the "next page" link text. Presumably the link is
            # absent on the last page, so the lookup raises and ends the loop.
            driver.find_element(By.LINK_TEXT, "下一页").click()
        except Exception as e:
            # Best-effort script: report whatever stopped pagination and exit
            # the loop rather than crash.
            print(e)
            break
finally:
    # Always shut the browser down so no headless Chrome process is leaked.
    driver.quit()
网友评论