# -*- coding: utf-8 -*-
import urllib
import urllib.request
from urllib import parse

from bs4 import BeautifulSoup
# Approach 1: urllib + BeautifulSoup.
# The tag name is non-ASCII, so it is percent-encoded before being placed
# in the URL path.
url = 'https://www.douban.com/tag/{}/?focus=book'.format(parse.quote('小说'))
# Use the response as a context manager so the connection is closed as soon
# as the document has been parsed.
with urllib.request.urlopen(url) as res:
    soup = BeautifulSoup(res, "html.parser")
# Select the element with id="book", then every descendant with class="title".
book_div = soup.find(attrs={"id": "book"})
# find() returns None when no element matches; fall back to an empty result
# instead of failing with AttributeError on the next call.
book_a = book_div.find_all(attrs={"class": "title"}) if book_div is not None else []
for book in book_a:
    print(book.string)
import requests
from lxml import etree
# Approach 2: requests + lxml XPath, fetching the same `url` built earlier
# in this script.
# An explicit timeout keeps the script from blocking indefinitely when the
# server does not respond (requests applies no timeout by default).
res = requests.get(url, timeout=10)
# Parse the raw response bytes into an lxml element tree.
root = etree.HTML(res.content)
# Collect the direct text nodes of every class="title" element nested under
# the id="book" element; the result is a list of strings.
book_a = root.xpath("//*[@id = 'book']//*[@class = 'title']/text()")
print(book_a)
分别用 urllib + BeautifulSoup 和 requests + XPath 两种方法爬取并解析网页内容
网友评论