import requests
from lxml import etree
# Page number of the listing to fetch.
i = 1
# The page number must be its own path segment; the original concatenation
# produced ".../hot/page1" with no separator.
# NOTE(review): listing URLs are expected to look like /hot/page/<n>/ —
# confirm against the live site.
url = "https://www.qiushibaike.com/hot/page/{}/".format(i)
# Seconds to wait on each HTTP request, so a stalled connection cannot hang
# the scrape indefinitely.
_REQUEST_TIMEOUT = 10


def _first(matches, default=""):
    """Return the first XPath match, or *default* when nothing matched."""
    return matches[0] if matches else default


def Html_download(url):
    """Scrape one listing page and return the text of every joke on it.

    Every ``div`` directly under ``//div[@id="content-left"]`` is treated as
    one joke entry. For each entry the author, the joke text, the vote count
    and the comment count are printed as they are extracted; only the joke
    text is kept in the returned structure.

    If an entry contains a ``span`` with class ``contentForAll`` (presumably
    the "view full text" marker of a truncated entry), the linked detail page
    is downloaded and the text of its ``div.content`` is used instead of the
    text found on the listing page.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list holding one single-element list ``[content]`` per joke entry,
        in document order.

    Raises:
        requests.RequestException: If a download fails or times out.
    """
    listing_html = requests.get(
        url=url, headers=None, timeout=_REQUEST_TIMEOUT
    ).text
    root = etree.HTML(listing_html)

    jokes_all = []
    for div in root.xpath('//div[@id="content-left"]/div'):
        print("----")
        # A missing field is printed as "" rather than aborting the whole
        # page with an IndexError.
        author = _first(div.xpath('.//h2/text()')).strip()
        print(author)

        content = None
        if div.xpath('.//div[@class="content"]/span[@class="contentForAll"]'):
            # The link target lives in the "href" attribute; the original
            # queried "@herf", which never matches.
            # NOTE(review): the class selector "contentHerf" is left as
            # written — it looks like the site's own spelling; confirm.
            detail_path = _first(
                div.xpath('.//a[@class="contentHerf"]/@href'), None
            )
            if detail_path:
                detail_html = requests.get(
                    "https://www.qiushibaike.com" + detail_path,
                    timeout=_REQUEST_TIMEOUT,
                ).text
                detail_root = etree.HTML(detail_html)
                content = "".join(
                    detail_root.xpath('//div[@class="content"]/text()')
                )
        if content is None:
            # Entry is not marked for a detail fetch, or its detail link
            # could not be found: use the text shown on the listing page.
            content = "".join(
                div.xpath('.//div[@class="content"]/span/text()')
            )
        print(content)

        stats_vote = _first(
            div.xpath('.//span[@class="stats-vote"]/i[@class="number"]/text()')
        )
        print(stats_vote)
        stats_comments = _first(
            div.xpath(
                './/span[@class="stats-comments"]//i[@class="number"]/text()'
            )
        )
        print(stats_comments)

        # Only the joke text is returned; author and counters are print-only.
        jokes_all.append([content])
    return jokes_all
# Scrape the listing page at `url`; `hhh` receives the list returned by
# Html_download.
hhh = Html_download(url=url)
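# Notes:
# - When an entry requires loading its full text, take care with how that
#   full text is fetched.
# - User comments (网友评论).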