# Code listing (代码展示):

import requests

import re

# Browser-like User-Agent header sent with every page request.
_REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
}

# Seconds to wait for the server before abandoning a request; without a
# timeout requests.get() can block indefinitely.
_REQUEST_TIMEOUT = 10


def _extract_stories(text):
    """Pull the per-story fields out of one listing page's HTML.

    Args:
        text: HTML source of a listing page.

    Returns:
        A list of dicts, one per story, with the keys 'author', 'content',
        'gender', 'stars' and 'imgs' (in that order).

    Note:
        Each field is collected page-wide by its own regex and the columns
        are then paired positionally with zip(), which stops at the shortest
        column. The image pattern matches every ``<img src=...>`` on the
        page, so if the page holds images other than author avatars the
        columns can drift apart -- verify against the live HTML.
    """
    # Inner text of each <div class="articleGender ..."> badge. It is stored
    # under the 'stars' key; the original notes show a sample value of 30 --
    # TODO confirm whether this is really a like count.
    love_stars = re.findall(
        r'<div class="articleGender .*?">(.*?)</div>', text, re.DOTALL)

    # The class name that follows "articleGender" on the same badge is used
    # as the gender marker.
    author_gender = re.findall(
        r'<div class="articleGender (.*?)">.*?</div>', text, re.DOTALL)

    # Image URLs. The closing quote is part of the pattern so that the
    # captured URL does not end with a stray '"'.
    author_imgs = re.findall(r'<img src="(.*?)".*?>', text, re.DOTALL)

    # Story bodies: the first <span> inside each <div class="content">.
    raw_contents = re.findall(
        r'<div class="content">.*?<span>(.*?)</span>', text, re.DOTALL)
    # Drop embedded tags (including <br/>) and newlines, then trim the ends.
    content_list = [
        re.sub(r'<.*?>|\n', '', raw).strip() for raw in raw_contents
    ]

    # Author names are the text of the <h2> elements,
    # e.g. <h2>骑着二哈啃黄瓜</h2>.
    authors_list = [
        name.strip() for name in re.findall(r'<h2>(.*?)</h2>', text, re.DOTALL)
    ]

    columns = zip(authors_list, content_list, author_imgs,
                  author_gender, love_stars)
    return [
        {
            'author': author,
            'content': content,
            'gender': gender,
            'stars': stars,
            'imgs': imgs,
        }
        for author, content, imgs, gender, stars in columns
    ]


def parse_url(url):
    """Download one listing page and print every story found on it.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None. One dict per story is written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    response = requests.get(
        url=url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT)
    for story in _extract_stories(response.text):
        print(story)

def main(first_page=1, last_page=10):
    """Scrape and print a run of consecutive listing pages.

    Called without arguments it walks pages 1 through 10. To look at the
    first page only, call ``main(1, 1)``.

    Args:
        first_page: Number of the first page to fetch.
        last_page: Number of the last page to fetch (inclusive).
    """
    for page in range(first_page, last_page + 1):
        url = 'https://www.qiushibaike.com/text/page/%s/' % page
        parse_url(url)

# Start scraping only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()