#糗事百科段子爬虫
import urllib.request
import re
# Browser-style User-Agent header attached to every request made through the
# opener below (presumably so the site does not reject urllib's default
# agent -- confirm against the site's behaviour).
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
# Install globally so that plain urllib.request.urlopen() calls use this opener.
urllib.request.install_opener(opener)

# Listing pages live at BASE_URL + "<page number>/"; pages 1..PAGE_COUNT are crawled.
# A single page can be fetched by hand with, e.g.:
#     urllib.request.urlopen("https://www.qiushibaike.com/8hr/page/10/")
BASE_URL = "https://www.qiushibaike.com/8hr/page/"
PAGE_COUNT = 35

# Compiled once instead of once per page.  re.S lets "." match newlines, which
# is needed because each content <div> spans several lines of HTML.
CONTENT_PATTERN = re.compile(
    '<div class="content">.*?<span>(.*?)</span>.*?</div>', re.S
)


def page_url(page: int) -> str:
    """Return the listing URL for the 1-based page number *page*."""
    return BASE_URL + str(page) + "/"


def extract_jokes(html: str) -> list:
    """Return the text of the first <span> inside every content <div> of *html*.

    The captured text is returned exactly as it appears in the markup
    (surrounding whitespace and any inline tags are not stripped).  An empty
    list is returned when nothing matches.
    """
    return CONTENT_PATTERN.findall(html)


def fetch_page(url: str) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Undecodable bytes are dropped ("ignore").  The response is closed as soon
    as the body has been read.  Network/HTTP errors raised by urlopen are not
    caught and propagate to the caller.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8", "ignore")


def main(page_count: int = PAGE_COUNT) -> None:
    """Crawl pages 1..page_count and print the jokes found on each one.

    For every page the raw list of matches is printed first, then each joke
    on its own followed by a "-------" separator line.
    """
    for page in range(1, page_count + 1):
        jokes = extract_jokes(fetch_page(page_url(page)))
        print(jokes)
        for joke in jokes:
            print(joke)
            print("-------")


if __name__ == "__main__":
    main()

# NOTE(review): the original file ended with a stray line of web-page text,
# "网友评论" ("reader comments").  As a bare name it raised NameError once the
# crawl finished, so it is preserved here only as a comment.