![](https://img.haomeiwen.com/i3968643/69d32bd8a30a0c16.png)
![](https://img.haomeiwen.com/i3968643/b8639d552061ac4b.png)
![](https://img.haomeiwen.com/i3968643/87b9da32b9bbcf28.png)
Scraping a single image
```python
# Goal: scrape a single image
import requests

url = "https://p0.itc.cn/q_70/images03/20230512/66f16aa1ece34f30bdcf28256c968611.png"
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
}
# .content returns the response body as raw bytes, which is what an image is
img_data = requests.get(url=url, headers=headers).content
# 'wb' opens the file in binary write mode
with open('./baidu.jpg', 'wb') as fp:
    fp.write(img_data)
```
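One caveat: if the request fails (a 404, or an anti-scraping block returning an HTML error page), the code above still writes whatever bytes came back and leaves a corrupt file on disk. A minimal sketch of a safer version, using the same URL; the `raise_for_status()` call and the Content-Type check are additions of mine, not part of the original:

```python
import requests

url = "https://p0.itc.cn/q_70/images03/20230512/66f16aa1ece34f30bdcf28256c968611.png"
headers = {'User-Agent': 'Mozilla/5.0'}  # UA string abbreviated for brevity

resp = requests.get(url=url, headers=headers)
resp.raise_for_status()  # raises requests.HTTPError on 4xx/5xx instead of writing garbage
# Sanity check: confirm the server returned an image, not an HTML error page
assert resp.headers.get('Content-Type', '').startswith('image/')
with open('./baidu.jpg', 'wb') as fp:
    fp.write(resp.content)
```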
![](https://img.haomeiwen.com/i3968643/03484564e479ece2.png)
```python
# Goal: scrape a single image, passing the query string via the params argument
import requests

# Equivalent full URL: "http://www.netbian.com/down.php?id=33263&type=1"
url = "http://www.netbian.com/down.php?"
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
}
# requests encodes this dict into the query string: ?id=33263&type=1
params = {
    'id': '33263',
    'type': '1'
}
img_data = requests.get(url=url, params=params, headers=headers).content
with open('./baidu.jpg', 'wb') as fp:
    fp.write(img_data)
```
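To see exactly what URL `requests` assembled from the dict, print `resp.url` after the request; a quick check with the same parameters as above:

```python
import requests

resp = requests.get('http://www.netbian.com/down.php?', params={'id': '33263', 'type': '1'})
# requests merges the dict into the query string, printing something like:
# http://www.netbian.com/down.php?id=33263&type=1
print(resp.url)
```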
Scraping every image on a page
```python
import requests
import re
import os

if __name__ == '__main__':
    # Create a folder to hold all the images
    if not os.path.exists('./qiutuLibs'):
        os.mkdir('./qiutuLibs')
    url = 'https://www.qiushibaike.com/pic/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/96.0.4664.45 Safari/537.36'
    }
    # General-purpose crawl: fetch the entire page the url points to
    page_text = requests.get(url=url, headers=headers).text
    # Focused crawl: parse the src of every image out of the page
    ex = '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'
    img_src_list = re.findall(ex, page_text, re.S)
    for src in img_src_list:
        # The srcs are protocol-relative (//pic...), so prepend the scheme
        img_url = 'https:' + src
        # Fetch the image's binary data
        img_data = requests.get(url=img_url, headers=headers).content
        # Use the last path segment as the file name
        img_name = src.split('/')[-1]
        # Storage path for this image
        img_path = './qiutuLibs/' + img_name
        with open(img_path, 'wb') as fp:
            fp.write(img_data)
```
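The `re.S` (DOTALL) flag is what lets `.*?` cross line breaks here, since the `<img>` tag does not sit on the same line as its enclosing `<div>`. A self-contained demo against a made-up fragment of the page's markup (the snippet below is illustrative, not fetched from the site):

```python
import re

# Hypothetical fragment mimicking qiushibaike's thumb markup
page_text = '''
<div class="thumb">
<a href="/article/1"><img src="//pic.qiushibaike.com/system/pictures/1/demo.jpg" alt="demo"></a>
</div>
'''
ex = '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'
print(re.findall(ex, page_text, re.S))  # ['//pic.qiushibaike.com/system/pictures/1/demo.jpg']
print(re.findall(ex, page_text))        # [] -- without re.S, '.' does not match newlines
```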
Scraping images from every page
```python
import requests
import re
import os

if __name__ == '__main__':
    # Create a folder to hold all the images
    if not os.path.exists('./qiutuLibs'):
        os.mkdir('./qiutuLibs')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/96.0.4664.45 Safari/537.36'
    }
    # A generic url template; %d is replaced by the page number
    url = 'https://www.qiushibaike.com/pic/%d/?s=5184961'
    for pageNum in range(1, 36):
        # Build the url for this page number
        new_url = url % pageNum
        # General-purpose crawl: fetch the entire page
        page_text = requests.get(url=new_url, headers=headers).text
        # Focused crawl: parse the src of every image out of the page
        ex = '<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'
        img_src_list = re.findall(ex, page_text, re.S)
        for src in img_src_list:
            # Prepend the scheme; use a new variable so the url template is not overwritten
            img_url = 'https:' + src
            # Fetch the image's binary data
            img_data = requests.get(url=img_url, headers=headers).content
            # Use the last path segment as the file name
            img_name = src.split('/')[-1]
            # Storage path for this image
            img_path = './qiutuLibs/' + img_name
            with open(img_path, 'wb') as fp:
                fp.write(img_data)
```
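When looping over 35 pages like this, a single failed image request aborts the whole run, and firing requests back-to-back can get the crawler blocked. A hedged sketch of a download helper (`download_image`, `retries`, and `delay` are my own names and parameters, not from the original) that adds a timeout, a retry, and a short pause between attempts:

```python
import time
import requests

def download_image(img_url, img_path, headers, retries=2, delay=0.5):
    """Fetch one image with a basic retry and throttle; a sketch, not production code."""
    for attempt in range(retries + 1):
        try:
            resp = requests.get(img_url, headers=headers, timeout=10)
            resp.raise_for_status()
            with open(img_path, 'wb') as fp:
                fp.write(resp.content)
            return True
        except requests.RequestException:
            time.sleep(delay)  # back off briefly before the next attempt
    return False
```

Calling `download_image(img_url, img_path, headers)` in place of the bare `requests.get(...).content` inside the loop would let the crawl skip over broken images instead of crashing.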