一、分析网页源代码;二、模拟请求操作;三、解析网页内容;四、下载图片并存储到自己指定的保存路径
image.png image.png代码:
image.png
cmd+alt+L:格式化代码
import re
from xml import etree
import requests
import json
from urllib import request
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
# 全局取消证书验证(存在安全风险,仅建议在测试时使用)
def _safe_filename(name, fallback):
    """Return *name* made safe for use as a single file-name component.

    Path separators, characters reserved on common file systems and control
    characters are replaced with ``_``; the result is truncated so that even
    multi-byte titles stay below typical file-name length limits.  When
    nothing usable remains, *fallback* is returned instead.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(name or ""))
    cleaned = cleaned[:60].strip(" .")
    return cleaned or fallback


def downLoad(save_dir="/Users/tianxiang.wu/Desktop/images"):
    """Search Sogou Images for a keyword and optionally download the results.

    The function is interactive: it prompts for the search keyword, queries
    the Sogou picture search endpoint, dumps the returned item list to
    ``./<keyword>.json`` and then asks whether the images should be
    downloaded.  On confirmation every item's ``oriPicUrl`` is fetched into
    *save_dir* as ``<title>.jpeg``.

    Args:
        save_dir: Directory the images are written to.  It is created when
            missing.  Defaults to the path the original script used.

    Raises:
        requests.RequestException: If the search request itself fails.
        ValueError: If the search response is not valid JSON.
    """
    import os  # local import: the file's import block does not provide os

    header = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15"
    }
    keyword = input("请输入爬取关键字:")
    print("开始搜索... ...")
    param = {
        "query": keyword,
        "model": "1",
        "start": "1",
        "xml_len": "48",
        "rawQuery": keyword,
        "st": "225",
    }
    url = "https://pic.sogou.com/napi/pc/searchList"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, params=param, headers=header, timeout=10)
    jsonData = response.json()

    # Tolerate a response without "data"/"items" instead of raising KeyError.
    imgArray = (jsonData.get("data") or {}).get("items") or []

    # Persist the raw item list; the context manager guarantees the file is
    # flushed and closed even if serialisation fails.
    jsonPath = "./{}.json".format(_safe_filename(keyword, "result"))
    with open(jsonPath, "w", encoding="utf-8") as fp:
        json.dump(imgArray, fp=fp, ensure_ascii=False)
    print("{}张图片数组已经存入".format(len(imgArray)))

    order = input("是否下载图片到桌面 Y/N? :")
    if order.strip().upper() != "Y":
        print("程序执行结束")
        return

    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    usedNames = set()
    for index, item in enumerate(imgArray, start=1):
        title = item.get("title") or ""
        imgUrl = item.get("oriPicUrl")

        # Titles come from the remote service: sanitise them and make sure two
        # items with the same title do not overwrite each other.
        fileName = _safe_filename(title, "image_{}".format(index))
        if fileName in usedNames:
            fileName = "{}_{}".format(fileName, index)
        usedNames.add(fileName)
        localPath = os.path.join(save_dir, "%s.jpeg" % fileName)

        print(imgUrl)
        if not imgUrl:
            print("%s加载失败" % title)
            continue
        try:
            request.urlretrieve(imgUrl, localPath)
        except (OSError, ValueError):
            # OSError covers URLError/HTTPError and local write failures;
            # ValueError is raised for malformed URLs.
            print("%s加载失败" % title)
        else:
            print("... ... ")
    print("图片下载完成")


if __name__ == '__main__':
    downLoad()
网友评论