import os
import urllib.parse

import requests
# Fetch data from a URL.
def get_page(url):
    """Fetch *url* and return the response body as a UTF-8 string.

    The original assigned ``.encoding`` on ``page.content`` — but
    ``.content`` is a ``bytes`` object, so that line raised
    AttributeError.  Decode the bytes explicitly instead so callers
    can run ``str.find`` on the result.
    """
    response = requests.get(url)
    return response.content.decode('utf-8')
def page_from_duitang(label):
    """Download duitang search-result pages for keyword *label*.

    Returns a list of page bodies (strings), one per paginated request.
    """
    pages = []
    # BUG FIX: the original template omitted the '?' before the query
    # string ('by_search/kw=...'), producing an invalid request path.
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw={}&start={}&limit=1000'
    # Percent-encode the (typically Chinese) keyword for use in a URL.
    label = urllib.parse.quote(label)
    # NOTE(review): step 100 with limit=1000 likely fetches overlapping
    # windows — confirm against the API's actual page-size cap.
    for start in range(0, 3600, 100):
        u = url.format(label, start)
        print(u)
        pages.append(get_page(u))
    return pages
def findall_in_page(page, startpart, endpart):
    """Return every substring of *page* between *startpart* and *endpart*.

    Scans left to right; each match resumes after the previous end
    delimiter.  The delimiters themselves are not included.

    EDGE-CASE FIX: when *startpart* is found but *endpart* never occurs
    after it, the original computed ``end = -1`` and appended
    ``page[start:-1]`` — a silently truncated fragment.  We stop cleanly
    instead and drop the unterminated match.
    """
    all_strings = []
    end = 0
    while True:
        found = page.find(startpart, end)
        if found == -1:
            break
        start = found + len(startpart)
        end = page.find(endpart, start)
        if end == -1:
            break  # unterminated match — do not append a truncated slice
        all_strings.append(page[start:end])
    return all_strings
def pic_urls_from_page(pages):
pic_urls = []
for page in pages:
urls = findall_in_page(page,"path":",""")
pic_urls.extend(urls) #extend 和 append的不同
return pic_urls
def download_pics(url, n):
    """Download one image from *url* and save it as ``pic/<n>.jpg``.

    ROBUSTNESS FIX: the original raised FileNotFoundError when the
    ``pic/`` directory did not exist; create it on demand.
    """
    os.makedirs('pic', exist_ok=True)
    r = requests.get(url)
    path = 'pic/' + str(n) + '.jpg'
    # Binary mode: the response body is raw image bytes.
    with open(path, 'wb') as f:
        f.write(r.content)
def main(label):
    """Search duitang for *label* and download every image found.

    SYNTAX FIX: the original ``def main(label)`` was missing the colon.
    """
    pages = page_from_duitang(label)
    pic_urls = pic_urls_from_page(pages)
    # enumerate replaces the manual n counter; numbering starts at 1
    # to match the user-facing progress message.
    for n, url in enumerate(pic_urls, start=1):
        print("正在下载第 {} 张图片".format(n))
        download_pics(url, n)
# (stray scraped-page text, commented out so the file parses: "网友评论" — "user comments")