# 环境:Python 3.6
# 目标:爬取 HTML 页面中的图片,并保存到本地
import requests
import os
from bs4 import BeautifulSoup
def downloadImages(imageLinks, localPath, timeout=10):
    """Download every URL in ``imageLinks`` into the directory ``localPath``.

    Each image is written to ``<index>.jpg``, where ``index`` is the URL's
    position in ``imageLinks``. A failed download is reported on stdout and
    skipped, so the remaining URLs are still attempted.

    Args:
        imageLinks: Iterable of image URLs to fetch.
        localPath: Target directory; created together with any missing
            parent directories when it does not exist yet.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    # makedirs also creates missing parents, which a plain os.mkdir cannot do.
    os.makedirs(localPath, exist_ok=True)
    for index, url in enumerate(imageLinks):
        filename = os.path.join(localPath, str(index) + ".jpg")
        try:
            res = requests.get(url, timeout=timeout)
            # Reject 4xx/5xx answers so an error page is never saved as a .jpg.
            res.raise_for_status()
            with open(filename, "wb") as f:
                f.write(res.content)
        except (requests.RequestException, OSError):
            # Best effort: report the failing URL and carry on with the rest.
            # Unlike a bare ``except``, this lets KeyboardInterrupt propagate.
            print("download failure :", url)
        else:
            print("download %sth picture" % index)
def getImageLinks(htmlUrl, attrs=None, timeout=10):
    """Return the absolute URLs of the matching ``<img>`` tags on a page.

    Args:
        htmlUrl: URL of the HTML page to scan.
        attrs: Attribute filter handed to ``find_all`` to select the
            ``<img>`` tags; defaults to ``{"height": "265"}``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the ``src`` value of every matching tag, resolved
        against ``htmlUrl``. Tags with a missing or empty ``src`` are skipped.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an error status.
    """
    from urllib.parse import urljoin

    if attrs is None:
        attrs = {"height": "265"}
    response = requests.get(htmlUrl, timeout=timeout)
    response.raise_for_status()
    bsObj = BeautifulSoup(response.text, "lxml")
    # Pages often use relative or protocol-relative ``src`` values; resolving
    # them against the page URL yields links that can be fetched directly.
    return [
        urljoin(htmlUrl, tag.attrs["src"])
        for tag in bsObj.find_all("img", attrs)
        if tag.attrs.get("src")
    ]
# Collect the image URLs from the gallery page, then save them under /tmp/1118/.
imageLinks = getImageLinks(htmlUrl="http://www.27270.com/zt/xinggan/")
downloadImages(imageLinks=imageLinks, localPath="/tmp/1118/")
网友评论