写在前面
环境:Ubuntu 16.04
编辑器:PyCharm 2017.1.5
直接贴代码了,注释写在里面。
import os
import requests
from bs4 import BeautifulSoup
# Shared accumulator: holds the URL of every image found so far.
imgUrls = []
def get_Html_ImgUrl(url):
    """Fetch one page and append its image URLs to ``imgUrls``.

    The page is parsed with BeautifulSoup and the ``src`` of every
    ``<img class="BDE_Image">`` element is appended to the module-level
    ``imgUrls`` list.

    Args:
        url: Address of the page to download.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Send a browser-like User-Agent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
    # Without a timeout a stalled connection would block the run forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    # Collect the image addresses into the shared imgUrls list.
    for img in soup.find_all('img', class_='BDE_Image'):
        src = img.get('src')
        # Skip tags with a missing or empty src rather than raising KeyError.
        if src:
            imgUrls.append(src)
def save_imgs():
    """Download every URL in ``imgUrls`` and save it as ``./<index>.jpg``.

    Each file is named after the URL's position in ``imgUrls`` and is
    created relative to the current working directory. A download that
    fails is reported and skipped so the remaining images are still saved;
    its index is simply left unused.
    """
    for index, url in enumerate(imgUrls):
        try:
            # Bound the wait so one dead host cannot stall the whole batch.
            response = requests.get(url, timeout=10)
            # Avoid writing an HTTP error body to disk as if it were an image.
            response.raise_for_status()
            picture = response.content
        except requests.RequestException as exc:
            print('Skipping image {} ({}): {}'.format(index, url, exc))
            continue
        with open('./' + str(index) + '.jpg', 'wb') as f:
            f.write(picture)
def main():
    """Collect image URLs from the thread's pages, then download them.

    Requests the base URL with the values 1 through 9 appended to its
    ``pn=`` query parameter (presumably the page number), gathers the image
    URLs from each response, saves all images, and prints a completion
    message.
    """
    url = 'http://tieba.baidu.com/p/3210271432?pn='
    for page in range(1, 10):
        try:
            get_Html_ImgUrl(url + str(page))
        except Exception as exc:
            # Best effort: one bad page must not stop the others. Catching
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working, and the failure is reported instead of hidden.
            print('Failed to fetch page {}: {}'.format(page, exc))
    save_imgs()
    print('爬取完毕')
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
结果如下

网友评论