Runtime environment: Python 3.6.0
Required packages:
from bs4 import BeautifulSoup
import requests
import os
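These can be installed with pip install beautifulsoup4 requests lxml; the lxml package is needed because the parser passed to BeautifulSoup below is "lxml".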
Disguise (request headers that make the script look like a browser):
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36"
}
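Without a browser-like User-Agent, some sites refuse requests that come from scripts. A quick way to check that the disguise is accepted (a minimal sketch; the test URL and the expectation of a 200 status are assumptions) is:

import requests

resp = requests.get("http://jandan.net/ooxx", headers=headers, timeout=10)
print(resp.status_code)  # 200 means the page was served to the disguised client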
Create a new folder under the current directory:
folder = "ooxx"
os.mkdir(folder)
os.chdir(folder)
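Note that os.mkdir raises FileExistsError if the script is run a second time; a slightly more robust variant (a minimal sketch, otherwise the same behavior) is:

import os

folder = "ooxx"
os.makedirs(folder, exist_ok=True)  # do not fail if the folder already exists
os.chdir(folder)                    # downloaded images will be saved here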
URL analysis:
urls = ["http://jandan.net/ooxx/page-{}#comments".format(str(i)) for i in range(90, 100)]
# the page range to crawl can be changed
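To confirm the page range before crawling, you can print a few of the generated URLs as a quick sanity check:

for u in urls[:3]:
    print(u)
# http://jandan.net/ooxx/page-90#comments
# http://jandan.net/ooxx/page-91#comments
# http://jandan.net/ooxx/page-92#comments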
Main code:
for url in urls:
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")
    images = soup.select("img")
    for image in images:
        image_url = "http:" + image.get("src")
        # get the image address
        filename = image_url.split("/")[-1]
        # name the image file
        with open(filename, "wb") as f:
            img = requests.get(image_url)
            # download the image
            f.write(img.content)
            # write the image to disk
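The img selector also matches avatars and site logos, and the src attribute is not always a protocol-relative //... link, so "http:" + src can produce an invalid URL. A slightly more defensive version of the loop (a sketch under those assumptions, with basic error handling added) might look like:

for url in urls:
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")
    for image in soup.select("img"):
        src = image.get("src")
        if not src or not src.startswith("//"):
            continue                              # skip inline or absolute sources
        image_url = "http:" + src
        filename = image_url.split("/")[-1]
        try:
            img = requests.get(image_url, headers=headers, timeout=10)
            img.raise_for_status()
        except requests.RequestException:
            continue                              # ignore images that fail to download
        with open(filename, "wb") as f:
            f.write(img.content)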
Fewer than 20 lines in total; beginners, try it yourselves (of course, I am a beginner too).