自从爬了表情包之后，我自己写代码爬取其他图片网站的图片。下面爬取煎蛋随手拍：
import os
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree
# Download every image linked from one jandan.net "ooxx" page and store the
# files in the local ``suishoupai/`` directory.
url = 'http://jandan.net/ooxx/MjAyMDEwMzEtOTg=#comments'
# Alternative XPath noted by the author: //div[@class='text']/p//a/@href
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36'}
save_dir = 'suishoupai'
# requests has no default timeout; without one a stalled server blocks forever.
timeout = 10

resp = requests.get(url, headers=headers, timeout=timeout)
# Fail loudly on 4xx/5xx instead of parsing an error page for links.
resp.raise_for_status()
html = etree.HTML(resp.text)
# The full-size image links are the <a> elements carrying a referrerpolicy
# attribute; their href values are what gets downloaded.
srcs = html.xpath('//a[@referrerpolicy]/@href')

# open(..., 'wb') does not create missing directories, so make sure it exists.
os.makedirs(save_dir, exist_ok=True)

for i in srcs:
    # urljoin resolves protocol-relative hrefs ("//host/path") against the
    # page URL, giving the same "http://host/path" the original string
    # concatenation produced, and leaves already-absolute hrefs untouched.
    urlplus = urljoin(url, i)
    # Derive the file name from the URL path only, so a query string or
    # fragment never ends up in the name on disk.
    name = os.path.basename(urlsplit(urlplus).path)
    if not name:
        # An href ending in "/" has no usable file name; skip it.
        continue
    try:
        r = requests.get(urlplus, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        # One broken image should not abort the rest of the download run.
        print('skipped', urlplus, exc)
        continue
    with open(os.path.join(save_dir, name), 'wb') as file:
        file.write(r.content)
网友评论