IOUtils类
package com.example.demospringboot.util;
/**
* @program: demopa
* @description
* @author: tina.liu
* @create: 2020-02-09 11:12
**/
import java.io.FileOutputStream;
import java.io.InputStream;
/**
 * Small I/O helper for persisting the contents of an input stream to a local file.
 */
public class IOUtils {

    /** Size in bytes of the intermediate copy buffer. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Copies every byte readable from {@code inStream} into the file at {@code path},
     * creating the file or truncating an existing one.
     *
     * <p>This method is best-effort: any failure (unwritable path, read error, null
     * argument) is printed to standard error and NOT propagated to the caller. On
     * success a confirmation line is printed to standard output.
     *
     * <p>The output file is always closed before this method returns. The input
     * stream is left open; closing it remains the caller's responsibility.
     *
     * @param path     destination file path, including the file extension
     * @param inStream stream whose remaining content is written to the file
     */
    public static void download(String path, InputStream inStream) {
        try {
            // try-with-resources guarantees the file handle is released (and pending
            // bytes are flushed to the OS) even when a read or write fails half-way.
            try (FileOutputStream fs = new FileOutputStream(path)) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int bytesRead;
                while ((bytesRead = inStream.read(buffer)) != -1) {
                    fs.write(buffer, 0, bytesRead);
                }
            }
            // Reported only after the stream has been closed without error.
            System.out.println(path + "保存成功!");
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method never throwing.
            e.printStackTrace();
        }
    }
}
ReptileGetList类
package com.example.demospringboot.config;
import com.example.demospringboot.resp.MessageResp;
import com.example.demospringboot.util.IOUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* @program: demopa
* @description
* @author: tina.liu
* @create: 2020-02-09 09:59
**/
/**
 * Two-level image crawler: reads a listing page, follows every tile on it to a
 * second listing page, and downloads the thumbnail image of every tile found on
 * both levels into a local directory.
 */
public class ReptileGetList {

    /** Prefix prepended to the site-relative {@code href} values found on the pages. */
    private static final String SITE_ROOT = "https://www.ivsky.com";

    /** First-level listing page the crawl starts from. */
    private static final String FIRST_URL = "https://www.ivsky.com/tupian/renwutupian/";

    /** CSS class of the {@code div} that wraps each tile on both listing levels. */
    private static final String TILE_CLASS = "il_img";

    /** Directory the downloaded images are written to. */
    private static final String SAVE_DIR = "/Users/lvxiaokai/Desktop/tina/img/images/character/";

    /** Pause in milliseconds after each second-level page request. */
    private static final long REQUEST_DELAY_MS = 800L;

    /**
     * Crawls the listing pages and downloads every collected image.
     *
     * @param args unused
     * @throws Exception kept for signature compatibility; individual download
     *                   failures are reported and do not abort the run
     */
    public static void main(String[] args) throws Exception {
        List<MessageResp> messageRespList = copy();
        for (MessageResp loop : messageRespList) {
            downloadQuietly(loop.getNextSrc(), loop.getTitle());
            List<MessageResp> children = loop.getMessageRespList();
            if (children == null) {
                continue;
            }
            for (MessageResp loop2 : children) {
                downloadQuietly(loop2.getNextSrc(), loop2.getTitle());
            }
        }
        System.out.println("爬虫程序完成,期待您的下次使用");
    }

    /**
     * Scrapes the first-level listing page and, for each tile on it, the
     * second-level page the tile links to.
     *
     * <p>Failures are tolerated: if the first-level page cannot be fetched an empty
     * list is returned; if a second-level page cannot be fetched its first-level
     * entry is still returned with an empty child list; tiles lacking an
     * {@code <a>} or {@code <img>} element are skipped. If the calling thread is
     * interrupted during the inter-request pause, the interrupt flag is restored and
     * the entries gathered so far are returned.
     *
     * @return one entry per first-level tile (never {@code null}), each carrying the
     *         entries scraped from its second-level page
     */
    public static List<MessageResp> copy() {
        List<MessageResp> messageRespList = new ArrayList<MessageResp>();

        Document doc = fetch(FIRST_URL);
        if (doc == null) {
            return messageRespList;
        }

        Elements divElements = doc.getElementsByClass(TILE_CLASS);
        for (int i = 0; i < divElements.size(); i++) {
            Element anchor = firstOrNull(divElements.get(i).select("a"));
            Element img = anchor == null ? null : firstOrNull(anchor.select("img"));
            if (img == null) {
                continue; // tile without a link or thumbnail: nothing to record
            }
            String href = SITE_ROOT + anchor.attr("href");
            String title = anchor.attr("title");
            // The src values are concatenated after the scheme, as in the original crawl.
            String nextSrc = "https:" + img.attr("src");

            List<MessageResp> messageRespList2 = parseSecondLevel(fetch(href));
            messageRespList.add(new MessageResp(shortId(), href, title, nextSrc, messageRespList2));

            if (!pause()) {
                break; // interrupted: stop crawling, keep what was collected
            }
        }
        return messageRespList;
    }

    /**
     * Downloads the resource at {@code url2} into the image directory, naming the
     * file after {@code title}.
     *
     * <p>Characters that are path separators or otherwise illegal in file names are
     * replaced with {@code _}; a {@code null} or empty title falls back to a random
     * identifier. The network stream is always closed before returning. Errors while
     * writing the file are reported by {@code IOUtils.download} and not rethrown.
     *
     * @param url2  absolute URL of the resource to download
     * @param title desired file name without extension
     * @return the local path the resource was written to
     * @throws Exception if the URL is malformed, the connection cannot be opened, or
     *                   the target directory cannot be created
     */
    public static String down2(String url2, String title) throws Exception {
        URL url = new URL(url2);

        File dir = new File(SAVE_DIR);
        // Re-check isDirectory() after mkdirs() so a concurrent creation is not an error.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory " + SAVE_DIR);
        }

        String nextSrc = SAVE_DIR + safeFileName(title) + ".jpg";
        URLConnection connection = url.openConnection();
        try (InputStream inputStream = connection.getInputStream()) {
            IOUtils.download(nextSrc, inputStream);
        }
        return nextSrc;
    }

    /** Runs {@link #down2} and reports, rather than propagates, any failure. */
    private static void downloadQuietly(String src, String title) {
        try {
            down2(src, title);
        } catch (Exception e) {
            System.err.println("Download failed: " + src);
            e.printStackTrace();
        }
    }

    /** Fetches and parses the page at {@code url}; returns {@code null} on I/O failure. */
    private static Document fetch(String url) {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Builds one child entry per well-formed tile of a second-level page ({@code null} page yields an empty list). */
    private static List<MessageResp> parseSecondLevel(Document page) {
        List<MessageResp> children = new ArrayList<MessageResp>();
        if (page == null) {
            return children;
        }
        Elements tiles = page.getElementsByClass(TILE_CLASS);
        for (int j = 0; j < tiles.size(); j++) {
            Element anchor = firstOrNull(tiles.get(j).select("a"));
            Element img = anchor == null ? null : firstOrNull(anchor.select("img"));
            if (img == null) {
                continue;
            }
            String href2 = SITE_ROOT + anchor.attr("href");
            String title2 = anchor.attr("title");
            String src2 = "https:" + img.attr("src");
            children.add(new MessageResp(shortId(), href2, title2, src2, null));
        }
        return children;
    }

    /** Returns the first element of {@code elements}, or {@code null} when it is empty. */
    private static Element firstOrNull(Elements elements) {
        return elements.isEmpty() ? null : elements.get(0);
    }

    /** Sleeps for the inter-request delay; returns {@code false} (flag restored) if interrupted. */
    private static boolean pause() {
        try {
            Thread.sleep(REQUEST_DELAY_MS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Generates an entry id: the first 16 characters of a random UUID with hyphens removed. */
    private static String shortId() {
        return UUID.randomUUID().toString().substring(0, 16).replaceAll("-", "");
    }

    /** Makes a scraped title safe to use as a single file-name component. */
    private static String safeFileName(String title) {
        String cleaned = title == null
                ? ""
                : title.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
        if (cleaned.isEmpty()) {
            return UUID.randomUUID().toString().replaceAll("-", "");
        }
        return cleaned;
    }
}
网友评论