jsoup爬虫小能手,总是时不时会用到,放个demo在这里,用到再回顾一下。
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.LoggerContext;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo crawler built on jsoup and Apache HttpClient.
 *
 * <p>It fetches a commit listing page, selects every
 * {@code div.commit.compare_check_box} element whose last nested {@code div}
 * carries the {@code grey} class, extracts the commit id from the entry's
 * {@code a.row_title} link, and issues a POST with {@code commit_status=1}
 * for that commit.
 *
 * <p>{@link #url}, {@link #Cookie} and {@link #X_CSRF_Token} must be filled in
 * by hand before running.
 */
public class CodeReviewUtils {
    static Logger logger = LoggerFactory.getLogger(CodeReviewUtils.class);

    // Address of the project to be reviewed.
    static String url = "";
    // Copied by hand from the browser (log in on the web page first).
    static String Cookie = "";
    static String X_CSRF_Token = "";

    /** Browser User-Agent sent with both the listing request and the status POSTs. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36";

    /** Captures everything after {@code commit/} in a link target; compiled once. */
    private static final Pattern COMMIT_HREF = Pattern.compile(".*commit/(.*)");

    /**
     * Fetches the commit listing and posts a status update for each selected commit.
     *
     * @param args unused
     * @throws IOException if the listing page cannot be fetched or the shared
     *                     HTTP client fails to close
     */
    public static void main(String[] args) throws IOException {
        // Force every logback logger to INFO so library debug output stays quiet.
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
        List<ch.qos.logback.classic.Logger> loggerList = loggerContext.getLoggerList();
        for (ch.qos.logback.classic.Logger configured : loggerList) {
            configured.setLevel(Level.INFO);
        }

        Connection connect = Jsoup.connect(url + "......................");
        connect.header("Cookie", Cookie);
        connect.header("X-CSRF-Token", X_CSRF_Token);
        connect.header("User-Agent", USER_AGENT);
        connect.header("Referer", "");
        Connection.Response listing = connect.execute();

        Document html = Jsoup.parse(listing.body());
        Elements allCommitDiv = html.select("div.commit.compare_check_box");
        logger.info("code:{}", listing.statusCode());
        logger.info("size:{}", allCommitDiv.size());

        // One client for the whole run; try-with-resources guarantees it is closed.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            for (Element commitDiv : allCommitDiv) {
                // Original note here read "already reviewed, skip".
                // NOTE(review): the code acts on entries whose last div HAS class
                // "grey" -- confirm whether "grey" marks reviewed or pending commits.
                if (!commitDiv.select("div").last().hasClass("grey")) {
                    continue;
                }
                String commitId = extractCommitId(commitDiv);
                if (commitId.isEmpty()) {
                    // Posting with an empty commit_id would hit the endpoint with a bogus request.
                    logger.warn("no commit id found in entry, skipping");
                    continue;
                }
                logger.info("commitId:{}", commitId);
                postCommitStatus(client, commitId);
            }
        }
    }

    /**
     * Reads the commit id from the entry's first {@code a.row_title} link.
     *
     * @param commitDiv one commit entry from the listing page
     * @return the text following {@code commit/} in the link's {@code href},
     *         or an empty string when the link is missing or does not match
     */
    private static String extractCommitId(Element commitDiv) {
        Element titleLink = commitDiv.select("a.row_title").first();
        if (titleLink == null) {
            return "";
        }
        Matcher href = COMMIT_HREF.matcher(titleLink.attr("href"));
        return href.matches() ? href.group(1) : "";
    }

    /**
     * POSTs {@code commit_status=1} for the given commit and logs the HTTP status code.
     * I/O failures are logged and do not abort the remaining commits.
     *
     * @param client   shared HTTP client, owned and closed by the caller
     * @param commitId commit id to put in the {@code commit_id} query parameter
     */
    private static void postCommitStatus(CloseableHttpClient client, String commitId) {
        HttpPost httpPost = new HttpPost(url + "......................?commit_id=" + commitId + "&commit_status=1");
        httpPost.addHeader("Cookie", Cookie);
        httpPost.addHeader("X-CSRF-Token", X_CSRF_Token);
        httpPost.addHeader("User-Agent", USER_AGENT);
        httpPost.addHeader("Referer", "");
        // Closing the response releases the underlying connection.
        try (CloseableHttpResponse response = client.execute(httpPost)) {
            logger.info("{} -> {}", commitId, response.getStatusLine().getStatusCode());
        } catch (IOException ex) {
            logger.error("request for commit {} failed", commitId, ex);
        }
    }
}
网友评论