美文网首页
Jsoup demo

Jsoup demo

作者: timar | 来源:发表于2019-11-18 16:51 被阅读0次

jsoup 是爬虫小能手,时不时会用到,放个 demo 在这里,用到时再回顾一下。

import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.LoggerContext;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demo that fetches a commit-list page with Jsoup, picks the commit entries whose last
 * {@code div} carries the CSS class {@code grey}, and sends one HTTP POST per such commit with
 * {@code commit_status=1}.
 *
 * <p>The session cookie and CSRF token must be copied from a logged-in browser session into
 * {@link #Cookie} and {@link #X_CSRF_Token} before running.
 *
 * <p>NOTE(review): {@code commit_status=1} presumably marks the commit as reviewed; confirm
 * against the target server's API.
 */
public class CodeReviewUtils {

    static Logger logger = LoggerFactory.getLogger(CodeReviewUtils.class);

    // Address of the project to review.
    static String url = "";

    // Copy these from the browser after logging in on the web page.
    static String Cookie = "";
    static String X_CSRF_Token = "";

    // Browser-like User-Agent sent with both the page fetch and the status POSTs.
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36";

    // Extracts everything after "commit/" in a commit link; compiled once instead of per element.
    private static final Pattern COMMIT_ID_PATTERN = Pattern.compile(".*commit/(.*)");

    /**
     * Fetches the commit list, then posts a status update for every selected commit.
     *
     * @param args unused
     * @throws IOException if fetching the commit list fails or the HTTP client cannot be closed;
     *                     a failure of an individual POST is logged and does not abort the run
     */
    public static void main(String[] args) throws IOException {
        // Force every logback logger to INFO so library debug output stays quiet.
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
        List<ch.qos.logback.classic.Logger> loggerList = loggerContext.getLoggerList();
        loggerList.forEach(each -> each.setLevel(Level.INFO));

        Connection connect = Jsoup.connect(url + "......................");
        connect.header("Cookie", Cookie);
        connect.header("X-CSRF-Token", X_CSRF_Token);
        connect.header("User-Agent", USER_AGENT);
        connect.header("Referer", "");

        Connection.Response execute = connect.execute();
        Document html = Jsoup.parse(execute.body());
        Elements allCommitDiv = html.select("div.commit.compare_check_box");
        logger.info("code:{}", execute.statusCode());
        logger.info("size:{}", allCommitDiv.size());

        // One client for the whole run; try-with-resources guarantees it is closed.
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            for (Element commitDiv : allCommitDiv) {
                // NOTE(review): the original comment here read "already reviewed, skip", yet the
                // code acts on entries whose last div has class "grey" and ignores the others.
                // Confirm which review state "grey" represents.
                if (!commitDiv.select("div").last().hasClass("grey")) {
                    continue;
                }

                String commitId = extractCommitId(commitDiv);
                if (commitId.isEmpty()) {
                    // Without an id the POST would carry an empty commit_id, so skip it.
                    logger.warn("skipping commit entry without a parsable commit link");
                    continue;
                }
                logger.info("commitId:{}", commitId);
                postCommitStatus(client, commitId);
            }
        }
    }

    /**
     * Reads the commit id from the first {@code a.row_title} link inside the given element.
     *
     * @param commitDiv one commit entry from the commit list page
     * @return the text after {@code commit/} in the link's href, or an empty string when the
     *         link is missing or its href does not contain {@code commit/}
     */
    private static String extractCommitId(Element commitDiv) {
        Element aTag = commitDiv.select("a.row_title").first();
        if (aTag == null) {
            return "";
        }
        Matcher href = COMMIT_ID_PATTERN.matcher(aTag.attr("href"));
        return href.matches() ? href.group(1) : "";
    }

    /**
     * Sends the status POST for one commit and logs the resulting HTTP status code.
     * An {@link IOException} is logged rather than propagated so remaining commits are still
     * processed.
     *
     * @param client   shared HTTP client; not closed by this method
     * @param commitId id appended to the request as the {@code commit_id} query parameter
     */
    private static void postCommitStatus(CloseableHttpClient client, String commitId) {
        HttpPost httpPost = new HttpPost(url + "......................?commit_id=" + commitId + "&commit_status=1");
        httpPost.addHeader("Cookie", Cookie);
        httpPost.addHeader("X-CSRF-Token", X_CSRF_Token);
        httpPost.addHeader("User-Agent", USER_AGENT);
        httpPost.addHeader("Referer", "");
        // Closing the response releases the underlying connection back to the client.
        try (CloseableHttpResponse response = client.execute(httpPost)) {
            logger.info("{} -> {}", commitId, response.getStatusLine().getStatusCode());
        } catch (IOException ex) {
            logger.error("POST for commit {} failed", commitId, ex);
        }
    }
}

相关文章

网友评论

      本文标题:Jsoup demo

      本文链接:https://www.haomeiwen.com/subject/bhybictx.html