美文网首页
SpringBoot定时任务结合jsoup抓取网页信息

SpringBoot定时任务结合jsoup抓取网页信息

作者: 流年逝去sky | 来源:发表于2019-06-11 23:35 被阅读0次

测试环境需要一些测试数据,压测时也需要大量不重复的数据,因此可以使用 SpringBoot 定时任务结合 jsoup 抓取网站上的证件号信息,保存后作为测试数据使用。本项目Github源码

/**
 * Scheduled job that scrapes name / ID-number pairs from a web page with jsoup
 * and saves the ones that are not stored yet through {@code UserService}.
 */
@Configuration
@EnableScheduling // enable Spring's scheduled-task support
public class GetIdNos {

    /** Page that is fetched on every run. */
    private static final String SOURCE_URL = "https://hk.51240.com/";

    /** Zero-based index of the {@code <table>} element whose rows are read. */
    private static final int DATA_TABLE_INDEX = 3;

    @Autowired
    private UserService userService;

    /**
     * Fetches the source page, reads every row (except the first) of the data table and
     * inserts a new {@code User} for each ID number that {@code findUserByIdNo} does not
     * already return a record for.
     *
     * <p>The cron expression {@code 0/5 * * * * ?} triggers this method every 5 seconds.
     *
     * @throws IllegalStateException if the page contains fewer tables than expected,
     *                               i.e. the page layout no longer matches this scraper
     * @throws Exception             if the page cannot be fetched or a service call fails
     */
    @Scheduled(cron = "0/5 * * * * ?") // every 5 seconds: scrape ID info and save it to the database
    public void scheduler() throws Exception {
        // One timestamp per run, shared by every record inserted in this run.
        String time = DateFormat.getDateTimeInstance().format(new Date());

        Document document = Jsoup.connect(SOURCE_URL).get();

        Elements tables = document.getElementsByTag("table");
        if (tables.size() <= DATA_TABLE_INDEX) {
            // Fail with a clear message instead of a bare IndexOutOfBoundsException.
            throw new IllegalStateException("Expected at least " + (DATA_TABLE_INDEX + 1)
                    + " <table> elements at " + SOURCE_URL + " but found " + tables.size());
        }

        Elements trElements = tables.get(DATA_TABLE_INDEX).getElementsByTag("tr");
        // Row 0 is skipped on purpose -- presumably the header row; confirm against the page.
        for (int i = 1; i < trElements.size(); i++) {
            Elements tds = trElements.get(i).getElementsByTag("td");
            if (tds.size() < 2) {
                // Not a data row (needs both a name cell and an ID-number cell).
                continue;
            }

            String name = tds.get(0).text();
            String idNo = tds.get(1).text();
            if (idNo.isEmpty()) {
                // A record without an ID number is useless as test data.
                continue;
            }

            // Insert only ID numbers that are not stored yet, so repeated runs do not duplicate rows.
            User user = userService.findUserByIdNo(idNo);
            if (user == null) {
                userService.insert(new User(idNo, name, time));
            }
        }
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the GetPersonId scraper project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this project. -->
    <groupId>com.zhongan</groupId>
    <artifactId>GetPersonId</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Inherit from the Spring Boot starter parent POM. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.1.4.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <properties>
        <!-- Java language level used for the build. -->
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- Spring Boot JDBC starter; no version given here, so it is expected to come from the parent. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>
        <!-- MyBatis Spring Boot starter; version is pinned explicitly. -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>2.0.1</version>
        </dependency>
        <!-- MySQL JDBC driver; version is pinned explicitly. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.15</version>
        </dependency>
        <!-- jsoup, the library the scheduled job uses to fetch and parse the web page. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.3</version>
        </dependency>
    </dependencies>


</project>

成功爬取信息并保存


image.png

相关文章

网友评论

      本文标题:SpringBoot定时任务结合jsoup抓取网页信息

      本文链接:https://www.haomeiwen.com/subject/wzdhfctx.html