美文网首页
SpringBoot定时任务结合jsoup抓取网页信息

SpringBoot定时任务结合jsoup抓取网页信息

作者: 流年逝去sky | 来源:发表于2019-06-11 23:35 被阅读0次

    由于测试环境需要一些测试数据,或者压测时需要大量不重复的数据,可以使用 SpringBoot 定时任务抓取网站上的证件号信息,作为测试数据使用。本项目 Github 源码

    /**
     * Scheduled job that scrapes name / ID-number pairs from a web page and stores
     * the ones that are not yet known through {@link UserService}.
     */
    @Configuration
    @EnableScheduling // enable processing of @Scheduled methods
    public class GetIdNos {

        /** Page that is scraped on every run. */
        private static final String SOURCE_URL = "https://hk.51240.com/";

        /** Zero-based index of the {@code <table>} on the page that is read. */
        private static final int DATA_TABLE_INDEX = 3;

        @Autowired
        private UserService userService;

        /**
         * Fetches {@link #SOURCE_URL}, reads the rows of the table at
         * {@link #DATA_TABLE_INDEX} and inserts a {@code User} for every ID number
         * that {@code userService.findUserByIdNo} does not already return.
         *
         * <p>The first row of the table is skipped. Rows with fewer than two
         * {@code <td>} cells, or with an empty ID number, are ignored instead of
         * aborting the whole run.
         *
         * @throws IllegalStateException if the page has no table at {@link #DATA_TABLE_INDEX}
         * @throws Exception if the page cannot be fetched or a service call fails
         */
        @Scheduled(cron = "0/5 * * * * ?") // fires every 5 seconds
        public void scheduler() throws Exception {
            // One timestamp per run, so every record inserted by this run carries the same time.
            String time = DateFormat.getDateTimeInstance().format(new Date());

            Document document = Jsoup.connect(SOURCE_URL).get();
            Elements tables = document.getElementsByTag("table");
            if (tables.size() <= DATA_TABLE_INDEX) {
                // Fail with a descriptive message rather than a bare IndexOutOfBoundsException.
                throw new IllegalStateException("Expected a table at index " + DATA_TABLE_INDEX
                        + " on " + SOURCE_URL + " but the page has only " + tables.size());
            }

            Elements rows = tables.get(DATA_TABLE_INDEX).getElementsByTag("tr");
            // Start at 1: the first row is not treated as data (presumably a header row).
            for (int i = 1; i < rows.size(); i++) {
                Elements cells = rows.get(i).getElementsByTag("td");
                if (cells.size() < 2) {
                    continue; // not a name / ID-number row
                }
                String name = cells.get(0).text();
                String idNo = cells.get(1).text();
                if (idNo.isEmpty()) {
                    continue; // nothing usable to store
                }
                // Plain null check: the lookup yields an entity, not a String.
                if (userService.findUserByIdNo(idNo) == null) {
                    userService.insert(new User(idNo, name, time));
                }
            }
        }
    }
    
    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Maven project descriptor for the GetPersonId scraper project. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <!-- Coordinates of this project. -->
        <groupId>com.zhongan</groupId>
        <artifactId>GetPersonId</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <!-- Inherits from the Spring Boot starter parent POM, version 2.1.4.RELEASE. -->
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>2.1.4.RELEASE</version>
            <relativePath/> <!-- lookup parent from repository -->
        </parent>
    
        <properties>
            <!-- Java version property; presumably consumed by the parent POM's compiler settings. -->
            <java.version>1.8</java.version>
        </properties>
    
        <dependencies>
            <!-- Spring Boot JDBC starter; declared without a version, so it must come from the parent. -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-jdbc</artifactId>
            </dependency>
            <!-- MyBatis integration for Spring Boot; version pinned explicitly here. -->
            <dependency>
                <groupId>org.mybatis.spring.boot</groupId>
                <artifactId>mybatis-spring-boot-starter</artifactId>
                <version>2.0.1</version>
            </dependency>
            <!-- MySQL JDBC driver; version pinned explicitly here. -->
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>8.0.15</version>
            </dependency>
            <!-- jsoup HTML parser, used by the scheduled job to fetch and parse the page. -->
            <dependency>
                <groupId>org.jsoup</groupId>
                <artifactId>jsoup</artifactId>
                <version>1.11.3</version>
            </dependency>
        </dependencies>
    
    
    </project>
    

    成功爬取信息并保存


    image.png

    相关文章

      网友评论

          本文标题:SpringBoot定时任务结合jsoup抓取网页信息

          本文链接:https://www.haomeiwen.com/subject/wzdhfctx.html