美文网首页程序员
爬取日志文件

爬取日志文件

作者: 程序猿_小刚 | 来源:发表于2018-07-31 10:43 被阅读0次
    package com.use;
    
    
    import com.google.common.base.Splitter;
    import com.google.common.collect.Maps;
    import com.use.dao.InvitationDao;
    import com.use.entity.Invitation;
    import com.use.vo.ImportExcelVo;
    import com.use.vo.LinkVo;
    import org.apache.commons.lang3.StringUtils;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.boot.test.context.SpringBootTest;
    import org.springframework.test.context.junit4.SpringRunner;
    
    import java.io.BufferedWriter;
    import java.io.IOException;
    import java.io.UncheckedIOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.StandardOpenOption;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;
    
    /**
     * One-off extraction job written as a Spring Boot test.
     *
     * <p>It loads a list of URLs from a spreadsheet, scans the {@code *.log} files directly
     * inside a log directory for {@code /krairdrop/validate?address} requests, and for every
     * request whose quoted field (the text between {@code  200 8 "} and {@code " "Mozilla/5.0})
     * is one of the loaded URLs, records the bag address taken from the request's query string.
     * The collected {@code url-------addresses} pairs are appended to an output text file.
     *
     * <p>All input and output locations are hard-coded absolute Windows paths.
     */
    @RunWith(SpringRunner.class)
    @SpringBootTest
    public class ImportFileAddressTest {
        private static final Logger logger = LoggerFactory.getLogger(ImportFileAddressTest.class);

        @Autowired
        private InvitationDao invitationDao;

        /**
         * Runs the whole extraction: load URLs, scan the log files, append the result file.
         *
         * @throws IOException if the log directory cannot be walked or the output file cannot
         *                     be opened or written; failures on individual log files are logged
         *                     and skipped instead
         */
        @Test
        public void test() throws IOException {
            // Log files are processed in parallel below, so the accumulator must be thread-safe.
            Map<String, String> map = new ConcurrentHashMap<>();

            // Load the spreadsheet rows into voList (job.xml is presumably the column mapping
            // consumed by ImportExcelUtil -- confirm against that helper).
            ImportExcelVo vo =
                    new ImportExcelVo("job.xml", "C:\\Users\\sonyic\\Desktop\\krairjob.xls", "linkVoList", LinkVo.class);
            List<LinkVo> voList = ImportExcelUtil.importData(vo);
            // A Set gives constant-time membership checks; the lookup runs once per matching log line.
            Set<String> urls = voList.stream().map(LinkVo::getUrl).collect(Collectors.toSet());

            // Visit only the log files directly inside the directory (depth 1).
            // Files.walk holds an open directory handle, so it must be closed.
            try (Stream<Path> files = Files.walk(Paths.get("C:\\Users\\sonyic\\Desktop\\nginx_log"), 1)) {
                files.filter(file -> file.toString().endsWith(".log"))
                        .parallel()
                        .forEach(file -> collectAddresses(file, urls, map));
            }

            // Append every collected entry to output.txt. CREATE is required because APPEND
            // alone fails when the file does not exist yet. Write errors propagate so the
            // test does not report success without having produced its output.
            Path path = Paths.get("C:\\Users\\sonyic\\Desktop\\output.txt");
            try (BufferedWriter writer = Files.newBufferedWriter(
                    path, StandardOpenOption.CREATE, StandardOpenOption.APPEND)) {
                for (Map.Entry<String, String> entry : map.entrySet()) {
                    writer.append(entry.getKey() + "-------" + entry.getValue() + "\n");
                }
            }
        }

        /**
         * Scans one log file and records the addresses of all matching validate requests.
         * A file that cannot be read is logged and skipped so the remaining files are still processed.
         *
         * @param file log file to scan
         * @param urls URLs that a log line's quoted field must match to be recorded
         * @param map  thread-safe accumulator from URL to its space-separated addresses
         */
        private void collectAddresses(Path file, Set<String> urls, Map<String, String> map) {
            try (Stream<String> lines = Files.lines(file)) {
                lines.filter(line -> StringUtils.contains(line, "/krairdrop/validate?address"))
                        .forEach(line -> recordAddress(line, urls, map));
            } catch (IOException | UncheckedIOException e) {
                // Files.lines reads lazily: failures while iterating (including undecodable
                // bytes) surface as UncheckedIOException, not IOException.
                logger.error("failed to read log file {}", file, e);
            }
        }

        /**
         * Extracts the URL field and the bag address from a single log line and, when the URL
         * is one of {@code urls}, adds the address to that URL's entry in {@code map}.
         * Multiple addresses for the same URL are joined with two spaces.
         *
         * @param line a log line containing a validate request
         * @param urls URLs of interest
         * @param map  thread-safe accumulator from URL to its space-separated addresses
         */
        private void recordAddress(String line, Set<String> urls, Map<String, String> map) {
            String validateLine = StringUtils.substringBetween(line, " 200 8 \"", "\" \"Mozilla/5.0");
            // substringBetween returns null when either delimiter is missing.
            if (validateLine == null || !urls.contains(validateLine)) {
                return;
            }
            logger.info(".......line......{}", line);

            String addressUrl = StringUtils.substringBetween(line, "GET", "HTTP/1.1");
            String address = StringUtils.substringAfterLast(addressUrl, "/krairdrop/validate?");
            if (StringUtils.isEmpty(address)) {
                return;
            }

            Map<String, String> addressMap;
            try {
                addressMap = Splitter.on("&").withKeyValueSeparator("=").split(address);
            } catch (IllegalArgumentException e) {
                // Thrown for query strings that are not clean key=value pairs or repeat a key.
                logger.warn("skipping malformed query string in line: {}", line, e);
                return;
            }

            String bagAddress = Class2MapUtil.convertBean(addressMap, Invitation.class).getBagAddress();
            if (bagAddress == null) {
                // ConcurrentHashMap rejects null values; a line without a bag address adds nothing.
                return;
            }
            // merge is atomic per key, replacing the racy get-then-put on a shared map.
            map.merge(validateLine, bagAddress, (existing, added) -> existing + "  " + added);
        }

    }
    

    相关文章

      网友评论

        本文标题:爬取日志文件

        本文链接:https://www.haomeiwen.com/subject/uppcvftx.html