package com.use;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import com.use.dao.InvitationDao;
import com.use.entity.Invitation;
import com.use.vo.ImportExcelVo;
import com.use.vo.LinkVo;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@RunWith(SpringRunner.class)
@SpringBootTest
public class ImportFileAddressTest {
Logger logger =LoggerFactory.getLogger(ImportFileAddressTest.class);
@Autowired
private InvitationDao invitationDao;
@Test
public void test() throws IOException {
Map<String,String> map =Maps.newHashMap();
// 将 xml文件解析到 voList 中
ImportExcelVo vo=
new ImportExcelVo("job.xml","C:\\Users\\sonyic\\Desktop\\krairjob.xls","linkVoList",LinkVo.class);
List<LinkVo> voList =ImportExcelUtil.importData(vo);
List<String> urlList=voList.parallelStream().map(linkVo -> linkVo.getUrl()).collect(Collectors.toList());
// 文件爬取一层下的的所有日志文件
Files.walk(Paths.get("C:\\Users\\sonyic\\Desktop\\nginx_log"),1)
.filter(file -> file.toString().endsWith(".log"))
.parallel()
.forEach((Path file) -> {
try (Stream<String> lines=Files.lines(file);){
// 进行匹配
lines.filter(line -> StringUtils.contains(line,"/krairdrop/validate?address")).forEach(
line->{
String validateLine = StringUtils.substringBetween(line," 200 8 \"","\" \"Mozilla/5.0");
String addressUrl = StringUtils.substringBetween(line,"GET","HTTP/1.1");
String address = StringUtils.substringAfterLast(addressUrl,"/krairdrop/validate?");
if (urlList.contains(validateLine)){
logger.info(".......line......{}",line);
if (StringUtils.isNotEmpty(address)){
if (map.get(validateLine) == null) {
Map<String, String> addressMap=Splitter.on("&").withKeyValueSeparator("=").split(address);
map.put(validateLine,Class2MapUtil.convertBean(addressMap,Invitation.class).getBagAddress());
} else {
Map<String, String> addressMap=Splitter.on("&").withKeyValueSeparator("=").split(address);
map.put(validateLine,map.get(validateLine) + " "+Class2MapUtil.convertBean(addressMap,Invitation.class).getBagAddress());
}
}
}
});
} catch (IOException e) {
e.printStackTrace();
}
});
// 将符合条件的内容存放到 output.txt 文件中。
Path path = Paths.get("C:\\Users\\sonyic\\Desktop\\output.txt");
try (BufferedWriter writer = Files.newBufferedWriter(path,StandardOpenOption.APPEND))
{
map.entrySet().forEach(entry->{
try {
writer.append(entry.getKey()+"-------"+entry.getValue()+"\n");
} catch (IOException e) {
e.printStackTrace();
}
});
} catch (IOException e) {
e.printStackTrace();
}
//logger.info("------ url ------{}",URLDecoder.decode("http%3A%2F%2Frcfund.org%2Fkrairdrop%2F%3Fv%3DG0ULvfAP",java.nio.charset.StandardCharsets.UTF_8.name()));
}
}
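// "User comments": stray heading text, apparently left over from the web page this file was copied from; not part of the program.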