/**
 * Pulls a date-like token, the title, the content block and the image
 * sources out of the given page, then prints two marker lines to standard
 * output.
 *
 * <p>NOTE(review): every value extracted here is held only in a local
 * variable; nothing is stored on the page or returned. Confirm whether the
 * results were meant to be published somewhere.
 *
 * @param page the page whose raw text and parsed HTML are inspected
 */
public void process(Page page) {
    final String pageSource = page.getRawText();
    final Html document = page.getHtml();

    // Search the raw text for a fragment shaped like  ,s="dddd-dd-dd";
    final Matcher dateMatcher =
            Pattern.compile("(,s=\")\\d{4}-\\d{2}-\\d{2}(\";)").matcher(pageSource);
    if (dateMatcher.find()) {
        // Splitting the matched fragment on double quotes leaves the
        // digits-and-dashes part at index 1.
        String dateText = dateMatcher.group().split("\"")[1];
    }

    // Text of the <h2> element carrying the rich_media_title class.
    String heading = document.xpath("//h2[@class='rich_media_title']/text()").toString();

    // Serialized <div> whose class attribute is exactly "rich_media_content "
    // (the trailing space is part of the match).
    String body = document.xpath("//div[@class='rich_media_content ']").toString();

    // Re-parse the content fragment on its own and collect each <img>'s
    // data-src attribute.
    List<String> imageSources = new Html(body).xpath("//img/@data-src").all();

    System.out.println("ok");
    System.out.println("hello");
}
网友评论