环境搭建
创建一个SpringBoot项目。
配置

<properties>
<java.version>1.8</java.version>
<!--自定义es版本依赖,保证和本地安装的Elasticsearch版本一致-->
<elasticsearch.version>7.6.2</elasticsearch.version>
</properties>
<dependencies>
<!--解析网页。只能解析网页-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<!--fastJson-->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.62</version>
</dependency>
<!--Elasticsearch-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

##关闭thymeleaf缓存
spring.thymeleaf.cache=false

导入页面资料。这个在Elasticsearch概述中留有百度云链接
controller

/**
 * MVC controller that serves the landing page of the demo.
 */
@Controller
public class IndexController {

    /**
     * Handles requests for the site root and for {@code /index}.
     *
     * @return the logical view name {@code "index"}
     */
    @RequestMapping(value = {"/", "/index"})
    public String index() {
        final String viewName = "index";
        return viewName;
    }
}
启动项目查看效果~ http://localhost:8080/

Jsoup解析
<!--解析网页。只能解析网页-->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>



创建utils包,创建HtmlParseUtil类
通过对其网站的分析,就应该能看懂下面的代码了

/**
 * Demo crawler that prints the goods found on a JD search result page.
 *
 * <p>Annotated with {@code @Component} so it can be injected with {@code @Autowired};
 * when it is not managed by Spring, create it with {@code new}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for the keyword "java" and prints the image URL,
     * price and title of every {@code <li>} inside the {@code J_goodsList} element.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        // The request below needs network access.
        String url = "https://search.jd.com/Search?keyword=java";
        // Jsoup returns a Document that can be queried like the browser's Document object;
        // the second argument is the timeout in milliseconds.
        Document document = Jsoup.parse(new URL(url), 30000);

        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // getElementById returns null when the id is absent (e.g. the page layout changed
            // or a different page was served); bail out instead of failing with an NPE.
            System.err.println("Element with id J_goodsList not found at " + url);
            return;
        }
        // Print the HTML nested inside the J_goodsList element.
        System.out.println(element.html());

        // Every goods entry is an <li> element.
        Elements elements = element.getElementsByTag("li");
        for (Element e1 : elements) {
            // Images are lazy-loaded, so the URL is read from data-lazy-img rather than src.
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();
            System.out.println("========================================");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}

成功获取到相应信息。接着对其进行封装。首先创建一个pojo Content对象

然后封装成一个parseJD方法。


/**
 * Crawler that extracts goods information from a JD search result page.
 *
 * <p>Annotated with {@code @Component} so it can be injected with {@code @Autowired};
 * when it is not managed by Spring, create it with {@code new}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Manual smoke test: crawls the results for "Vue" and prints every parsed item.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static void main(String[] args) throws IOException {
        new HtmlParseUtil().parseJD("Vue").forEach(System.out::println);
    }

    /**
     * Fetches the JD search page for the given keywords and converts every {@code <li>}
     * inside the {@code J_goodsList} element into a {@code Content} object.
     *
     * @param keywords search keywords; URL-encoded as UTF-8 before being sent
     * @return the parsed goods, or an empty list when the page has no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public ArrayList<Content> parseJD(String keywords) throws IOException {
        // Encode the keyword so spaces, '&', Chinese characters etc. cannot break the query string.
        // The (String, String) overload is used because the project targets Java 8; the
        // UnsupportedEncodingException it declares is an IOException.
        String url = "https://search.jd.com/Search?keyword="
                + java.net.URLEncoder.encode(keywords, "UTF-8");
        System.out.println(url);

        // The fetch needs network access; the second argument is the timeout in milliseconds.
        Document document = Jsoup.parse(new URL(url), 30000);

        ArrayList<Content> goodsList = new ArrayList<>();
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // getElementById returns null when the id is absent; report "nothing found"
            // instead of failing with an NPE.
            return goodsList;
        }

        // Every goods entry is an <li> element.
        Elements elements = element.getElementsByTag("li");
        for (Element e1 : elements) {
            String img = e1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = e1.getElementsByClass("p-price").eq(0).text();
            String title = e1.getElementsByClass("p-name").eq(0).text();

            Content content = new Content();
            content.setImg(img);
            content.setTitle(title);
            content.setPrice(price);
            goodsList.add(content);
        }
        return goodsList;
    }
}
业务编写
首先照样编写Elasticsearch的配置类。

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a client that talks to a single node at {@code http://localhost:9200}.
     *
     * @return the configured {@code RestHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost localNode = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(localNode));
    }
}
编写service业务类

/**
 * Business service that crawls goods from JD and stores them in Elasticsearch.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Crawls the goods matching the keywords and bulk-indexes them into the
     * {@code jd_goods} index.
     *
     * @param keywords search keywords passed to the crawler
     * @return {@code true} if every document was indexed without failure; {@code false} if the
     *         bulk response reports failures or the crawler returned nothing to index
     * @throws IOException if crawling or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action is rejected by the client's request validation,
            // so report "nothing inserted" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // bulk request timeout: two minutes
        for (Content content : contents) {
            // Serialize once and reuse the JSON for both the log line and the index request.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }
}
编写controller

/**
 * REST endpoints for the goods demo.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Triggers a crawl for the given keyword and stores the result via {@code ContentService}.
     *
     * @param keyword keyword taken from the request path
     * @return the value returned by {@code ContentService#parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }
}
启动项目进行测试http://localhost:8080/parse/java


成功添加相关java商品资料。
接着我们继续编写service,添加分页搜索ES中的数据。

/**
 * Business service that crawls goods from JD, stores them in Elasticsearch and
 * offers a paged search over the stored documents.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Crawls the goods matching the keywords and bulk-indexes them into the
     * {@code jd_goods} index.
     *
     * @param keywords search keywords passed to the crawler
     * @return {@code true} if every document was indexed without failure; {@code false} if the
     *         bulk response reports failures or the crawler returned nothing to index
     * @throws IOException if crawling or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action is rejected by the client's request validation,
            // so report "nothing inserted" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // bulk request timeout: two minutes
        for (Content content : contents) {
            // Serialize once and reuse the JSON for both the log line and the index request.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches the {@code jd_goods} index with a term query on {@code title} and returns one page.
     *
     * @param keyword  term looked up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the {@code _source} map of every hit on the requested page
     * @throws IOException if the Elasticsearch request fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number, so convert the 1-based page.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Term query on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        // Collect the _source of every hit.
        ArrayList<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            list.add(hit.getSourceAsMap());
        }
        return list;
    }
}
接着在controller中添加一个请求

/**
 * REST endpoints for the goods demo: crawling into Elasticsearch and paged search.
 */
@RestController
public class ContentController {

    @Autowired
    ContentService contentService;

    /**
     * Triggers a crawl for the given keyword and stores the result via {@code ContentService}.
     *
     * @param keyword keyword taken from the request path
     * @return the value returned by {@code ContentService#parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        return contentService.parseContent(keyword);
    }

    /**
     * Delegates a paged search to {@code ContentService#searchPage}.
     *
     * @param keyword  keyword taken from the request path
     * @param pageNo   page number taken from the request path
     * @param pageSize page size taken from the request path
     * @return the list returned by the service
     * @throws IOException propagated from the service
     */
    @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
    public List<Map<String, Object>> search(
            @PathVariable("keyword") String keyword,
            @PathVariable("pageNo") int pageNo,
            @PathVariable("pageSize") int pageSize) throws IOException {
        return contentService.searchPage(keyword, pageNo, pageSize);
    }
}
启动项目进行测试http://localhost:8080/search/java/1/20

前端页面
导入vue 和 axios,我这里使用的是在线版的
<script src="https://cdn.staticfile.org/vue/2.6.2/vue.min.js"></script>
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
修改我们的index页面。

启动项目查看效果。(我已经解析过了vue数据添加到了es中)

高亮功能
我们在业务类service中修改一下代码。

将高亮中的字段替换添加到_source中的title

/**
 * Business service that crawls goods from JD, stores them in Elasticsearch and
 * offers a paged search with highlighted titles.
 */
@Service
public class ContentService {

    @Autowired
    RestHighLevelClient restHighLevelClient;

    /**
     * Crawls the goods matching the keywords and bulk-indexes them into the
     * {@code jd_goods} index.
     *
     * @param keywords search keywords passed to the crawler
     * @return {@code true} if every document was indexed without failure; {@code false} if the
     *         bulk response reports failures or the crawler returned nothing to index
     * @throws IOException if crawling or the Elasticsearch request fails
     */
    public Boolean parseContent(String keywords) throws IOException {
        ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);
        if (contents.isEmpty()) {
            // A bulk request without any action is rejected by the client's request validation,
            // so report "nothing inserted" instead of sending it.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("2m"); // bulk request timeout: two minutes
        for (Content content : contents) {
            // Serialize once and reuse the JSON for both the log line and the index request.
            String json = JSON.toJSONString(content);
            System.out.println(json);
            bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
        }

        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Searches the {@code jd_goods} index with a term query on {@code title} and returns one
     * page of hits. When a hit has highlight fragments for {@code title}, the {@code title}
     * entry of its source map is replaced by the concatenated fragments.
     *
     * @param keyword  term looked up in the {@code title} field
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize number of hits per page
     * @return the (possibly highlighted) {@code _source} map of every hit on the requested page
     * @throws IOException if the Elasticsearch request fails
     */
    public List<Map<String, Object>> searchPage(String keyword, int pageNo, int pageSize) throws IOException {
        if (pageNo < 1) {
            pageNo = 1;
        }

        SearchRequest searchRequest = new SearchRequest("jd_goods");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        // Paging: "from" is a document offset, not a page number, so convert the 1-based page.
        sourceBuilder.from((pageNo - 1) * pageSize);
        sourceBuilder.size(pageSize);

        // Term query on the title field.
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
        sourceBuilder.query(termQueryBuilder);
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

        // Highlighting: wrap matches in the title in a red <span>.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("title");
        // Do not require the highlighted field to be the field the query matched on.
        highlightBuilder.requireFieldMatch(false);
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        sourceBuilder.highlighter(highlightBuilder);

        searchRequest.source(sourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        ArrayList<Map<String, Object>> list = new ArrayList<>();
        for (SearchHit hit : searchResponse.getHits().getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // the stored document
            HighlightField title = hit.getHighlightFields().get("title");
            if (title != null) {
                // Join all fragments and use them in place of the plain title.
                StringBuilder highlighted = new StringBuilder();
                for (Text fragment : title.fragments()) {
                    highlighted.append(fragment);
                }
                sourceAsMap.put("title", highlighted.toString());
            }
            list.add(sourceAsMap);
        }
        return list;
    }
}

<p class="productTitle">
<a v-html="result.title"> </a>
</p>
重启服务,访问测试。http://localhost:8080/

完成!
网友评论