美文网首页
Elasticsearch京东搜索实例

Elasticsearch京东搜索实例

作者: 西界__ | 来源:发表于2020-12-09 10:37 被阅读0次

    环境搭建

    创建一个SpringBoot项目。

    配置

    image-20201118161046183
    <properties>
        <java.version>1.8</java.version>
        <!-- Pin the Elasticsearch dependency version so it matches the locally installed one -->
        <elasticsearch.version>7.6.2</elasticsearch.version>
    </properties>
    
    <dependencies>
        <!-- jsoup: parses web pages (HTML only) -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.10.2</version>
        </dependency>
        <!-- fastjson: JSON serialization -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>
        <!-- Elasticsearch -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
    
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
    
    image-20201118161058938
    ## Disable the Thymeleaf template cache
    spring.thymeleaf.cache=false
    
    image-20201118161405426

    导入页面资料。这个在Elasticsearch概述中留有百度云链接

    controller

    image-20201118161556687
    /**
     * MVC controller that serves the search landing page.
     */
    @Controller
    public class IndexController {

        /** View name returned for the landing page. */
        private static final String INDEX_VIEW = "index";

        /**
         * Handles requests for {@code /} and {@code /index}.
         *
         * @return the view name {@code "index"}
         */
        @RequestMapping(value = {"/", "/index"})
        public String index() {
            return INDEX_VIEW;
        }
    }
    

    启动项目查看效果~ http://localhost:8080/

    image-20201118161733077

    Jsoup解析

    <!-- jsoup: parses web pages (HTML only) -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.10.2</version>
    </dependency>
    
    image-20201118162431516 image-20201118162754511 image-20201118165224792

    创建utils包,创建HtmlParseUtil类

    通过对其网站的分析,就应该能看懂下面的代码了

    image-20201118164733249
    /**
     * Demo utility that fetches a JD search result page and prints the image URL,
     * price and title of every listed product.
     */
    @Component // registered as a Spring bean so it can be @Autowired; without Spring, instantiate it with new
    public class HtmlParseUtil {

        /**
         * Fetches the JD search page for the keyword "java" and prints each product found.
         *
         * @param args unused
         * @throws IOException if the page cannot be fetched (network access is required)
         */
        public static void main(String[] args) throws IOException {
            String url = "https://search.jd.com/Search?keyword=java";
            // Fetch and parse the page with a 30 s timeout. The returned Document offers
            // DOM-style lookups similar to the browser's document object.
            Document document = Jsoup.parse(new URL(url), 30000);

            Element element = document.getElementById("J_goodsList");
            // getElementById returns null when the id is absent (for example when the site
            // answers with a different page); bail out instead of failing with an NPE.
            if (element == null) {
                System.err.println("Element #J_goodsList not found in " + url);
                return;
            }
            System.out.println(element.html()); // HTML source below the J_goodsList element

            // Every product is rendered as an <li> below the list container.
            Elements elements = element.getElementsByTag("li");
            for (Element item : elements) {
                // Images are lazy-loaded, so the URL is read from data-lazy-img rather than src.
                String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
                String price = item.getElementsByClass("p-price").eq(0).text();
                String title = item.getElementsByClass("p-name").eq(0).text();
                System.out.println("========================================");
                System.out.println(img);
                System.out.println(price);
                System.out.println(title);
            }
        }
    }
    
    image-20201118164644237

    成功获取到相应信息。接着对其进行封装。首先创建一个pojo Content对象

    image-20201118164634402

    然后封装成一个parseJD方法。

    image-20201118165533861 image-20201118165602244
    /**
     * Scrapes JD search result pages into {@code Content} objects.
     */
    @Component // registered as a Spring bean so it can be @Autowired; without Spring, instantiate it with new
    public class HtmlParseUtil {

        /**
         * Manual smoke test: scrapes the results for "Vue" and prints them.
         *
         * @param args unused
         * @throws IOException if the page cannot be fetched
         */
        public static void main(String[] args) throws IOException {
            new HtmlParseUtil().parseJD("Vue").forEach(System.out::println);
        }

        /**
         * Fetches the JD search page for {@code keywords} and extracts one {@code Content}
         * (image URL, title, price) per {@code <li>} below the {@code J_goodsList} element.
         *
         * @param keywords search keyword; must not be {@code null}. It is URL-encoded as UTF-8
         *                 before being appended to the query string.
         * @return the extracted products; empty when the page has no {@code J_goodsList} element
         * @throws IOException if the page cannot be fetched (network access is required)
         */
        public ArrayList<Content> parseJD(String keywords) throws IOException {
            // Encode the keyword so spaces, '&' and non-ASCII input (e.g. Chinese) yield a valid URL.
            String url = "https://search.jd.com/Search?keyword="
                    + java.net.URLEncoder.encode(keywords, "UTF-8");
            System.out.println(url);

            // Fetch and parse the page with a 30 s timeout.
            Document document = Jsoup.parse(new URL(url), 30000);

            ArrayList<Content> goodsList = new ArrayList<>();
            Element element = document.getElementById("J_goodsList");
            // getElementById returns null when the id is absent; treat that as "no results"
            // instead of failing with an NPE.
            if (element == null) {
                return goodsList;
            }

            // Every product is rendered as an <li> below the list container.
            Elements elements = element.getElementsByTag("li");
            for (Element item : elements) {
                // Images are lazy-loaded, so the URL is read from data-lazy-img rather than src.
                String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
                String price = item.getElementsByClass("p-price").eq(0).text();
                String title = item.getElementsByClass("p-name").eq(0).text();

                Content content = new Content();
                content.setImg(img);
                content.setTitle(title);
                content.setPrice(price);
                goodsList.add(content);
            }
            return goodsList;
        }
    }
    

    业务编写

    首先照样配置Elasticsearch的配置类。

    image-20201118170012108
    /**
     * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
     */
    @Configuration
    public class ElasticSearchClientConfig {

        /**
         * Builds a client that talks to a single node at {@code http://localhost:9200}.
         *
         * @return the configured {@code RestHighLevelClient}
         */
        @Bean
        public RestHighLevelClient restHighLevelClient() {
            HttpHost localNode = new HttpHost("localhost", 9200, "http");
            return new RestHighLevelClient(RestClient.builder(localNode));
        }
    }
    

    编写service业务类

    image-20201118170455428
    /**
     * Indexes products scraped from JD search results into Elasticsearch.
     */
    @Service
    public class ContentService {

        @Autowired
        RestHighLevelClient restHighLevelClient;

        /**
         * Scrapes JD for {@code keywords} and bulk-indexes every result into the
         * {@code jd_goods} index.
         *
         * @param keywords search keyword handed to {@code HtmlParseUtil.parseJD}
         * @return {@code true} if at least one document was sent and the bulk response
         *         reported no failures; {@code false} otherwise
         * @throws IOException if scraping or the Elasticsearch request fails
         */
        public Boolean parseContent(String keywords) throws IOException {
            ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

            // A BulkRequest without any action fails client-side validation with an exception,
            // so report "nothing indexed" instead of sending an empty request.
            if (contents.isEmpty()) {
                return false;
            }

            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.timeout("2m"); // request timeout: two minutes

            for (Content content : contents) {
                // Serialize once and reuse the JSON for both the log line and the index request.
                String json = JSON.toJSONString(content);
                System.out.println(json);
                bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
            }
            BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
            return !bulk.hasFailures(); // true when every item was indexed
        }
    }
    

    编写controller

    image-20201118170914091
    /**
     * REST endpoints for loading scraped JD products into Elasticsearch.
     */
    @RestController
    public class ContentController {

        @Autowired
        ContentService contentService;

        /**
         * Scrapes JD for the given keyword and indexes the results.
         *
         * @param keyword keyword taken from the request path
         * @return the value returned by {@code ContentService.parseContent}
         * @throws IOException propagated from the service
         */
        @GetMapping("/parse/{keyword}")
        public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
            return contentService.parseContent(keyword);
        }
    }
    

    启动项目进行测试http://localhost:8080/parse/java

    image-20201118170930108 image-20201118171017200

    成功添加相关java商品资料。

    接着我们继续编写service,添加分页搜索ES中的数据。

    image-20201118171855808
    /**
     * Indexes products scraped from JD into Elasticsearch and searches them page by page.
     */
    @Service
    public class ContentService {

        @Autowired
        RestHighLevelClient restHighLevelClient;

        /**
         * Scrapes JD for {@code keywords} and bulk-indexes every result into the
         * {@code jd_goods} index.
         *
         * @param keywords search keyword handed to {@code HtmlParseUtil.parseJD}
         * @return {@code true} if at least one document was sent and the bulk response
         *         reported no failures; {@code false} otherwise
         * @throws IOException if scraping or the Elasticsearch request fails
         */
        public Boolean parseContent(String keywords) throws IOException {
            ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

            // A BulkRequest without any action fails client-side validation with an exception,
            // so report "nothing indexed" instead of sending an empty request.
            if (contents.isEmpty()) {
                return false;
            }

            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.timeout("2m"); // request timeout: two minutes

            for (Content content : contents) {
                // Serialize once and reuse the JSON for both the log line and the index request.
                String json = JSON.toJSONString(content);
                System.out.println(json);
                bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
            }
            BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
            return !bulk.hasFailures(); // true when every item was indexed
        }

        /**
         * Runs a paged term query on the {@code title} field of the {@code jd_goods} index.
         *
         * @param keyword  term to look up in {@code title}
         * @param pageNo   1-based page number; values below 1 are treated as 1
         * @param pageSize number of hits per page
         * @return the {@code _source} map of every hit on the requested page
         * @throws IOException if the Elasticsearch request fails
         */
        public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
            if (pageNo < 1) {
                pageNo = 1;
            }

            SearchRequest searchRequest = new SearchRequest("jd_goods");
            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

            // `from` is a zero-based hit offset, not a page number: page 1 starts at offset 0.
            sourceBuilder.from((pageNo - 1) * pageSize);
            sourceBuilder.size(pageSize);

            // Term query: the keyword is matched as-is, without analysis.
            // NOTE(review): whether this matches depends on how `title` is mapped/analyzed - confirm.
            TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
            sourceBuilder.query(termQueryBuilder);
            sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

            searchRequest.source(sourceBuilder);
            SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

            SearchHit[] hits = searchResponse.getHits().getHits();
            List<Map<String,Object>> list = new ArrayList<>(hits.length);
            for (SearchHit hit : hits) {
                list.add(hit.getSourceAsMap());
            }
            return list;
        }
    }
    

    接着在controller中添加一个请求

    image-20201118171942279
    /**
     * REST endpoints for loading scraped JD products into Elasticsearch and searching them.
     */
    @RestController
    public class ContentController {

        @Autowired
        ContentService contentService;

        /**
         * Scrapes JD for the given keyword and indexes the results.
         *
         * @param keyword keyword taken from the request path
         * @return the value returned by {@code ContentService.parseContent}
         * @throws IOException propagated from the service
         */
        @GetMapping("/parse/{keyword}")
        public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
            return contentService.parseContent(keyword);
        }

        /**
         * Searches the indexed products.
         *
         * @param keyword  keyword taken from the request path
         * @param pageNo   page number taken from the request path
         * @param pageSize page size taken from the request path
         * @return the list returned by {@code ContentService.searchPage}
         * @throws IOException propagated from the service
         */
        @GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
        public List<Map<String,Object>> search(
                @PathVariable("keyword") String keyword,
                @PathVariable("pageNo") int pageNo,
                @PathVariable("pageSize") int pageSize) throws IOException {
            return contentService.searchPage(keyword, pageNo, pageSize);
        }
    }
    

    启动项目进行测试http://localhost:8080/search/java/1/20

    image-20201118172147380

    前端页面

    导入vue 和 axios,我这里使用的是在线版的

    <script src="https://cdn.staticfile.org/vue/2.6.2/vue.min.js"></script>

    <script src="https://unpkg.com/axios/dist/axios.min.js"></script>

    修改我们的index页面。

    CleanShot 2020-11-18 at 18.02.39

    启动项目查看效果。(我已经解析过了vue数据添加到了es中)

    CleanShot 2020-11-18 at 18.09.20

    高亮功能

    我们在业务类service中修改一下代码。

    image-20201118183919496

    将高亮中的字段替换添加到_source中的title

    image-20201118134906443
    /**
     * Indexes products scraped from JD into Elasticsearch and searches them page by page,
     * returning titles with the matched keyword highlighted.
     */
    @Service
    public class ContentService {

        @Autowired
        RestHighLevelClient restHighLevelClient;

        /**
         * Scrapes JD for {@code keywords} and bulk-indexes every result into the
         * {@code jd_goods} index.
         *
         * @param keywords search keyword handed to {@code HtmlParseUtil.parseJD}
         * @return {@code true} if at least one document was sent and the bulk response
         *         reported no failures; {@code false} otherwise
         * @throws IOException if scraping or the Elasticsearch request fails
         */
        public Boolean parseContent(String keywords) throws IOException {
            ArrayList<Content> contents = new HtmlParseUtil().parseJD(keywords);

            // A BulkRequest without any action fails client-side validation with an exception,
            // so report "nothing indexed" instead of sending an empty request.
            if (contents.isEmpty()) {
                return false;
            }

            BulkRequest bulkRequest = new BulkRequest();
            bulkRequest.timeout("2m"); // request timeout: two minutes

            for (Content content : contents) {
                // Serialize once and reuse the JSON for both the log line and the index request.
                String json = JSON.toJSONString(content);
                System.out.println(json);
                bulkRequest.add(new IndexRequest("jd_goods").source(json, XContentType.JSON));
            }
            BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
            return !bulk.hasFailures(); // true when every item was indexed
        }

        /**
         * Runs a paged term query on the {@code title} field of the {@code jd_goods} index and
         * replaces each hit's {@code title} with its highlighted version when one is returned.
         *
         * @param keyword  term to look up in {@code title}
         * @param pageNo   1-based page number; values below 1 are treated as 1
         * @param pageSize number of hits per page
         * @return the {@code _source} map of every hit on the requested page; {@code title}
         *         contains {@code <span style='color:red'>} markup where a highlight exists
         * @throws IOException if the Elasticsearch request fails
         */
        public List<Map<String,Object>> searchPage(String keyword,int pageNo,int pageSize) throws IOException {
            if (pageNo < 1) {
                pageNo = 1;
            }

            SearchRequest searchRequest = new SearchRequest("jd_goods");
            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

            // `from` is a zero-based hit offset, not a page number: page 1 starts at offset 0.
            sourceBuilder.from((pageNo - 1) * pageSize);
            sourceBuilder.size(pageSize);

            // Term query: the keyword is matched as-is, without analysis.
            // NOTE(review): whether this matches depends on how `title` is mapped/analyzed - confirm.
            TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keyword);
            sourceBuilder.query(termQueryBuilder);
            sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); // search timeout

            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field("title");
            // false = highlight the field even if the query did not match on that field
            // (this setting does not limit the number of highlighted occurrences).
            highlightBuilder.requireFieldMatch(false);
            highlightBuilder.preTags("<span style='color:red'>");
            highlightBuilder.postTags("</span>");
            sourceBuilder.highlighter(highlightBuilder);

            searchRequest.source(sourceBuilder);
            SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

            SearchHit[] hits = searchResponse.getHits().getHits();
            List<Map<String,Object>> list = new ArrayList<>(hits.length);
            for (SearchHit hit : hits) {
                Map<String, Object> sourceAsMap = hit.getSourceAsMap(); // original document
                HighlightField title = hit.getHighlightFields().get("title");
                if (title != null) {
                    // Swap the plain title for the highlighted one.
                    sourceAsMap.put("title", joinFragments(title.fragments()));
                }
                list.add(sourceAsMap);
            }
            return list;
        }

        /** Concatenates highlight fragments into a single string. */
        private static String joinFragments(Text[] fragments) {
            StringBuilder joined = new StringBuilder();
            for (Text fragment : fragments) {
                joined.append(fragment);
            }
            return joined.toString();
        }
    }
    
    image-20201118184209351
    <!-- v-html inserts result.title as raw HTML so the highlight span markup is rendered.
         NOTE(review): raw HTML is not escaped; confirm the title comes from a trusted source. -->
    <p class="productTitle">
        <a v-html="result.title">  </a>
    </p>
    

    重启服务,访问测试。http://localhost:8080/

    CleanShot 2020-11-18 at 18.44.34

    完成!

    相关文章

      网友评论

          本文标题:Elasticsearch京东搜索实例

          本文链接:https://www.haomeiwen.com/subject/qqthgktx.html