美文网首页
ElasticSearch Java Api

ElasticSearch Java Api

作者: SHAN某人 | 来源:发表于2017-11-30 18:33 被阅读143次

    1.restful 风格 client

    restClient 走的是 HTTP 协议,默认使用 9200 端口。
    restClient 的使用与 ElasticSearch 版本无关(不要求客户端版本与集群版本严格一致),这是一个很大的优势。

     /**
      * Shared low-level REST client, built once in the static initializer below
      * for the single node http://192.168.1.148:9200.
      */
     public static RestClient restClient;
        static {
            // No credentials are registered on this provider in the visible code;
            // it is only installed as the HTTP client's default credentials provider.
            final CredentialsProvider provider = new BasicCredentialsProvider();
            final HttpHost node = new HttpHost("192.168.1.148", 9200, "http");

            // Callback invoked by the builder so the underlying async HTTP client
            // can be customized before it is created.
            final RestClientBuilder.HttpClientConfigCallback installCredentials =
                    new RestClientBuilder.HttpClientConfigCallback() {
                        @Override
                        public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder asyncBuilder) {
                            return asyncBuilder.setDefaultCredentialsProvider(provider);
                        }
                    };

            restClient = RestClient.builder(node)
                    .setHttpClientConfigCallback(installCredentials)
                    .build();
        }
    

    2. java api client

    Java API client(TransportClient)通信走的是 TCP 传输协议,默认使用 9300 端口。

        // Transport (TCP) port used for every host that is registered below.
        private static int port = 9300;
        // Value sent as the "cluster.name" setting.
        // NOTE(review): this looks like an IP address rather than a cluster name - confirm.
        private static String cluster = "192.168.1.1";
        private static String index = "idx-comment";   // recommendation data
        private static String type = "commen";

        static {
            // Connection style for the 2.0.0 client.
            // NOTE(review): `testhost` and `client` are not declared in this snippet;
            // presumably they are fields defined elsewhere - verify.
            String[] hosts = testhost.split(",");

            // Cluster sniffing is switched OFF here (false): only the hosts listed in
            // `testhost` are registered with the client.
            Settings settings = Settings.settingsBuilder()
                    .put("cluster.name", cluster)
                    .put("client.transport.sniff", false)
                    .build();

            client = TransportClient.builder().settings(settings).build();
            for (String rawHost : hosts) {
                // Tolerate "a, b" style lists and skip blank entries:
                // InetAddress.getByName("") would silently resolve to the loopback address.
                String host = rawHost.trim();
                if (host.isEmpty()) {
                    continue;
                }
                try {
                    client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), port));
                } catch (UnknownHostException e) {
                    // One unresolvable host must not prevent the remaining hosts
                    // from being registered.
                    System.err.println("Cannot resolve Elasticsearch host: " + host);
                    e.printStackTrace();
                }
            }

            // Connection style for the 5.0.0 client.
      /*      Settings esSettings = Settings.builder()
                    .put("cluster.name", cluster) // name of the ES cluster
                    .put("client.transport.sniff", true) // sniff the cluster state and add the other ES nodes' addresses to the local client list
                    .build();
            try {
                client = new PreBuiltTransportClient(esSettings)
                        .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(testhost), 9300));
                System.out.println("ElasticsearchClient 连接成功");
            } catch (UnknownHostException e) {
                e.printStackTrace();
            }*/
        }
    

    3.拿到所有数据

    // Reads every document matching the query by paging through a scroll cursor,
    // then releases the scroll context instead of leaving it open until it times out.
    QueryBuilder qb = termQuery("multi", "test");
    // Keep-alive requested for the scroll context on every request (60000 ms).
    TimeValue keepAlive = new TimeValue(60000);

    // NOTE(review): `test` is not declared in this snippet; presumably the index name - verify.
    SearchResponse scrollResp = client.prepareSearch(test)
            .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)
            .setScroll(keepAlive)
            .setQuery(qb)
            .setSize(100).get(); // max of 100 hits will be returned for each scroll
    try {
        // Zero hits mark the end of the scroll. Checking before fetching avoids one
        // extra round trip when the very first page is already empty.
        while (scrollResp.getHits().getHits().length != 0) {
            for (SearchHit hit : scrollResp.getHits().getHits()) {
                //Handle the hit...
            }

            scrollResp = client.prepareSearchScroll(scrollResp.getScrollId())
                    .setScroll(keepAlive)
                    .execute().actionGet();
        }
    } finally {
        // Explicitly clear the scroll so the cluster can free the search context now.
        client.prepareClearScroll().addScrollId(scrollResp.getScrollId()).get();
    }
    

    4.新建索引

        /**
         * Bulk-indexes the given users into {@code index}/{@code type}, using each
         * user's id as the document id and its JSON serialization as the source.
         *
         * <p>Null entries and entries that serialize to null or an empty string are
         * skipped. Nothing is sent when no document remains to be indexed.
         *
         * @param userList users to index; a null or empty list is a no-op
         */
        public static void insertEs(List<User> userList) {
            if (CollectionUtils.isEmpty(userList)) {
                return;
            }
            try {
                BulkRequestBuilder bulkRequest = client.prepareBulk();
                for (User duanzi : userList) {
                    if (null == duanzi) {
                        continue;
                    }
                    String esJson = JSON.toJSONString(duanzi);
                    if (esJson == null || "".equals(esJson)) {
                        continue;
                    }
                    // Write target: index, type, document id (think: database, table, row).
                    bulkRequest.add(client.prepareIndex(index, type, String.valueOf(duanzi.getId())).setSource(esJson));
                    System.out.println("bulk es [index]" + index + "[type]" + type + "message" + esJson);
                }

                // Every entry may have been skipped; do not execute a bulk request
                // that contains no actions.
                if (bulkRequest.numberOfActions() == 0) {
                    return;
                }

                // A bulk call can succeed as a whole while individual items fail,
                // so the response has to be inspected.
                if (bulkRequest.get().hasFailures()) {
                    System.err.println("bulk es [index]" + index + "[type]" + type + " finished with failures");
                }
            } catch (IndexNotFoundException e) {
                // Report instead of swallowing silently; the method stays best-effort.
                System.err.println("esIndex:" + index + "Not Found");
            }
        }
    

    5. moreLikeThisQuery

    moreLikeThisQuery能够比较好而且简单地实现基于文本内容的推荐,查询匹配文本的相关度经实测远远高于 matchQuery。

    https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html
    moreLikeThisQuery的一些参数

    percent_terms_to_match:匹配项(term)的百分比,默认是0.3(较新的版本中该参数已被 minimum_should_match 取代,默认值为 30%)
    min_term_freq:一篇文档中一个词语至少出现次数,小于这个值的词将被忽略,默认是2
    max_query_terms:一条查询语句中允许最多查询词语的个数,默认是25
    stop_words:设置停止词,匹配时会忽略停止词
    min_doc_freq:一个词语最少在多少篇文档中出现,小于这个值的词将会被忽略,默认是5
    max_doc_freq:一个词语最多在多少篇文档中出现,大于这个值的词会将被忽略,默认是无限制
    min_word_len:最小的词语长度,默认是0
    max_word_len:最多的词语长度,默认无限制
    boost_terms:设置词语权重(按 tf-idf 对查询词加权的系数),默认是0,即不启用
    boost:设置查询权重,默认是1

    /**
     * Runs a more_like_this search over the fields {@code sourceNames} and
     * {@code tags.word}, using the request's source names as the "like" texts,
     * and prints the source of every returned hit whose score is above 0.8.
     *
     * <p>At most 20 hits are requested. A null request, or a request without
     * source names, is a no-op.
     *
     * @param request model whose {@code getSourceNames()} supplies the texts to match
     */
    public static void searchByMoreLikethis(DuanziEsModel request) {
        // Without any "like" text there is nothing to search for.
        if (request == null || request.getSourceNames() == null || request.getSourceNames().size() == 0) {
            return;
        }
        String[] tagArray = request.getSourceNames().toArray(new String[request.getSourceNames().size()]);

        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("sourceNames", "tags.word")   // fields
                .like(tagArray)         // texts to find similar documents for
                .minTermFreq(1)         // terms occurring fewer times than this in the input are ignored
                .maxQueryTerms(12);     // maximum number of query terms that will be selected

        // The query is sent directly; the original wrapped it in a bool/must
        // clause that was built but never used.
        SearchResponse searchResponse = client.prepareSearch(index).setTypes(type)
                .setQuery(moreLikeThisQueryBuilder)
                .setSize(20)
                .execute().actionGet();

        for (SearchHit hit : searchResponse.getHits()) {
            if (hit.getScore() > 0.8) {
                try {
                    String info = JSON.toJSONString(hit.getSource());
                    System.out.println("moreLikeThis hit [score]" + hit.getScore() + "[source]" + info);
                } catch (RuntimeException ex) {
                    // Best effort: one hit that cannot be serialized must not abort
                    // the remaining hits, but it should not vanish silently either.
                    System.err.println("moreLikeThis hit could not be serialized: " + ex);
                }
            }
        }
    }
    

    相关文章

      网友评论

          本文标题:ElasticSearch Java Api

          本文链接:https://www.haomeiwen.com/subject/hrpemxtx.html