美文网首页
ElasticSearch Java Api

ElasticSearch Java Api

作者: SHAN某人 | 来源:发表于2017-11-30 18:33 被阅读143次

1.restful 风格 client

restClient 走的是http 协议,9200端口
restClient 使用与ElasticSearch版本无关,这是一个很大的优势

 /** Shared low-level REST client; talks HTTP to the node on port 9200. */
 public  static RestClient restClient;
    static {
        // Provider is handed to the HTTP client as-is; no credentials are registered on it here.
        final CredentialsProvider credentials = new BasicCredentialsProvider();
        final HttpHost node = new HttpHost("192.168.1.148", 9200, "http");

        // Callback that installs the credentials provider on the underlying async HTTP client.
        final RestClientBuilder.HttpClientConfigCallback installCredentials =
                new RestClientBuilder.HttpClientConfigCallback() {
                    @Override
                    public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
                        return httpClientBuilder.setDefaultCredentialsProvider(credentials);
                    }
                };

        final RestClientBuilder builder = RestClient.builder(node);
        builder.setHttpClientConfigCallback(installCredentials);
        restClient = builder.build();
    }

2. java api client

java api client 通信走的是 tcp 协议,9300端口

    // Connection settings read by the transport-client static initializer and the index/search helpers.
    private static int port = 9300;
    // NOTE(review): this value is passed as "cluster.name" in the client settings, yet it looks like
    // an IP address — confirm it really is the cluster name.
    private static String cluster = "192.168.1.1";
    private static String index = "idx-comment";   // recommendation data
    // NOTE(review): "commen" may be a typo for "comment" — verify against the actual mapping type.
    private static String type = "commen";

       // Builds the transport client: applies the cluster name, then registers every
       // comma-separated host from testhost on the configured port.
       // NOTE(review): testhost and client are not declared in this snippet — presumably
       // static fields defined elsewhere; confirm.
       static {
         // Connection style for version 2.0.0
        String[] hosts = testhost.split(",");
        Settings settings = Settings.settingsBuilder().put("cluster.name", cluster).put("client.transport.sniff", false).build();    // cluster sniffing is turned OFF here (client.transport.sniff=false)
        try {
            client = TransportClient.builder().settings(settings).build();
            for (String host : hosts) {
                client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(host), port));
            }
        } catch (UnknownHostException e) {
            // An unresolvable host only prints the stack trace; hosts after it in the list are not added.
            e.printStackTrace();
        }
      // Connection style for version 5.0.0
  /*      Settings esSettings = Settings.builder()
                .put("cluster.name", cluster) // set the ES cluster name
                .put("client.transport.sniff", true) // sniff the cluster state and add the other ES nodes' addresses to the local client list
                .build();
        try {
            client = new PreBuiltTransportClient(esSettings)
                    .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(testhost), 9300));
            System.out.println("ElasticsearchClient 连接成功");
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }*/
    }

3.拿到所有数据

// Fetch every document matching the term query by paging through a scroll.
// NOTE(review): `test` (the index name) and `client` are not declared in this snippet — confirm where they come from.
QueryBuilder qb = termQuery("multi", "test");

SearchResponse scrollResp = client.prepareSearch(test)
        .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)
        .setScroll(new TimeValue(60000))
        .setQuery(qb)
        .setSize(100).get(); //max of 100 hits will be returned for each scroll
try {
    //Scroll until no hits are returned
    do {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            //Handle the hit...
        }

        scrollResp = client.prepareSearchScroll(scrollResp.getScrollId())
                .setScroll(new TimeValue(60000))
                .get();
    } while (scrollResp.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.
} finally {
    // Release the server-side scroll context right away instead of letting it
    // linger until the 60s keep-alive expires (also runs if a page fetch fails).
    client.prepareClearScroll().addScrollId(scrollResp.getScrollId()).get();
}

4.新建索引

    /**
     * Bulk-indexes the given users into {@code index}/{@code type}, using each user's id as the
     * document id and its JSON serialization as the document source.
     *
     * <p>Null elements and elements that serialize to null or an empty string are skipped. When
     * nothing is left to index no request is sent, because executing an empty bulk request fails
     * request validation.
     *
     * @param userList users to index; a null or empty list is a no-op
     */
    public static void insertEs(List<User> userList) {
        if (CollectionUtils.isEmpty(userList)) {
            return;
        }
        try {
            BulkRequestBuilder bulkRequest = client.prepareBulk();
            for (User duanzi : userList) {
                if (null == duanzi) {
                    continue;
                }
                String esJson = JSON.toJSONString(duanzi);
                if (esJson == null || "".equals(esJson)) {
                    continue;
                }
                // Target layout: index, type, document (index, type, info)
                bulkRequest.add(client.prepareIndex(index, type, String.valueOf(duanzi.getId())).setSource(esJson));
                System.out.println("bulk es [index]" + index + "[type]" + type + "message" + esJson);
            }
            // Every element may have been skipped above; do not send an empty bulk request.
            if (bulkRequest.numberOfActions() == 0) {
                return;
            }
            // A bulk call can succeed as a whole while individual items fail; surface that.
            if (bulkRequest.get().hasFailures()) {
                System.err.println("bulk es [index]" + index + "[type]" + type + " finished with item failures");
            }
        } catch (IndexNotFoundException e) {
            // Previously swallowed silently; report the missing index.
            System.err.println("esIndex:" + index + "Not Found");
        }
    }

5. moreLikeThisQuery

moreLikeThisQuery能够比较好而且简单地实现基于文本内容的推荐,查询匹配文本的相关度经实测远远高于 matchQuery。

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html
moreLikeThisQuery的一些参数

percent_terms_to_match:匹配项(term)的百分比,默认是0.3
min_term_freq:一篇文档中一个词语至少出现次数,小于这个值的词将被忽略,默认是2
max_query_terms:一条查询语句中允许最多查询词语的个数,默认是25
stop_words:设置停止词,匹配时会忽略停止词
min_doc_freq:一个词语最少在多少篇文档中出现,小于这个值的词将会被忽略,默认是5
max_doc_freq:一个词语最多在多少篇文档中出现,大于这个值的词将会被忽略,默认是无限制
min_word_len:最小的词语长度,默认是0
max_word_len:最多的词语长度,默认无限制
boost_terms:设置词语权重,默认是1
boost:设置查询权重,默认是1

/**
 * Runs a more_like_this query against {@code index}/{@code type}, using the request's source
 * names as the "like" texts matched on the {@code sourceNames} and {@code tags.word} fields.
 * At most 20 hits are requested, and only hits scoring above 0.8 are serialized to JSON.
 *
 * <p>A null request, or one without source names, is a no-op: a more_like_this query needs at
 * least one "like" text.
 *
 * @param request model whose {@code getSourceNames()} supplies the texts to match
 */
public static void searchByMoreLikethis(DuanziEsModel request) {
        if (request == null || request.getSourceNames() == null || request.getSourceNames().isEmpty()) {
            return;
        }
        String[] tagArray = request.getSourceNames().toArray(new String[request.getSourceNames().size()]);

        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("sourceNames", "tags.word")   // fields
                .like(tagArray)         // tagArray holds the texts to match
                .minTermFreq(1)         // terms occurring fewer times than this in a document are ignored
                .maxQueryTerms(12);     // maximum number of query terms selected for one query

        // The original also built a BoolQueryBuilder that was never passed to the search;
        // the more_like_this query is what is actually executed.
        SearchResponse searchResponse = client.prepareSearch(index).setTypes(type)
                .setQuery(moreLikeThisQueryBuilder)
                .setSize(20)
                .get();

        SearchHits hits = searchResponse.getHits();
        for (SearchHit hit : hits) {
            if (hit.getScore() > 0.8) {
                try {
                    String info = JSON.toJSONString(hit.getSource());
                    // Handle the matched document...
                } catch (Exception ex) {
                    // Previously swallowed silently; report and move on to the next hit.
                    System.err.println("failed to serialize hit " + hit.getId() + ": " + ex);
                }
            }
        }
    }

相关文章

网友评论

      本文标题:ElasticSearch Java Api

      本文链接:https://www.haomeiwen.com/subject/hrpemxtx.html