1.restful 风格 client
restClient 走的是http 协议,9200端口
restClient 使用与ElasticSearch版本无关,这是一个很大的优势
/** Shared low-level REST client; talks HTTP to the node at 192.168.1.148:9200. */
public static RestClient restClient;

static {
    // No credentials are registered on this provider in the visible code, so it is
    // handed to the HTTP client empty.
    final CredentialsProvider provider = new BasicCredentialsProvider();
    final HttpHost node = new HttpHost("192.168.1.148", 9200, "http");

    // Hook that lets us customise the underlying async HTTP client before it is built.
    RestClientBuilder.HttpClientConfigCallback attachCredentials =
            new RestClientBuilder.HttpClientConfigCallback() {
                @Override
                public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder asyncClientBuilder) {
                    return asyncClientBuilder.setDefaultCredentialsProvider(provider);
                }
            };

    restClient = RestClient.builder(node)
            .setHttpClientConfigCallback(attachCredentials)
            .build();
}
2. java api client
java api client 通信走的是 tcp 协议,9300端口
private static int port = 9300;
// NOTE(review): this value is sent as "cluster.name" below, yet it looks like a host
// address — confirm it really is the cluster's name.
private static String cluster = "192.168.1.1";
private static String index = "idx-comment"; // recommendation data
// NOTE(review): possibly a typo for "comment" — confirm against the index mapping before changing.
private static String type = "commen";

static {
    // Connection style for the 2.0.0 transport client.
    // `testhost` (a comma-separated host list) and `client` are not declared in this
    // snippet; presumably they are defined elsewhere in the class.
    String[] hosts = testhost.split(",");

    // Sniffing is switched OFF here: only the addresses added below are used.
    Settings settings = Settings.settingsBuilder()
            .put("cluster.name", cluster)
            .put("client.transport.sniff", false)
            .build();

    client = TransportClient.builder().settings(settings).build();
    for (String host : hosts) {
        String hostName = host.trim();
        if (hostName.isEmpty()) {
            // Tolerate stray commas / blanks in the host list.
            continue;
        }
        // Resolve each host on its own so one bad entry does not prevent the
        // remaining hosts from being registered.
        try {
            client.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(hostName), port));
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
    }

    // Connection style for the 5.0.0 transport client (kept for reference).
    /* Settings esSettings = Settings.builder()
            .put("cluster.name", cluster) // name of the ES cluster to join
            .put("client.transport.sniff", true) // sniff the cluster state and add the other nodes' addresses to the local client
            .build();
    try {
        client = new PreBuiltTransportClient(esSettings)
                .addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName(testhost), 9300));
        System.out.println("ElasticsearchClient 连接成功");
    } catch (UnknownHostException e) {
        e.printStackTrace();
    }*/
}
3.拿到所有数据
// Walk every document matching the term query using the scroll API.
// `client` and `test` (the index name) come from outside this snippet.
QueryBuilder qb = termQuery("multi", "test");
SearchResponse scrollResp = client.prepareSearch(test)
        .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC)
        .setScroll(new TimeValue(60000)) // keep the scroll context alive for 60s between pages
        .setQuery(qb)
        .setSize(100).get(); // max of 100 hits will be returned for each scroll
try {
    // Scroll until no hits are returned.
    do {
        for (SearchHit hit : scrollResp.getHits().getHits()) {
            // Handle the hit...
        }
        scrollResp = client.prepareSearchScroll(scrollResp.getScrollId())
                .setScroll(new TimeValue(60000))
                .execute().actionGet();
    } while (scrollResp.getHits().getHits().length != 0); // Zero hits mark the end of the scroll and the while loop.
} finally {
    // Release the server-side scroll context right away instead of leaving it open
    // until the keep-alive expires. scrollResp always holds the last successful
    // response, so its scroll id is the one to clear even if a page request failed.
    client.prepareClearScroll().addScrollId(scrollResp.getScrollId()).get();
}
4.批量写入文档(bulk 索引数据)
/**
 * Bulk-indexes the given users into {@code index}/{@code type}, using each user's id
 * as the document id and its JSON form as the document source.
 *
 * <p>Null entries and entries that serialize to null or an empty string are skipped.
 * Nothing is sent when no document was queued. Item-level failures and a missing
 * index are reported on stderr rather than thrown.
 *
 * @param userList users to index; a null or empty list is a no-op
 */
public static void insertEs(List<User> userList) {
    if (CollectionUtils.isEmpty(userList)) {
        return;
    }
    try {
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (User user : userList) {
            if (null == user) {
                continue;
            }
            String esJson = JSON.toJSONString(user);
            if (esJson == null || "".equals(esJson)) {
                continue;
            }
            // Target layout: index / type / document id, with the JSON as the source.
            bulkRequest.add(client.prepareIndex(index, type, String.valueOf(user.getId())).setSource(esJson));
            System.out.println("bulk es [index]" + index + "[type]" + type + "message" + esJson);
        }
        // Executing a bulk request with no actions fails validation, so bail out
        // when every entry was skipped.
        if (bulkRequest.numberOfActions() == 0) {
            return;
        }
        // Fully qualified so that no additional import is required.
        org.elasticsearch.action.bulk.BulkResponse bulkResponse = bulkRequest.get();
        // A bulk call can succeed as a whole while individual items are rejected.
        if (bulkResponse.hasFailures()) {
            System.err.println("bulk es [index]" + index + "[type]" + type + " failures: "
                    + bulkResponse.buildFailureMessage());
        }
    } catch (IndexNotFoundException e) {
        System.err.println("esIndex:" + index + "Not Found");
    }
}
5. moreLikeThisQuery
moreLikeThisQuery能够比较好而且简单地实现基于文本内容的推荐,查询匹配文本的相关度经实测远远高于 matchQuery。
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html
moreLikeThisQuery的一些参数
percent_terms_to_match:匹配项(term)的百分比,默认是0.3(2.0 起已移除,改用 minimum_should_match,默认 30%)
min_term_freq:一篇文档中一个词语至少出现次数,小于这个值的词将被忽略,默认是2
max_query_terms:一条查询语句中允许最多查询词语的个数,默认是25
stop_words:设置停止词,匹配时会忽略停止词
min_doc_freq:一个词语最少在多少篇文档中出现,小于这个值的词将被忽略,默认是5
max_doc_freq:一个词语最多在多少篇文档中出现,大于这个值的词会将被忽略,默认是无限制
min_word_len(新版本中为 min_word_length):最小的词语长度,默认是0
max_word_len(新版本中为 max_word_length):最大的词语长度,默认无限制
boost_terms:设置词语权重(按 tf-idf 对查询词加权的系数),默认不启用(0)
boost:设置查询权重,默认是1
/**
 * Runs a more-like-this query over the "sourceNames" and "tags.word" fields, using the
 * request's source names as the text to match, and prints every hit whose score
 * exceeds 0.8 as JSON.
 *
 * <p>At most 20 hits are requested. Does nothing when the request carries no source
 * names, since the query cannot be built without any "like" text.
 *
 * @param request model supplying the source names to match against
 */
public static void searchByMoreLikethis(DuanziEsModel request) {
    if (request == null || request.getSourceNames() == null || request.getSourceNames().size() == 0) {
        return;
    }
    String[] tagArray = request.getSourceNames().toArray(new String[request.getSourceNames().size()]);

    MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("sourceNames", "tags.word") // fields to compare
            .like(tagArray)      // text to find similar documents for
            .minTermFreq(1)      // terms occurring fewer times than this in the input are ignored
            .maxQueryTerms(12);  // upper bound on the number of terms in the generated query

    SearchResponse searchResponse = client.prepareSearch(index).setTypes(type)
            .setQuery(moreLikeThisQueryBuilder)
            .setSize(20)
            .execute().actionGet();

    for (SearchHit hit : searchResponse.getHits()) {
        // NOTE(review): 0.8 is a hard-coded relevance cut-off; scores are not normalised,
        // so confirm this threshold suits the data.
        if (hit.getScore() > 0.8) {
            try {
                String info = JSON.toJSONString(hit.getSource());
                System.out.println("moreLikeThis es [index]" + index + "[type]" + type + "message" + info);
            } catch (Exception ex) {
                // Keep processing the remaining hits, but do not hide the failure.
                System.err.println("moreLikeThis es [index]" + index + "[type]" + type
                        + " failed to serialize hit " + hit.getId() + ": " + ex);
            }
        }
    }
}
网友评论