- elasticsearch之十九springboot测试高级搜索
- Springboot实战之spring-boot-starter
- springboot整合es出现elasticsearch he
- 使用SpringBoot+elasticsearch6.x搭建的
- Spring Boot整合Elasticsearch全文搜索引擎
- springboot集成elasticsearch地理位置
- SpringBoot整合Elasticsearch报错avail
- spring boot + elasticsearch整合
- SpringBoot1.5.x集成Elasticsearch
- Spring Boot整合Elasticsearch全文搜索引擎
[个人专题目录](https://www.jianshu.com/p/140e2a59db2c)
1. elasticsearch高级搜索聚合
聚合分析是数据库中重要的功能特性,完成对一个查询的数据集中数据的聚合计算,如:找出某字段(或计算表达式的结果)的最大值、最小值,计算和、平均值等。ES作为搜索引擎兼数据库,同样提供了强大的聚合分析能力。
1.1 cardinality去重计数
其作用是对选择字段先执行类似sql中的distinct操作,去掉集合中的重复项,然后统计排重后的集合长度。
总共有多少不同的值 相当于SQL中的 select count(distinct clusterId) from table
POST /book-index/_search
{
"from": 0,
"size": 100,
"aggregations": {
"agg": {
"cardinality": {
"field": "categoryName"
}
}
}
}
/**
 * Builds a cardinality aggregation named "agg" on the given field and passes it to
 * {@code baseQuery.builder}, with the two middle arguments left as {@code null}.
 *
 * @param indexName index name forwarded to {@code baseQuery.builder}
 * @param field     field whose distinct values are counted
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void cardinalityAggregations(String indexName, String field) throws Exception {
    CardinalityAggregationBuilder distinctCount = AggregationBuilders.cardinality("agg");
    distinctCount.field(field);
    baseQuery.builder(indexName, null, null, distinctCount);
}
/** Runs the cardinality aggregation once per sample field, in the same order as before. */
@Test
public void testCardinalityAggregations() throws Exception {
    for (String fieldName : new String[] {"categoryName", "brandName"}) {
        aggregationQuery.cardinalityAggregations(Constants.INDEX_NAME, fieldName);
    }
}
1.2 range统计
range统计能够获取属于指定范围集的文档个数。除此之外,还能够获取指定字段的聚合数据。例如,我们可以统计某个数值字段中小于100、100~200、200~300三个范围内的文档个数,还可以用于日期、IP地址范围统计。
统计2011以前,2011~2019,2019及以后的文档数:
POST /book-index/_search
{
"from": 0,
"size": 100,
"aggregations": {
"agg": {
"date_range": {
"field": "createTime",
"format": "yyyy",
"ranges": [
{
"to": "2011"
},
{
"from": "2011",
"to": "2019"
},
{
"from": "2019"
}
],
"keyed": false
}
}
}
}
/**
 * Builds a date_range aggregation named "agg" on the given field, using the "yyyy" format and
 * three ranges: up to 2011, 2011 to 2019, and 2019 onwards. The aggregation is then passed to
 * {@code baseQuery.builder} with the two middle arguments left as {@code null}.
 *
 * @param indexName index name forwarded to {@code baseQuery.builder}
 * @param field     date field the ranges are applied to
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void dateRangeAggregation(String indexName, String field) throws Exception {
    AggregationBuilder yearRanges = AggregationBuilders.dateRange("agg")
            .field(field)
            .format("yyyy")
            .addUnboundedTo("2011")
            .addRange("2011", "2019")
            .addUnboundedFrom("2019");
    baseQuery.builder(indexName, null, null, yearRanges);
}
/** Runs the date range aggregation against the createTime field of the sample index. */
@Test
public void testDateRangeAggregation() throws Exception {
    String dateField = "createTime";
    aggregationQuery.dateRangeAggregation(Constants.INDEX_NAME, dateField);
}
1.3 histogram 统计
histogram 统计能够对字段取值按间隔统计建立直方图(针对数值型和日期型字段)。
比如我们以1000为间隔,对price字段每隔1000创建一个桶,统计每个区间都有多少个文档:
POST /book-index/_search
{
"from": 0,
"size": 100,
"aggregations": {
"agg": {
"histogram": {
"field": "price",
"interval": 1000,
"offset": 0,
"order": {
"_key": "asc"
},
"keyed": false,
"min_doc_count": 0
}
}
}
}
/**
 * Builds a histogram aggregation named "agg" that buckets the given field at a fixed interval,
 * then passes it to {@code baseQuery.builder} with the two middle arguments left as
 * {@code null}.
 *
 * @param indexName index name
 * @param field     field name
 * @param interval  bucket width
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void histogramAggregation(String indexName, String field, int interval) throws Exception {
    AggregationBuilder fixedWidthBuckets = AggregationBuilders
            .histogram("agg")
            .field(field)
            .interval(interval);
    baseQuery.builder(indexName, null, null, fixedWidthBuckets);
}
/** Runs the histogram aggregation on the price field with a bucket width of 1000. */
@Test
public void testHistogramAggregation() throws Exception {
    int bucketWidth = 1000;
    aggregationQuery.histogramAggregation(Constants.INDEX_NAME, "price", bucketWidth);
}
1.4 date_histogram统计
histogram 除了对数值统计外,还提供了date_histogram统计类型,可以应用于日期字段类型。date_histogram允许我们使用year,month,week,day,hour或minute等常量作为interval属性的取值。
支持的时间间隔常量:
// Predefined interval constants; each wraps the interval expression given in its string literal.
public static final DateHistogramInterval SECOND = new DateHistogramInterval("1s");
// Lower-case "m" here; the MONTH constant below uses upper-case "M".
public static final DateHistogramInterval MINUTE = new DateHistogramInterval("1m");
public static final DateHistogramInterval HOUR = new DateHistogramInterval("1h");
public static final DateHistogramInterval DAY = new DateHistogramInterval("1d");
public static final DateHistogramInterval WEEK = new DateHistogramInterval("1w");
// Upper-case "M"; differs from MINUTE only by letter case.
public static final DateHistogramInterval MONTH = new DateHistogramInterval("1M");
public static final DateHistogramInterval QUARTER = new DateHistogramInterval("1q");
public static final DateHistogramInterval YEAR = new DateHistogramInterval("1y");
例如创建时间以天为单位来统计文档数量:
POST /book-index/_search
{
"from": 0,
"size": 100,
"aggregations": {
"agg": {
"date_histogram": {
"field": "createTime",
"calendar_interval": "1d",
"offset": 0,
"order": {
"_key": "asc"
},
"keyed": false,
"min_doc_count": 0
}
}
}
}
/**
 * Builds a date_histogram aggregation named "agg" that buckets the given date field by a number
 * of days, then passes it to {@code baseQuery.builder} with the two middle arguments left as
 * {@code null}.
 *
 * <p>An {@code interval} of 1 is sent as {@code calendar_interval: 1d}. Larger values are sent
 * as {@code fixed_interval}, because Elasticsearch accepts only single calendar units for
 * {@code calendar_interval} and rejects expressions such as "2d".
 *
 * @param indexName index name
 * @param field     name of the date field to bucket on
 * @param interval  bucket width in days; must be at least 1
 * @throws IllegalArgumentException if {@code interval} is less than 1
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void histogramDateAggregation(String indexName, String field, int interval) throws Exception {
    if (interval < 1) {
        throw new IllegalArgumentException("interval must be at least 1 day, got " + interval);
    }
    // Multi-day widths are only valid as fixed intervals; a single day keeps calendar semantics.
    AggregationBuilder agg1 = interval == 1
            ? AggregationBuilders.dateHistogram("agg").field(field)
                    .calendarInterval(DateHistogramInterval.DAY)
            : AggregationBuilders.dateHistogram("agg").field(field)
                    .fixedInterval(DateHistogramInterval.days(interval));
    baseQuery.builder(indexName, null, null, agg1);
}
/** Runs the date histogram aggregation on createTime with an interval argument of 1. */
@Test
public void testHistogramDateAggregation() throws Exception {
    int days = 1;
    aggregationQuery.histogramDateAggregation(Constants.INDEX_NAME, "createTime", days);
}
1.5 extended_stats统计聚合
extended_stats统计使得我们可以对一个数值型字段计算统计信息。我们能够得到个数、总和、平方和、均值、最小值、最大值、方差及标准差。
POST /book-index/_search
{
"from": 0,
"size": 100,
"aggregations": {
"agg": {
"extended_stats": {
"field": "price",
"sigma": 2
}
}
}
}
/**
 * Builds an extended_stats aggregation named "agg" on the given field and passes it to
 * {@code baseQuery.builder}, with the two middle arguments left as {@code null}.
 *
 * @param indexName index name forwarded to {@code baseQuery.builder}
 * @param field     field the statistics are computed on
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void extendedStatsAggregation(String indexName, String field) throws Exception {
    ExtendedStatsAggregationBuilder fieldStats = AggregationBuilders.extendedStats("agg");
    fieldStats.field(field);
    baseQuery.builder(indexName, null, null, fieldStats);
}
/** Runs the extended_stats aggregation against the price field of the sample index. */
@Test
public void testExtendedStatsAggregation() throws Exception {
    String numericField = "price";
    aggregationQuery.extendedStatsAggregation(Constants.INDEX_NAME, numericField);
}
1.6 terms_stats统计
terms_stats统计提供了在一个字段上基于另一个字段获得的取值进行统计的能力。
例如对stock字段进行平均值统计,同时希望根据brandName(品牌)字段对统计值进行划分。
POST /book-index/_search
{
"from": 0,
"size": 100,
"query": {
"range": {
"createTime": {
"from": "2015-03-08 00:00:00",
"to": "2020-03-08 00:00:00",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
},
"aggregations": {
"brandName": {
"terms": {
"field": "brandName",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"avg_request_stock": {
"avg": {
"field": "stock"
}
}
}
}
}
}
/**
 * Builds a range query on "createTime" between {@code startTime} and {@code endTime}, plus a
 * terms aggregation on "brandName" with an average-of-"stock" sub-aggregation, and passes both
 * to {@code baseQuery.builder}.
 *
 * @param indexName index name forwarded to {@code baseQuery.builder}
 * @param startTime lower bound of the createTime range
 * @param endTime   upper bound of the createTime range
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void termsAggregation(String indexName, String startTime, String endTime) throws Exception {
    RangeQueryBuilder createdBetween = QueryBuilders.rangeQuery("createTime");
    createdBetween.from(startTime);
    createdBetween.to(endTime);

    // Author's note: a text field cannot be used here; the field must be mapped as keyword.
    TermsAggregationBuilder byBrand = AggregationBuilders.terms("brandName");
    byBrand.field("brandName");

    // "avg_request_stock" is the sub-aggregation's name; the name itself is arbitrary.
    byBrand.subAggregation(AggregationBuilders.avg("avg_request_stock").field("stock"));

    baseQuery.builder(indexName, createdBetween, null, byBrand);
}
/** Runs the terms aggregation over a fixed createTime window on the sample index. */
@Test
public void testTermsAggregation() throws Exception {
    String windowStart = "2015-03-08 00:00:00";
    String windowEnd = "2020-03-08 00:00:00";
    aggregationQuery.termsAggregation(Constants.INDEX_NAME, windowStart, windowEnd);
}
1.7 geo_distance统计
使用该类型可以获得给定位置某个距离范围内的文档个数。
比如:利用第七节中的数据,统计离厦门(0~100公里,100~500公里,500~5000公里)索引中的文档数量(注意:下方示例请求中的 origin 坐标为 lat 40.1225、lon 116.2577,请按实际原点调整)。
POST /map/cp/_search
{
"aggregations": {
"agg": {
"geo_distance": {
"field": "location",
"origin": {
"lat": 40.1225,
"lon": 116.2577
},
"ranges": [{
"key": "*-100.0",
"from": 0.0,
"to": 100.0
}, {
"key": "100.0-500.0",
"from": 100.0,
"to": 500.0
}, {
"key": "500.0-5000.0",
"from": 500.0,
"to": 5000.0
}],
"keyed": false,
"unit": "km",
"distance_type": "ARC"
}
}
}
}
/**
 * Builds a geo_distance aggregation named "agg" on the "location" field, measured in kilometers
 * from the point (40.1225, 116.2577), with three ranges: up to 100, 100 to 500, and 500 to
 * 5000. The aggregation is then passed to {@code baseQuery.builder} with the two middle
 * arguments left as {@code null}.
 *
 * @param indexName index name forwarded to {@code baseQuery.builder}
 * @throws Exception declared for whatever {@code baseQuery.builder} may throw
 */
@Override
public void geoDistanceAggregation(String indexName) throws Exception {
    GeoPoint origin = new GeoPoint(40.1225, 116.2577);
    GeoDistanceAggregationBuilder distanceRings = AggregationBuilders.geoDistance("agg", origin);
    distanceRings.field("location");
    distanceRings.unit(DistanceUnit.KILOMETERS);
    distanceRings.addUnboundedTo(100);
    distanceRings.addRange(100, 500);
    distanceRings.addRange(500, 5000);
    baseQuery.builder(indexName, null, null, distanceRings);
}
/** Runs the geo_distance aggregation against the "cn_large_cities" index. */
@Test
public void testGeoDistanceAggregation() throws Exception {
    String citiesIndex = "cn_large_cities";
    aggregationQuery.geoDistanceAggregation(citiesIndex);
}
网友评论