美文网首页elasticsearchBig data
ElasticSearch(五):Aggregation

ElasticSearch(五):Aggregation

作者: 采风JS | 来源:发表于2018-11-21 20:02 被阅读141次

    一 Metric

    单值分析,只输出一个分析结果,包括min/max/avg/sum/cardinality;

    • min/max/avg/sum
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "min_age": {
          "min": {
            "field": "age" ##age最小值
          }
        },
        "max_age": {
          "max": {
            "field": "age" ##age最大值
          }
        },
        "avg_age": {
          "avg": {
            "field": "age" ##age平均值
          }
        },
        "sum_age": {
          "sum": {
            "field": "age" ##age之和
          }
        }
      }
    }
    
    • cardinality
      集合的势,或者基数,指不同数值的个数,类似于sql中的distinct count的概念;
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "count_of_job":{
          "cardinality": {
            "field": "job.keyword" ##返回不同工作的个数
          }
        }
      }
    }
    

    多值分析,输出多个分析结果,stats/extended stats/percentile/percentile rank/top hits

    • stats/extended stats
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "stats_age":{
          "stats": {
            "field": "age"
          }
        }
      }
    }
    //更多统计数据
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "exstats_salary":{
          "extended_stats": {
            "field": "salary"
          }
        }
      }
    }
    
    • percentile/percentile rank
      百分位数统计/百分位数排名
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "per_age": {
          "percentiles": {
            "field": "salary",
            "percents": [
              95,
              99,
              99.9
            ]
          }
        }
      }
    }
    //百分位数排名
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "per_salary": {
          "percentile_ranks": {
            "field": "salary",
            "values": [
              11000,
              30000
            ]
          }
        }
      }
    }
    
    • top hits
      一般用于分桶后获取该桶内最匹配的顶部文档列表,即详情数据;
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs": {
            "top_employee": {
              "top_hits": {
                "size": 10,
                "sort": [
                  {
                    "age": {
                      "order": "desc"
                    }
                  }
                ]
              }
            }
          }
        }
      }
    }
    

    二 Bucket

    • terms
      直接按照term分桶;如果字段是text类型,则按照分词后的结果进行分桶(text字段需要开启fielddata才能用于聚合);
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job",
            "size": 5
          }
        }
      }
    }
    
    • range
      通过指定数值的范围来设定分桶规则;
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "salary_range": {
          "range": {
            "field": "salary",
            "ranges": [
              {
                "key":"<10000",
                "to": 10000
              },
              {
                "from": 10000,
                "to": 20000
              },
              {
                "key":">20000",
                "from": 20000
              }
            ]
          }
        }
      }
    }
    
    • date range
      通过指定日期的范围来进行分桶;
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "date_range": {
          "date_range": {
            "field": "birth",
            "format": "yyyy",
            "ranges": [
              {
                "from":"1980",
                "to": "1990"
              },
              {
                "from": "1990",
                "to": "2000"
              },
              {
                "from": "2000"
              }
            ]
          }
        }
      }
    }
    
    • histogram
      直方图,以固定间隔的策略来分割数据;
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "salary_hist":{
          "histogram": {
            "field": "salary",
            "interval": 5000,
            "extended_bounds": {
              "min": 0,
              "max": 40000
            }
          }
        }
      }
    }
    
    • date histogram
      针对日期的直方图或者柱状图;
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "by_year":{
          "date_histogram": {
            "field": "birth",
            "interval": "year",
            "format":"yyyy"
          }
        }
      }
    }
    

    三 Bucket+Metric

    Bucket聚合分析允许通过添加子分析来进一步进行分析,子分析可以是Bucket,也可以是Metric;

    • bucket+bucket
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs": {
            "age_range": {
              "range": {
                "field": "age",
                "ranges": [
                  {
                    "to": 20
                  },
                  {
                    "from": 20,
                    "to": 30
                  },
                  {
                    "from": 30
                  }
                ]
              }
            }
          }
        }
      }
    }
    
    • bucket+metric
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs": {
            "salary": {
              "stats": {
                "field": "salary"
              }
            }
          }
        }
      }
    }
    

    四 Pipeline

    针对聚合分析的结果再次进行聚合分析,支持链式调用,且分析结果会输出到原结果中;根据输出位置的不同分为两类:输出结果与现有聚合分析结果同级的,称为Sibling;

    • Max/Min/Avg/Sum Bucket
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "jobs":{
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs":{
            "avg_salary":{
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "sum_salary_by_job":{
          "sum_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    } 
    
    • Stats/Extended Stats Bucket
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "jobs":{
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs":{
            "avg_salary":{
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "stats_salary_by_job":{
          "stats_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    } 
    
    • Percentiles Bucket
    GET test_search_index/_search
    {
      "size":0,
      "aggs":{
        "jobs":{
          "terms": {
            "field": "job.keyword",
            "size": 10
          },
          "aggs":{
            "avg_salary":{
              "avg": {
                "field": "salary"
              }
            }
          }
        },
        "percentiles_salary_by_job":{
          "percentiles_bucket": {
            "buckets_path": "jobs>avg_salary"
          }
        }
      }
    } 
    

    输出结果内嵌到现有聚合分析结果之中的,称为Parent;

    • Derivative
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "birth": {
          "date_histogram": {
            "field": "birth",
            "interval": "year",
            "min_doc_count": 0
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            },
            "derivative_avg_salary": {
              "derivative": {
                "buckets_path": "avg_salary"
              }
            }
          }
        }
      }
    }
    
    • Moving Average
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "birth": {
          "date_histogram": {
            "field": "birth",
            "interval": "year",
            "min_doc_count": 0
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            },
            "mavg_salary": {
              "moving_avg": {
                "buckets_path": "avg_salary"
              }
            }
          }
        }
      }
    }
    
    • Cumulative Sum
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "birth": {
          "date_histogram": {
            "field": "birth",
            "interval": "year",
            "min_doc_count": 0
          },
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            },
            "cumulative_salary": {
              "cumulative_sum": {
                "buckets_path": "avg_salary"
              }
            }
          }
        }
      }
    }
    

    五 Scope

    ES聚合分析默认作用范围是query结果集,可以通过filter/post_filter/global改变其作用范围;

    • filter
      不改变整体query语句的情况下,为某个聚合分析设定过滤条件,从而修改了作用范围;
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs_salary_small": {
          "filter": {
            "range": {
              "salary": {
                "to": 10000
              }
            }
          },
          "aggs": {
            "jobs": {
              "terms": {
                "field": "job.keyword"
              }
            }
          }
        },
        "jobs": { ##jobs与jobs_salary_small同级
          "terms": {
            "field": "job.keyword"
          }
        }
      }
    }
    
    • post_filter
      作用于聚合分析之后,对query返回的文档进行过滤,不影响聚合分析的结果;
    GET test_search_index/_search
    {
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword"
          }
        }
      },
      "post_filter": {
        "match":{
          "job.keyword":"java engineer"
        }
      }
    }
    
    • global
      无视query过滤条件,基于全部文档进行分析;
    GET test_search_index/_search
    {
      "query": {
        "match": {
          "job.keyword": "java engineer"
        }
      },
      "aggs": {
        "java_avg_salary": {
          "avg": {
            "field": "salary"
          }
        },
        "all": {
          "global": {},
          "aggs": {
            "avg_salary": {
              "avg": {
                "field": "salary"
              }
            }
          }
        }
      }
    }
    

    六 Sort

    • .与>的区别
    ##引用嵌套的子聚合(json对象)时使用>,引用多值Metric中的某个统计值(如stats的sum)时使用.
    ##按薪水总和降序排序
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "jobs": {
          "terms": {
            "field": "job.keyword",
            "size": 10,
            "order": [
              {
                "stats_salary.sum": "desc"
              }
            ]
          },
          "aggs": {
            "stats_salary": {
              "stats": {
                "field": "salary"
              }
            }
          }
        }
      }
    }
    ##以5000为间隔分桶,桶的排序由每个桶内年龄大于等于10岁的文档的平均年龄决定
    GET test_search_index/_search
    {
      "size": 0,
      "aggs": {
        "salary_hist": {
          "histogram": {
            "field": "salary",
            "interval": 5000,
            "order": {
              "age>avg_age": "desc"
            }
          },
          "aggs": {
            "age": {
              "filter": {
                "range": {
                  "age": {
                    "gte": 10
                  }
                }
              },
              "aggs": {
                "avg_age": {
                  "avg": {
                    "field": "age"
                  }
                }
              }
            }
          }
        }
      }
    }
    

    相关文章

      网友评论

        本文标题:ElasticSearch(五):Aggregation

        本文链接:https://www.haomeiwen.com/subject/jhfhqqtx.html