美文网首页
013.Elasticsearch聚合统计简单操作

013.Elasticsearch聚合统计简单操作

作者: CoderJed | 来源:发表于2020-06-29 16:56 被阅读0次

    1. 准备测试数据

    PUT /shop/product/1
    {
        "name": "Charcoal Toothpaste",
        "desc": "Travel-Friendly Daily Use Teeth Whitening Cleaning Activated Organic Charcoal Toothpaste",
        "price": 30,
        "producer": "Charcoal Producer",
        "tags": ["Whitening", "Refreshing"]
    }
    
    PUT /shop/product/2
    {
        "name": "Netural Toothpaste",
        "desc": "128g Rock salt whitening anti sensitive Toothpaste natural oral deeping cleaning",
        "price": 25,
        "producer": "Netural Producer",
        "tags": ["Whitening", "Refreshing", "Cleaning"]
    }
    
    PUT /shop/product/3
    {
        "name": "Bamboo Toothpaste",
        "desc": "Bamboo Charcoal Toothpaste Fluoride Free 4 oz",
        "price": 40,
        "producer": "Bamboo Producer",
        "tags": ["Anti-Cavity", "Sensitive"]
    }
    
    # text类型的字段默认不能用于聚合,需要开启fielddata;聚合时统计的是分词后的词项(如"Anti-Cavity"会被拆分为"anti"和"cavity")
    PUT /shop/_mapping/product
    {
      "properties": {
        "tags": {
          "type": "text",
          "fielddata": true
        }
      }
    }
    

    2. 基本的聚合统计分析API

    2.1 查询名称包含"Toothpaste"的商品并按照价格降序排序

    GET /shop/product/_search
    {
      "query": {
        "match": {
          "name": "Toothpaste"
        }
      },
      "sort": [
        {
          "price": {
            "order": "desc"
          }
        }
      ]
    }
    
    {
      "took" : 55,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : null,
        "hits" : [
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "3",
            "_score" : null,
            "_source" : {
              "name" : "Bamboo Toothpaste",
              "desc" : "Bamboo Charcoal Toothpaste Fluoride Free 4 oz",
              "price" : 40,
              "producer" : "Bamboo Producer",
              "tags" : [
                "Anti-Cavity",
                "Sensitive"
              ]
            },
            "sort" : [
              40
            ]
          },
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "1",
            "_score" : null,
            "_source" : {
              "name" : "Charcoal Toothpaste",
              "desc" : "Travel-Friendly Daily Use Teeth Whitening Cleaning Activated Organic Charcoal Toothpaste",
              "price" : 30,
              "producer" : "Charcoal Producer",
              "tags" : [
                "Whitening",
                "Refreshing"
              ]
            },
            "sort" : [
              30
            ]
          },
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "2",
            "_score" : null,
            "_source" : {
              "name" : "Netural Toothpaste",
              "desc" : "128g Rock salt whitening anti sensitive Toothpaste natural oral deeping cleaning",
              "price" : 25,
              "producer" : "Netural Producer",
              "tags" : [
                "Whitening",
                "Refreshing",
                "Cleaning"
              ]
            },
            "sort" : [
              25
            ]
          }
        ]
      }
    }
    

    2.2 分页查询,每页显示1条数据,查询第2页,并且只查询名称和价格字段

    # from是起始偏移量(从0开始),第N页对应 from = (N - 1) * size;未指定排序时,document并非按照id排序
    GET /shop/product/_search
    {
      "query": {
        "match_all": {}
      },
      "from": 1,
      "size": 1,
      "_source": ["name", "price"]
    }
    
    {
      "took" : 3,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : 1.0,
        "hits" : [
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "1",
            "_score" : 1.0,
            "_source" : {
              "price" : 30,
              "name" : "Charcoal Toothpaste"
            }
          }
        ]
      }
    }
    

    如果不加from和size,默认搜索前10条数据

    深度分页带来的性能问题

    考虑这样的场景:

    假设某个index中总共有6万条数据,有3个shard,每个shard上有2万条数据,3个shard分布在node01、node02、node03这3个节点。客户端请求分页查询,查询第1000页的10条数据(即from=9990、size=10),请求发送给node04这个节点,那么node04会把请求转发到shard所在的3个节点。由于请求的是第1000页的10条数据,所以应该是第9991-10000条数据。注意,这里每个shard都会将自己的前10000条数据返回给node04节点,而不是每个shard只将第9991-10000这10条数据返回给node04,所以node04会收到30000条数据,而不是30条。拿到这30000条数据后,再根据相关度("_score")进行排序,最终取到满足要求的第1000页的10条数据。

    所以,深度分页是有性能问题的:这个过程会占用大量的网络带宽,以及协调节点(node04)的内存和CPU资源,因此应该尽量避免深度分页搜索。

    2.3 搜索商品名称包含"Toothpaste"并且售价大于30的商品

    GET /shop/product/_search
    {
      "query": {
        "bool": {
          "must": {
            "match": {
              "name": "Toothpaste"
            }
          },
          "filter": {
            "range": {
              "price": {
                "gt": 30
              }
            }
          }
        }
      }
    }
    
    {
      "took" : 15,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 1,
        "max_score" : 0.2876821,
        "hits" : [
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "3",
            "_score" : 0.2876821,
            "_source" : {
              "name" : "Bamboo Toothpaste",
              "desc" : "Bamboo Charcoal Toothpaste Fluoride Free 4 oz",
              "price" : 40,
              "producer" : "Bamboo Producer",
              "tags" : [
                "Anti-Cavity",
                "Sensitive"
              ]
            }
          }
        ]
      }
    }
    

    2.4 高亮搜索

    GET /shop/product/_search
    {
      "query": {
        "match": {
          "name": "Bamboo"
        }
      },
      "highlight": {
        "fields": {
          "name": {}
        }
      }
    }
    
    {
      "took" : 51,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 1,
        "max_score" : 0.2876821,
        "hits" : [
          {
            "_index" : "shop",
            "_type" : "product",
            "_id" : "3",
            "_score" : 0.2876821,
            "_source" : {
              "name" : "Bamboo Toothpaste",
              "desc" : "Bamboo Charcoal Toothpaste Fluoride Free 4 oz",
              "price" : 40,
              "producer" : "Bamboo Producer",
              "tags" : [
                "Anti-Cavity",
                "Sensitive"
              ]
            },
            "highlight" : {
              "name" : [
                "<em>Bamboo</em> Toothpaste"
              ]
            }
          }
        ]
      }
    }
    
    # 自定义高亮标签
    GET /shop/product/_search
    {
      "query": {
        "match": {
          "name": "Bamboo"
        }
      },
      "highlight": {
        "fields": {
          "name": {
            "pre_tags": ["<h1>"],
            "post_tags": ["</h1>"]
          }
        }
      }
    }
    

    2.5 计算每个tag下的商品数量

    # "size": 0 代表只返回聚合结果而不返回每个Document记录
    GET /shop/product/_search
    {
      "aggs": {
        "products_per_tag": {
          "terms": {
            "field": "tags"
          }
        }
      },
      "size": 0
    }
    
    {
      "took" : 38,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "products_per_tag" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "refreshing",
              "doc_count" : 2
            },
            {
              "key" : "whitening",
              "doc_count" : 2
            },
            {
              "key" : "anti",
              "doc_count" : 1
            },
            {
              "key" : "cavity",
              "doc_count" : 1
            },
            {
              "key" : "cleaning",
              "doc_count" : 1
            },
            {
              "key" : "sensitive",
              "doc_count" : 1
            }
          ]
        }
      }
    }
    

    2.6 对名称中包含"Toothpaste"的商品,计算每个tag下的商品数量

    GET /shop/product/_search
    {
      "query": {
        "match": {
          "name": "Toothpaste"
        }
      }, 
      "aggs": {
        "products_per_tag": {
          "terms": {
            "field": "tags"
          }
        }
      },
      "size": 0
    }
    
    # 由于全部3个商品的名称都包含"Toothpaste",聚合结果与2.5的结果是一样的
    

    2.7 查询每个tag的商品的平均价格

    GET /shop/product/_search
    {
      "aggs": {
        "group_by_tag": {
          "terms": {
            "field": "tags"
          },
          "aggs": {
            "avg_price_per_tag": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      },
      "size": 0
    }
    
    {
      "took" : 11,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "group_by_tag" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "refreshing",
              "doc_count" : 2,
              "avg_price_per_tag" : {
                "value" : 27.5
              }
            },
            {
              "key" : "whitening",
              "doc_count" : 2,
              "avg_price_per_tag" : {
                "value" : 27.5
              }
            },
            {
              "key" : "anti",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            },
            {
              "key" : "cavity",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            },
            {
              "key" : "cleaning",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 25.0
              }
            },
            {
              "key" : "sensitive",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            }
          ]
        }
      }
    }
    

    2.8 查询每个tag的商品的平均价格,并且按照平均价格降序排序

    GET /shop/product/_search
    {
      "aggs": {
        "group_by_tag": {
          "terms": {
            "field": "tags",
            "order": {
              "avg_price_per_tag": "desc"
            }
          },
          "aggs": {
            "avg_price_per_tag": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      },
      "size": 0
    }
    
    {
      "took" : 14,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "group_by_tag" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "anti",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            },
            {
              "key" : "cavity",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            },
            {
              "key" : "sensitive",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 40.0
              }
            },
            {
              "key" : "refreshing",
              "doc_count" : 2,
              "avg_price_per_tag" : {
                "value" : 27.5
              }
            },
            {
              "key" : "whitening",
              "doc_count" : 2,
              "avg_price_per_tag" : {
                "value" : 27.5
              }
            },
            {
              "key" : "cleaning",
              "doc_count" : 1,
              "avg_price_per_tag" : {
                "value" : 25.0
              }
            }
          ]
        }
      }
    }
    

    2.9 按照指定的价格范围进行分组,然后在每个分组内按照tag进行分组,然后统计每组的平均价格

    GET /shop/product/_search
    {
      "aggs": {
        "group_by_price_range": {
          "range": {
            "field": "price",
            "ranges": [
              {
                "from": 0,
                "to": 30
              },
              {
                "from": 30,
                "to": 60
              }
            ]
          },
          "aggs": {
            "group_by_tag": {
              "terms": {
                "field": "tags"
              },
              "aggs": {
                "avg_price": {
                  "avg": {
                    "field": "price"
                  }
                }
              }
            }
          }
        }
      },
      "size": 0
    }
    
    {
      "took" : 13,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 3,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "group_by_price_range" : {
          "buckets" : [
            {
              "key" : "0.0-30.0",
              "from" : 0.0,
              "to" : 30.0,
              "doc_count" : 1,
              "group_by_tag" : {
                "doc_count_error_upper_bound" : 0,
                "sum_other_doc_count" : 0,
                "buckets" : [
                  {
                    "key" : "cleaning",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 25.0
                    }
                  },
                  {
                    "key" : "refreshing",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 25.0
                    }
                  },
                  {
                    "key" : "whitening",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 25.0
                    }
                  }
                ]
              }
            },
            {
              "key" : "30.0-60.0",
              "from" : 30.0,
              "to" : 60.0,
              "doc_count" : 2,
              "group_by_tag" : {
                "doc_count_error_upper_bound" : 0,
                "sum_other_doc_count" : 0,
                "buckets" : [
                  {
                    "key" : "anti",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 40.0
                    }
                  },
                  {
                    "key" : "cavity",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 40.0
                    }
                  },
                  {
                    "key" : "refreshing",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 30.0
                    }
                  },
                  {
                    "key" : "sensitive",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 40.0
                    }
                  },
                  {
                    "key" : "whitening",
                    "doc_count" : 1,
                    "avg_price" : {
                      "value" : 30.0
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
    

    相关文章

      网友评论

          本文标题:013.Elasticsearch聚合统计简单操作

          本文链接:https://www.haomeiwen.com/subject/dsqafktx.html