
Elasticsearch Stories

Author: 木棍兒 | Published 2019-01-16 15:15

    Story One: Trends

      Every ten seconds, Xiao Ming writes the current network interface statistics into ES. How can he chart the hourly trend of bytes received on that interface for a given day?
    Note: the received-bytes counter in the interface statistics is cumulative (it only ever increases).

    {
        "query": {
            "bool": {
                "must": [{
                        "range": {
                            "@timestamp": {
                                "gt": "2019-01-13T00:00:00.000+08:00",
                                "lt": "2019-01-13T23:59:59.999+08:00"
                            }
                        }
                    }
                ]
            }
        },
        "size": 0,
        "aggs": {
            "groupByInterval": {
                "date_histogram": {
                    "field": "@timestamp",
                    "interval": "1h",
                    "format": "yyyy-MM-dd HH:mm:ss",
                    "time_zone": "+08:00",
                    "min_doc_count": 0
                },
                "aggs": {
                    "maxin": {
                        "max": {
                            "field": "system.network.in.bytes"
                        }
                    },
                    "in_deriv": {
                        "derivative": {
                            "buckets_path": "maxin",
                            "unit": "1s"
                        }
                    }
                }
            }
        }
    }
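
      The DSL bodies in this post can be sent from any HTTP client. As a minimal sketch (not part of the original post), assuming the official elasticsearch-py client, a local cluster, and a Metricbeat index pattern, the query above could be issued and the hourly rates printed like this:

    from elasticsearch import Elasticsearch  # official elasticsearch-py client (assumed installed)

    es = Elasticsearch(["http://localhost:9200"])  # assumed local cluster address

    # The request body shown above, written out as a Python dict.
    query = {
        "size": 0,
        "query": {"bool": {"must": [{"range": {"@timestamp": {
            "gt": "2019-01-13T00:00:00.000+08:00",
            "lt": "2019-01-13T23:59:59.999+08:00"}}}]}},
        "aggs": {"groupByInterval": {
            "date_histogram": {"field": "@timestamp", "interval": "1h",
                               "format": "yyyy-MM-dd HH:mm:ss",
                               "time_zone": "+08:00", "min_doc_count": 0},
            "aggs": {"maxin": {"max": {"field": "system.network.in.bytes"}},
                     "in_deriv": {"derivative": {"buckets_path": "maxin", "unit": "1s"}}}}},
    }

    resp = es.search(index="metricbeat-*", body=query)  # "metricbeat-*" is an assumed index pattern

    # The first hourly bucket has no previous bucket, so it carries no "in_deriv".
    for bucket in resp["aggregations"]["groupByInterval"]["buckets"]:
        print(bucket["key_as_string"], bucket.get("in_deriv", {}).get("value"))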
    

    The query above returns:

    {
      "took" : 260,
      "timed_out" : false,
      "_shards" : {
        "total" : 1211,
        "successful" : 1211,
        "skipped" : 1205,
        "failed" : 0
      },
      "hits" : {
        "total" : 8640,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "groupByInterval" : {
          "buckets" : [
            {
              "key_as_string" : "2019-01-13 00:00:00",
              "key" : 1547308800000,
              "doc_count" : 360,
              "maxin" : {
                "value" : 1.5438929488E10
              }
            },
            ...
            ...
            {
              "key_as_string" : "2019-01-13 23:00:00",
              "key" : 1547391600000,
              "doc_count" : 360,
              "maxin" : {
                "value" : 1.5990460333E10
              },
              "in_deriv" : {
                "value" : 2883272.0,
                "normalized_value" : 800.9088888888889
              }
            }
          ]
        }
      }
    }
    

    Key points:
      derivative: a sub-aggregation for a histogram (or date_histogram). It differentiates a metric aggregation inside the histogram (put simply, each bucket's value minus the previous bucket's value). "buckets_path" names the aggregation to differentiate, and because "unit" is set to 1s, the "normalized_value" in the result is the average change per second.
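
      To make the normalization concrete, here is a minimal Python sketch (not part of the original post) that reproduces the "normalized_value" of the last bucket above from its raw derivative value:

    # Each date_histogram bucket spans one hour; with "unit": "1s" the derivative
    # is divided by the bucket width in seconds.
    bucket_width_seconds = 60 * 60
    raw_derivative = 2883272.0              # "value" of in_deriv in the last bucket

    per_second_rate = raw_derivative / bucket_width_seconds
    print(per_second_rate)                  # ~800.91, matching "normalized_value"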

    Story Two: So you want the last record from each group?

      Every ten seconds, Xiao Ming writes the current CPU usage percentage into ES. There are 10 hosts; how can he fetch the most recent CPU usage document for each host?

    {
      "aggs": {
        "groupByHostName": {
          "terms": {
            "field": "host.name"
          },
          "aggs": {
            "lastOne": {
              "top_hits": {
                "size":1,
                "sort":[
                  {
                    "@timestamp":{
                        "order":"desc"
                    }
                  }
                ],
                "_source": {
                  "includes": [ "system.cpu.total.pct"]
                }
              }
            }
          }
        }
      },
      "query": {
        "bool": {
          "must": [
            {
              "term": {
                "metricset.name": "cpu"
              }
            },
            {
                "range": {
                    "@timestamp": {
                        "gt": "2019-01-13T00:00:00.000+08:00",
                        "lt": "2019-01-13T23:59:59.999+08:00"
                    }
                }
            }
          ]
        }
      },
      "size": 0
    }
    

    The query above returns:

    {
      "took" : 3,
      "timed_out" : false,
      "_shards" : {
        "total" : 14,
        "successful" : 14,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 164136,
        "max_score" : 0.0,
        "hits" : [ ]
      },
      "aggregations" : {
        "groupByHostName" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 77736,
          "buckets" : [
            {
              "key" : "RedHat6.4-02",
              "doc_count" : 8640,
              "lastOne" : {
                "hits" : {
                  "total" : 8640,
                  "max_score" : null,
                  "hits" : [
                    {
                      "_index" : "metricbeat-6.5.1-2019.01.13",
                      "_type" : "doc",
                      "_id" : "c1zwR2gB7bWvjZhWp3RJ",
                      "_score" : null,
                      "_source" : {
                        "system" : {
                          "cpu" : {
                            "total" : {
                              "pct" : 0.0655
                            }
                          }
                        }
                      },
                      "sort" : [
                        1547395192576
                      ]
                    }
                  ]
                }
              }
            },
            ...
            ...
            {
              "key" : "docker185",
              "doc_count" : 8640,
              "lastOne" : {
                "hits" : {
                  "total" : 8640,
                  "max_score" : null,
                  "hits" : [
                    {
                      "_index" : "metricbeat-6.5.1-2019.01.13",
                      "_type" : "doc",
                      "_id" : "xVzwR2gB7bWvjZhWqHSz",
                      "_score" : null,
                      "_source" : {
                        "system" : {
                          "cpu" : {
                            "total" : {
                              "pct" : 0.0509
                            }
                          }
                        }
                      },
                      "sort" : [
                        1547395192917
                      ]
                    }
                  ]
                }
              }
            }
          ]
        }
      }
    }
    

    Key points:
      The top_hits aggregation retrieves the latest reported document within each host.name bucket: "sort" orders the documents by report time in descending order, "size" keeps only one hit per bucket (the newest), and "includes" under "_source" returns just the value of "system.cpu.total.pct", ignoring the document's other fields.
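
      As a small follow-up sketch (not part of the original post), assuming a response dict shaped exactly like the one above, the latest CPU percentage per host can be pulled out in Python like this:

    def latest_cpu_per_host(response):
        """Map host.name -> most recent system.cpu.total.pct."""
        result = {}
        for bucket in response["aggregations"]["groupByHostName"]["buckets"]:
            hit = bucket["lastOne"]["hits"]["hits"][0]  # size: 1, newest first
            pct = hit["_source"]["system"]["cpu"]["total"]["pct"]
            result[bucket["key"]] = pct
        return result

    # e.g. {"RedHat6.4-02": 0.0655, "docker185": 0.0509, ...}

      Keep in mind that a terms aggregation returns at most 10 buckets by default; with more than 10 hosts you would also raise its "size" parameter.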

    Story Three: A galloping courier in red dust, and the consort smiles

      Xiao Yang, who lives in Chang'an, often buys Lingnan lychees online. There are many relay stations along the road from Lingnan to Chang'an, and every time Xiao Yang's parcel passes one, the station staff write a record containing the tracking number and the current time into ES. How can he compute the average transit time from dispatch to delivery across all shipments?

    {
        "size": 0,
        "aggs": {
            "groupById": {
                "terms": {
                    "field": "id"
                },
                "aggs": {
                    "maxCreateTime": {
                        "max": {
                            "field": "createTime"
                        }
                    },
                    "minCreateTime": {
                        "min": {
                            "field": "createTime"
                        }
                    },
                    "resultValue": {
                        "bucket_script": {
                            "buckets_path": {
                                "min": "minCreateTime",
                                "max": "maxCreateTime"
                            },
                            "script": {
                                "source": "params.max - params.min"
                            }
                        }
                    }
                }
            },
            "avgValue": {
                "avg_bucket": {
                    "buckets_path": "groupById>resultValue"
                }
            }
        }
    }
    

    The query above returns:

    {
        "took": 5,
        "timed_out": false,
        "_shards": {
            "total": 1,
            "successful": 1,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": 4,
            "max_score": 0.0,
            "hits": []
        },
        "aggregations": {
            "groupById": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [{
                    "key": "1",
                    "doc_count": 2,
                    "minCreateTime": {
                        "value": 1.547366698E12,
                        "value_as_string": "2019-01-13 08:04:58"
                    },
                    "maxCreateTime": {
                        "value": 1.547539498E12,
                        "value_as_string": "2019-01-15 08:04:58"
                    },
                    "resultValue": {
                        "value": 1.728E8
                    }
                }, {
                    "key": "2",
                    "doc_count": 2,
                    "minCreateTime": {
                        "value": 1.547193898E12,
                        "value_as_string": "2019-01-11 08:04:58"
                    },
                    "maxCreateTime": {
                        "value": 1.547371938E12,
                        "value_as_string": "2019-01-13 09:32:18"
                    },
                    "resultValue": {
                        "value": 1.7804E8
                    }
                }]
            },
            "avgValue": {
                "value": 1.7542E8
            }
        }
    }
    

    Key points:
      The bucket_script aggregation runs a script that performs a calculation for each bucket. Its "buckets_path" feeds the results of minCreateTime and maxCreateTime into the script as the parameters "min" and "max", and the "source" inside "script" defines the calculation itself.
      The outer avg_bucket aggregation then averages over all buckets; its "buckets_path" points at the resultValue sub-aggregation of the groupById aggregation.
      Besides avg_bucket, ES also provides max_bucket, min_bucket, sum_bucket, stats_bucket, derivative, and other pipeline aggregations.
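
      As a quick sanity check (not part of the original post), the numbers above can be reproduced in a few lines of Python: each "resultValue" is max(createTime) minus min(createTime) in milliseconds, and "avgValue" is simply their mean.

    transit_ms = [1.728e8, 1.7804e8]        # "resultValue" of shipments "1" and "2"

    avg_ms = sum(transit_ms) / len(transit_ms)
    print(avg_ms)                           # 1.7542e8, matching "avgValue"
    print(avg_ms / (1000 * 60 * 60))        # ~48.7 hours from dispatch to delivery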

    More stories to come…
