ES相关

作者: taobao | 来源:发表于2021-09-10 18:13 被阅读0次

Elasticsearch基本命令

-X 指定请求方法（如GET、POST、PUT、DELETE）
-d 表示以POST形式传入参数，写在请求正文里面
?pretty=true 以格式化的形式显示结果
-H "Content-Type:application/json" POST和PUT提交数据格式

curl -X GET http://localhost:9200/_cluster/health?pretty --查询elasticsearch的健康信息
curl -X GET http://localhost:9200/ --查询实例的相关信息

索引相关
curl -X GET http://localhost:9200/_cat/indices?v 索引列表
curl -X PUT http://localhost:9200/lvxh  创建索引
curl -X DELETE http://localhost:9200/lvxh  删除索引
修改lvxh索引的配置
curl -X PUT -H "Content-Type:application/json" http://localhost:9200/lvxh/_settings -d '{"number_of_replicas":1}' 
创建索引并带上配置
curl -X PUT -H "Content-Type:application/json" http://localhost:9200/lvxh -d '{"settings":{"number_of_replicas":1,"number_of_shards":1}}'  

文档相关：
插入/全量更新
// 不带主键
curl -X POST -H "Content-Type:application/json" http://localhost:9200/test/_doc -d '{"name":"张三","age":19,"性别":"男"}'
// 带主键
curl -X POST -H "Content-Type:application/json" http://localhost:9200/test/_doc/1 -d '{"name":"张三","age":19,"性别":"男"}'
// 强制新增，如果存在会报错
curl -X POST -H "Content-Type:application/json" http://localhost:9200/test/_doc/2/_create -d '{"name":"张三","age":19,"性别":"男"}'
// 部分字段更新
curl -X POST -H "Content-Type:application/json" http://localhost:9200/test/_doc/2/_update -d '{"doc":{"age":20}}'
// 版本7以上新增的更新方式
curl -X POST -H "Content-Type:application/json" http://localhost:9200/test/_update/2/ -d '{"doc":{"age":20}}'

删除
curl -X DELETE http://localhost:9200/test/_doc/1

查询
// 根据主键查询
curl -X GET http://localhost:9200/test/_doc/1
 // 查询所有
curl -X GET http://localhost:9200/test/_search
// 全文检索
curl -X GET http://localhost:9200/test/_search?q=张三
// 查询所有
curl -X GET http://localhost:9200/test/_search -d '{"query":{"match_all":{}}}'
#查询所有
get /test/_search

#全文检索
get /test/_search?q=xxx

#DSL(Domain Specific Language)语句
#查询所有
get /test/_search
{
  "query":{
    "match_all": {}
  }
}

#按条件查询，并排序
get /test/_search
{
  "query":{
    "match": { 
         "sex":"女"
    }
  },
  "sort":[
      {"age":"desc"}
  ]
}

#分页查询
get test/_search
{
  "query":{
    "match_all": {}
  },
  "from":1, #从第几条数据开始查询，从0开始计数
  "size":2, #查询多少数据
  "sort":[
    {"age":"desc"}
  ]
}

#查询部分字段 - 此搜索操作适合构建复杂查询条件，生产环境常用。
get /test/_search
{
  "query":{
    "match_all": {
    }
  },
  "_source":["name"]
}

# 多条件查询
GET /test/_search
{
  "query": {
    "bool": {     # 多条件搜索，内部的若干条件，只要有正确结果，即可
      "must": [ # 必须，内部若干条件，必须都匹配才有结果（类似mysql中的and关键字, 与之对应的是should, 类似mysql中的or关键字）
        {
          "match": {  
            "name": "张三"
          }
        },
        {
          "match": {
            "sex": "女"
          }
        }
      ]
    }
  }
}

#全文检索 -  full-text search
#要求查询条件拆分后的任意词条与具体数据匹配就算搜索结果。查询结果顺序默认与匹配度分数相关
GET /test/_search
{
  "query": {
    "match": {
      "name": "张三"
    }
  }
}

#phrase search（短语搜索）。
#要求查询条件必须和具体数据完全匹配才算搜索结果
GET /test/_search
{
  "query": {
    "match_phrase": {
      "name": "张三"
    }
  }
}

#高亮搜索结果
#高亮显示。高亮不是搜索条件，是显示逻辑。在搜索的时候，经常需要对条件实现高亮显示
get /test/_search
{
  "query":{
    "match":{
      "name":"张三"
    }
  },
  "highlight":{
    "pre_tags": ["<font style='color:red'>"],
    "post_tags": ["</font>"],
    "fields":{
      "name":{}
    }
  }
}

#聚合搜索
#准备工作
PUT /products_index/_doc/1
{
   "name":"IPHONE 8",
   "remark":"64G",
   "price":548800,
   "producer":"APPLE",
   "tags" : [ "64G", "red color", "Nano SIM" ]
}
PUT /products_index/_doc/2
{
   "name":"IPHONE 8",
   "remark":"64G",
   "price":548800,
   "producer":"APPLE",
   "tags" : [ "64G", "golden color", "Nano SIM" ]
}
PUT /products_index/_doc/3
{
   "name":"IPHONE 8 PLUS",
   "remark":"128G",
   "price":748800,
   "producer":"APPLE",
   "tags" : [ "128G", "red color", "Nano SIM" ]
}
PUT /products_index/_doc/4
{
   "name":"IPHONE 8 PLUS",
   "remark":"256G",
   "price":888800,
   "producer":"APPLE",
   "tags" : [ "256G", "golden color", "Nano SIM" ]
}

#开启fielddata
PUT /products_index/_mapping/
{
   "properties" : {
      "tags" : {
         "type":"text",
         "fielddata":true  #开启正排索引,类似mysql中的普通索引，用以下面的聚合查询
      }
   }
}

#查询每个词条出现的个数统计。
#类似mysql中的select count(*) .. from .. group by ...
get /products_index/_search
{
  "aggs":{  # 开始聚合，类似query，是一个命令。或api
    "group_by_tags":{ # 给聚合数据，加一个命名。自定义
      "terms": {  #  是一个聚合api，类似数据库中的聚合函数。解析某字段中的词条。类似的聚合函数还有avg等
        "field": "tags"
      }      
    }
  }
}

#结合查询条件的聚合统计。
#类似mysql中的select count(*) .. from ...where.... group by ...
GET /products_index/_search
{
   "query" : {
      "match" : { "name" : "PLUS" }
   },
   "aggs" : {
      "group_by_tags":{
         "terms" : { "field" : "tags" }
      }
   }
}

#计算name中包含plus的document数据中的price字段平均值
#类似mysql的select avg(...) from ... where ...
GET /products_index/_search
{
   "query" : {
      "match" : { "name" : "PLUS" }
   },
   "aggs" : {
      "avg_by_price":{
         "avg" : { "field" : "price" }
      }
   }
}

#聚合是可以嵌套的，内层聚合是依托于外层聚合的结果之上，实现聚合计算的。
#搜索包含plus的document，根据tags做词条统计，在统计结果中，计算price平均值。
#类似mysql中的select avg(..) from ... where ... group by ...
GET /products_index/_search
{
  "query": {
    "match": {
      "name": "plus"
    }
  },
  "aggs": {
    "group_by_tags": {
      "terms": {
        "field": "tags"
      },
      "aggs": {
        "avg_by_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

#聚合aggs中如果使用order排序的话，要求排序字段必须是一个aggs聚合相关的字段。
#计算每个tag中的Document数据的price平均值，并根据price字段数据排序.类似mysql中的select  .. from ...where.... group by ...order by...
get /products_index/_search
{
  "aggs":{
    "group_by_tags":{
      "terms": {
        "field": "tags",
        "order": {
          "avg_price": "desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

#使用price取值范围分组，再计算分组document中price的平均值
get /products_index/_search
{
  "query":{
    "match_all": {}
  },
  "_source":"price",
  "aggs":{
    "range_by_price":{
        "range": {
          "field": "price",
          "ranges": [
            {
              "from": 500000,
              "to": 600000
            },
            {
              "from": 600001,
              "to": 800000
            },
            {
              "from": 800001,
              "to": 1000000
            }            
          ]
        },
        "aggs": {
          "avg_by_price": {
            "avg": {
              "field": "price"
            }
          }
        }
    }
  }
}

创建索引的mapping
PUT /test
{
    "settings" : {
        "index" : {
            "number_of_shards" : 1,
            "number_of_replicas" : 1,
            "sort.field" : ["name", "age"],
            "sort.order" : ["asc", "desc"]
        }
    },
    "mappings": {
      "properties": {
        "name":{
          "type": "keyword"
        },
        "age":{
          "type":"integer"
        },
        "description":{
          "type": "text",
          "analyzer": "ik_max_word"
        }
      }
    }
}

Es增删改数据处理过程

Es数据增删改处理过程

a. 客户端发起请求，执行增删改操作。所有的增删改操作都由primary shard直接处理，replica shard只被动的备份数据。此操作请求到节点2（请求发送到的节点随机），这个节点称为协调节点（coordinate node）。
b. 协调节点通过路由算法，计算出本次操作的Document所在的shard。假设本次操作的Document所在shard为 primary shard 0。协调节点计算后，会将操作请求转发到节点1。
c. 节点1中的primary shard 0在处理请求后，会将数据的变化同步到对应的replica shard 0中，也就是发送一个同步数据的请求到节点3中。
d. replica shard 0在同步数据后，会响应通知请求者同步成功，也就是响应给primary shard 0（节点1）。
e. primary shard 0（节点1）接收到replica shard 0的同步成功响应后，会响应请求者，本次操作完成。也就是响应给协调节点（节点2）。
f. 协调节点返回响应给客户端，通知操作结果。

Es查询过程

a. 客户端发起请求，执行查询操作。查询操作都由primary shard和replica shard共同处理。此操作请求到节点2（请求发送到的节点随机），这个节点称为协调节点（coordinate node）。
b. 协调节点通过路由算法，计算出本次查询的Document所在的shard。假设本次查询的Document所在shard为 shard 0。协调节点计算后，会将操作请求转发到节点1或节点3。分配请求到节点1还是节点3通过随机算法计算，ES会保证当请求量足够大的时候，primary shard和replica shard处理的查询请求数是均等的（但不是绝对一致）。
c. 节点1或节点3中的primary shard 0或replica shard 0在处理请求后，会将查询结果返回给协调节点（节点2）。
d. 协调节点得到查询结果后，再将查询结果返回给客户端。

curl -X GET http://localhost:9200/_cluster/nodes/ --得到集群中节点的相关信息
curl -X POST http://localhost:9200/_cluster/nodes/_shutdown --关闭整个集群
curl -X POST http://localhost:9200/_cluster/nodes/aaaa/_shutdown --关闭集群中指定节点
curl -X POST http://localhost:9200/lishuai --创建名为lishuai的索引
curl -X DELETE http://localhost:9200/lishuai --删除名为lishuai的索引
curl -X GET 'http://localhost:19200/benlaitest/_search?pretty=true' -d '{"query":{"multi_match":{"query":"法国","fields":["firstname","lastname"]}}}' --查询数据（匹配firstname和lastname）
curl -X POST -H "Content-Type: application/json" http://localhost:9200/benlaitest/_analyze -d '{"analyzer": "standard","text": "我爱你中国"}'
注意-H参数，和内容为json格式

postman执行请求API:
curl -XGET http://localhost:9200/_cat/indices?v -- Get请求查看有多少索引
curl -XGET http://localhost:9200/benlaitest/_analyze?analyzer=standard --查看分词结果
curl -XGET http://localhost:9200/benlaitest/_search?pretty=true -d '{"query":{"multi_match":{"query":"法国","fields":["firstname","lastname"]}}}' --查询数据（匹配firstname和lastname）
curl http://localhost:9200/benlaitest/_analyze?analyzer=standard -d 我爱你中国
curl -X POST -H "Content-Type: application/json" http://localhost:9200/benlaitest/_analyze -d '{"analyzer": "standard","text": "我爱你中国"}'

postman执行请求API:
http://10.10.110.160:9200/_cat/indices?v -- Get请求查看有多少索引
http://10.10.110.160:9200/benlaitest/_analyze?analyzer=standard --查看分词结果

分片原则

参考：https://blog.csdn.net/alan_liuyue/article/details/79585345

单个分片数据大小30G
如果你真的担心数据的快速增长, 我们建议你多关心这条限制: ElasticSearch推荐的最大JVM堆空间是30~32G, 所以把你的分片最大容量限制为30GB, 然后再对分片数量做合理估算. 例如, 你认为你的数据能达到200GB, 我们推荐你最多分配7到8个分片.
单个分片默认最大文档数量是20亿
按天建索引，每个索引都拥有集群设定的分片数

MySQL空间占用计算方法

各业务状态表

bigint         8字节
int             4字节
tinyint         1字节
算乘法计算占用磁盘：
(8+8+4+1) * 1亿 = 21亿字节   约为2G

子订单状态表

smallint    2字节
(8+8+2+8+1) * 1千万 = 2.7亿字节，约为0.3G

埋点日志

data 字段varchar(200)
(8+8+4+4+200)*1亿 = 224亿字节，约为 20G，
如果一个副本，1亿条占用40G，5亿条200G



TINYINT                         1 字节
SMALLINT                        2 个字节
MEDIUMINT                       3 个字节
INT                             4 个字节
INTEGER                                    4 个字节
BIGINT                                       8 个字节
FLOAT                                       4 个字节
DOUBLE                                    8 个字节
DOUBLE PRECISION                  8 个字节
REAL                                          8 个字节
DECIMAL(M,D)                          M字节(D+2 , 如果M < D)
NUMERIC(M,D)                          M字节(D+2 , 如果M < D)
日期和时间类型
DATE                                        3 个字节
DATETIME                                 8 个字节
TIMESTAMP                               4 个字节
TIME                                         3 个字节
YEAR                                         1 字节
字符串类型
CHAR(M)                                        M字节，1 <= M <= 255
VARCHAR(M)                                 L+1 字节, 在此L <= M和1 <= M <= 255（L为实际存储的字节长度）
TINYBLOB, TINYTEXT                     L+1 字节, 在此L< 2 ^ 8
BLOB, TEXT                                   L+2 字节, 在此L< 2 ^ 16
MEDIUMBLOB, MEDIUMTEXT         L+3 字节, 在此L< 2 ^ 24
LONGBLOB, LONGTEXT                 L+4 字节, 在此L< 2 ^ 32

其它资料

1：https://blog.csdn.net/qq_39938758/article/details/108047841?utm_term=es%E8%8F%9C%E9%B8%9F%E6%95%99%E7%A8%8B&utm_medium=distribute.pc_aggpage_search_result.none-task-blog-2_allsobaiduweb~default-1-108047841&spm=3001.4430

2：优化
https://blog.csdn.net/lcfchan/article/details/115217711

3：es文档
https://www.elastic.co/guide/cn/elasticsearch/guide/current/intro.html

4：es性能测试，批量写会快很多
https://www.cnblogs.com/sesexxoo/p/6190583.html

ES相关

Elasticsearch基本命令

分片原则

MySQL空间占用计算方法

其它资料

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读