美文网首页
ES操作纪要

ES操作纪要

作者: 唐T唐X | 来源:发表于2020-12-22 11:48 被阅读0次

    我们使用的ES是5.5版本,所以参考的官方文档是:https://www.elastic.co/guide/en/elasticsearch/reference/5.5/index.html

    1. 创建索引、创建mapping映射、设置分词

    PUT tangxue_test_index_20201222
    {
        "settings":{
            "index":{
                "number_of_shards":"3",
                "number_of_replicas":0,
                "refresh_interval":"-1"
            },
            "analysis":{
                "analyzer":{
                    "ngram_analyzer":{
                        "tokenizer":"my_tokenizer"
                    },
                    "ik_unsmart_analyzer":{
                        "filter":[
                            "lowercase"
                        ],
                        "type":"custom",
                        "tokenizer":"ik_max_word"
                    }
                },
                "tokenizer":{
                    "my_tokenizer":{
                        "filter":[
                            "lowercase"
                        ],
                        "type":"ngram",
                        "min_gram":1,
                        "max_gram":3,
                        "token_chars":[
                            "letter",
                            "digit"
                        ]
                    }
                }
            }
        },
        "mappings":{
            "medicalrecord":{
                "properties":{
                    "fullFieldName":{
                        "type":"keyword",
                        "fields":{
                            "ngramFullFieldName":{
                                "type":"text",
                                "analyzer":"ngram_analyzer"
                            },
                            "ikFullFieldName":{
                                "type":"text",
                                "analyzer":"ik_unsmart_analyzer"
                            }
                        }
                    }
                }
            }
        }
    }
    
    image.png

    2. 获取索引信息(aliases、mappings、settings)

    GET tangxue_test_index_20201222
    

    获取结果为:

    {
      "tangxue_test_index_20201222": {
        "aliases": {},
        "mappings": {
          "medicalrecord": {
            "properties": {
              "fullFieldName": {
                "type": "keyword",
                "fields": {
                  "ikFullFieldName": {
                    "type": "text",
                    "analyzer": "ik_unsmart_analyzer"
                  },
                  "ngramFullFieldName": {
                    "type": "text",
                    "analyzer": "ngram_analyzer"
                  }
                }
              }
            }
          }
        },
        "settings": {
          "index": {
            "refresh_interval": "-1",
            "number_of_shards": "3",
            "provided_name": "tangxue_test_index_20201222",
            "creation_date": "1608606744812",
            "analysis": {
              "analyzer": {
                "ik_unsmart_analyzer": {
                  "filter": [
                    "lowercase"
                  ],
                  "type": "custom",
                  "tokenizer": "ik_max_word"
                },
                "ngram_analyzer": {
                  "tokenizer": "my_tokenizer"
                }
              },
              "tokenizer": {
                "my_tokenizer": {
                  "filter": [
                    "lowercase"
                  ],
                  "token_chars": [
                    "letter",
                    "digit"
                  ],
                  "min_gram": "1",
                  "type": "ngram",
                  "max_gram": "3"
                }
              }
            },
            "number_of_replicas": "0",
            "uuid": "-QzDmvr1RiO4Ce44RKom7A",
            "version": {
              "created": "5050399"
            }
          }
        }
      }
    }
    

    3. 创建文档

    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"姓名"
    }
    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"姓"
    }
    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"名"
    }
    

    4. 搜索文档(细写细读,也就是写入和读取用的分词一样)

    GET tangxue_test_index_20201222/_search
    {
      "query" : {
        "bool" : {
          "should" : [
            {
              "match": {
                "fullFieldName.ngramFullFieldName": "姓名"
              }
            }
          ],
          "disable_coord" : false,
          "adjust_pure_negative" : true,
          "boost" : 1.0
        }
      }
    }
    

    返回结果:

    {
      "took": 1,
      "timed_out": false,
      "_shards": {
        "total": 3,
        "successful": 3,
        "failed": 0
      },
      "hits": {
        "total": 7,
        "max_score": 4.9663877,
        "hits": [
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXaIjKHEsDP0SDHXyJsa",
            "_score": 4.9663877,
            "_source": {
              "fullFieldName": "姓名"
            }
          },
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXaIlF0xsDP0SDHXyJse",
            "_score": 1.5325457,
            "_source": {
              "fullFieldName": "名"
            }
          },
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXaIlCH-sDP0SDHXyJsd",
            "_score": 0.44839138,
            "_source": {
              "fullFieldName": "姓"
            }
          }
        ]
      }
    }
    

    5. 修改_settings

    POST tangxue_test_index_20201222/_close
    
    PUT tangxue_test_index_20201222/_settings
    {
      "analysis":{
          "analyzer":{
              "ngram_analyzer":{
                  "tokenizer":"my_tokenizer"
              },
              "ik_unsmart_analyzer":{
                  "filter":[
                      "lowercase"
                  ],
                  "type":"custom",
                  "tokenizer":"ik_max_word"
              }
          },
          "tokenizer":{
              "my_tokenizer":{
                  "filter":[
                      "lowercase"
                  ],
                  "type":"ngram",
                  "min_gram":2,
                  "max_gram":3,
                  "token_chars":[
                      "letter",
                      "digit"
                  ]
              }
          }
      }
    }
    
    POST tangxue_test_index_20201222/_open
    

    6. 增加Mappings实现细写粗读

    一般情况下,索引分词(写)应该按照最细粒度分词,搜索分词(读)可按照最粗粒度分词,即所谓的细写粗读

    修改Settings(analysis 属于静态设置,与第5步一样,需要先 _close 索引,修改完成后再 _open)
    PUT tangxue_test_index_20201222/_settings
    {
      "analysis":{
          "analyzer":{
              "ngram_analyzer1":{
                  "tokenizer":"my_tokenizer1"
              },
              "ngram_analyzer2":{
                  "tokenizer":"my_tokenizer2"
              },
              "ik_unsmart_analyzer":{
                  "filter":[
                      "lowercase"
                  ],
                  "type":"custom",
                  "tokenizer":"ik_max_word"
              }
          },
          "tokenizer":{
              "my_tokenizer1":{
                  "filter":[
                      "lowercase"
                  ],
                  "type":"ngram",
                  "min_gram":1,
                  "max_gram":3,
                  "token_chars":[
                      "letter",
                      "digit"
                  ]
              },
              "my_tokenizer2":{
                  "filter":[
                      "lowercase"
                  ],
                  "type":"ngram",
                  "min_gram":2,
                  "max_gram":3,
                  "token_chars":[
                      "letter",
                      "digit"
                  ]
              }
          }
      }
    }
    
    增加Mappings
    PUT tangxue_test_index_20201222/_mapping/medicalrecord?update_all_types
    {
      "properties": {
        "fullFieldName":{
            "type":"keyword",
            "fields":{
                "ngramFullFieldName_new":{
                    "type":"text",
                    "analyzer":"ngram_analyzer1",
                    "search_analyzer":"ngram_analyzer2"
                }
            }
        }
      }
    }
    
    写入数据
    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"姓名"
    }
    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"姓"
    }
    POST tangxue_test_index_20201222/medicalrecord
    {
      "fullFieldName":"姓名tang"
    }
    
    读取数据,注意要用新的mapping字段ngramFullFieldName_new
    GET tangxue_test_index_20201222/_search
    {
      "query" : {
        "bool" : {
          "should" : [
            {
              "match": {
                "fullFieldName.ngramFullFieldName_new": "姓名"
              }
            }
          ],
          "disable_coord" : false,
          "adjust_pure_negative" : true,
          "boost" : 1.0
        }
      }
    }
    
    读取结果:会发现返回结果中没有单独的“姓”这条文档(搜索时使用 ngram_analyzer2,min_gram 为 2,“姓名”只会被切分为“姓名”,不会产生单字“姓”)
    {
      "took": 1,
      "timed_out": false,
      "_shards": {
        "total": 3,
        "successful": 3,
        "failed": 0
      },
      "hits": {
        "total": 2,
        "max_score": 0.49191087,
        "hits": [
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXaI8FkysDP0SDHXyJsp",
            "_score": 0.49191087,
            "_source": {
              "fullFieldName": "姓名"
            }
          },
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXaI9ewmsDP0SDHXyJsr",
            "_score": 0.28004453,
            "_source": {
              "fullFieldName": "姓名tang"
            }
          }
        ]
      }
    }
    

    7. 查看特定分词器下的分词结果

    GET tangxue_test_index_20201222/_analyze
    {
      "analyzer":"ngram_analyzer1",
      "text":"姓名美美 !*1"
    }
    

    结果为:

    {
      "tokens": [
        {
          "token": "姓",
          "start_offset": 0,
          "end_offset": 1,
          "type": "word",
          "position": 0
        },
        {
          "token": "姓名",
          "start_offset": 0,
          "end_offset": 2,
          "type": "word",
          "position": 1
        },
        {
          "token": "姓名美",
          "start_offset": 0,
          "end_offset": 3,
          "type": "word",
          "position": 2
        },
        {
          "token": "名",
          "start_offset": 1,
          "end_offset": 2,
          "type": "word",
          "position": 3
        },
        {
          "token": "名美",
          "start_offset": 1,
          "end_offset": 3,
          "type": "word",
          "position": 4
        },
        {
          "token": "名美美",
          "start_offset": 1,
          "end_offset": 4,
          "type": "word",
          "position": 5
        },
        {
          "token": "美",
          "start_offset": 2,
          "end_offset": 3,
          "type": "word",
          "position": 6
        },
        {
          "token": "美美",
          "start_offset": 2,
          "end_offset": 4,
          "type": "word",
          "position": 7
        },
        {
          "token": "美",
          "start_offset": 3,
          "end_offset": 4,
          "type": "word",
          "position": 8
        },
        {
          "token": "1",
          "start_offset": 7,
          "end_offset": 8,
          "type": "word",
          "position": 9
        }
      ]
    }
    

    8. 查看所有索引详细数据

    GET /_cat/indices?v
    

    9. 查看某一条文档的信息(例如通过字段 “_id” 查询)

    GET tangxue_test_index_20201222/_search
    {
      "query": {
        "term": {
          "_id": {
            "value": "AXapehbzsDP0SDHXyJyz"
          }
        }
      }
    }
    

    结果为:

    {
      "took": 1,
      "timed_out": false,
      "_shards": {
        "total": 3,
        "successful": 3,
        "failed": 0
      },
      "hits": {
        "total": 1,
        "max_score": 1,
        "hits": [
          {
            "_index": "tangxue_test_index_20201222",
            "_type": "medicalrecord",
            "_id": "AXapehbzsDP0SDHXyJyz",
            "_score": 1,
            "_source": {
              "fullFieldName": "美美姓名思"
            }
          }
        ]
      }
    }
    

    10. 清除ES某个索引的缓存

    POST /tangxue_test_index_20201222/_cache/clear
    

    相关文章

      网友评论

          本文标题:ES操作纪要

          本文链接:https://www.haomeiwen.com/subject/wogjnktx.html