我们使用的 ES 是 5.5 版本,所以参考的文档是:https://www.elastic.co/guide/en/elasticsearch/reference/5.5/index.html
1. 创建索引、创建mapping映射、设置分词
PUT tangxue_test_index_20201222
{
"settings":{
"index":{
"number_of_shards":"3",
"number_of_replicas":0,
"refresh_interval":"1"
},
"analysis":{
"analyzer":{
"ngram_analyzer":{
"tokenizer":"my_tokenizer"
},
"ik_unsmart_analyzer":{
"filter":[
"lowercase"
],
"type":"custom",
"tokenizer":"ik_max_word"
}
},
"tokenizer":{
"my_tokenizer":{
"filter":[
"lowercase"
],
"type":"ngram",
"min_gram":1,
"max_gram":3,
"token_chars":[
"letter",
"digit"
]
}
}
}
},
"mappings":{
"medicalrecord":{
"properties":{
"fullFieldName":{
"type":"keyword",
"fields":{
"ngramFullFieldName":{
"type":"text",
"analyzer":"ngram_analyzer"
},
"ikFullFieldName":{
"type":"text",
"analyzer":"ik_unsmart_analyzer"
}
}
}
}
}
}
}
image.png
2. 获取索引数据
GET tangxue_test_index_20201222
获取结果为:
{
"tangxue_test_index_20201222": {
"aliases": {},
"mappings": {
"medicalrecord": {
"properties": {
"fullFieldName": {
"type": "keyword",
"fields": {
"ikFullFieldName": {
"type": "text",
"analyzer": "ik_unsmart_analyzer"
},
"ngramFullFieldName": {
"type": "text",
"analyzer": "ngram_analyzer"
}
}
}
}
}
},
"settings": {
"index": {
"refresh_interval": "-1",
"number_of_shards": "3",
"provided_name": "tangxue_test_index_20201222",
"creation_date": "1608606744812",
"analysis": {
"analyzer": {
"ik_unsmart_analyzer": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "ik_max_word"
},
"ngram_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"filter": [
"lowercase"
],
"token_chars": [
"letter",
"digit"
],
"min_gram": "1",
"type": "ngram",
"max_gram": "3"
}
}
},
"number_of_replicas": "0",
"uuid": "-QzDmvr1RiO4Ce44RKom7A",
"version": {
"created": "5050399"
}
}
}
}
}
3. 创建文档
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"姓名"
}
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"姓"
}
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"名"
}
4. 搜索文档(细写细读,也就是写入和读取使用相同的分词器)
GET tangxue_test_index_20201222/_search
{
"query" : {
"bool" : {
"should" : [
{
"match": {
"fullFieldName.ngramFullFieldName": "姓名"
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
返回结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 7,
"max_score": 4.9663877,
"hits": [
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXaIjKHEsDP0SDHXyJsa",
"_score": 4.9663877,
"_source": {
"fullFieldName": "姓名"
}
},
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXaIlF0xsDP0SDHXyJse",
"_score": 1.5325457,
"_source": {
"fullFieldName": "名"
}
},
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXaIlCH-sDP0SDHXyJsd",
"_score": 0.44839138,
"_source": {
"fullFieldName": "姓"
}
}
]
}
}
5. 修改_settings
POST tangxue_test_index_20201222/_close
PUT tangxue_test_index_20201222/_settings
{
"analysis":{
"analyzer":{
"ngram_analyzer":{
"tokenizer":"my_tokenizer"
},
"ik_unsmart_analyzer":{
"filter":[
"lowercase"
],
"type":"custom",
"tokenizer":"ik_max_word"
}
},
"tokenizer":{
"my_tokenizer":{
"filter":[
"lowercase"
],
"type":"ngram",
"min_gram":2,
"max_gram":3,
"token_chars":[
"letter",
"digit"
]
}
}
}
}
POST tangxue_test_index_20201222/_open
6.增加Mappings实现细写粗读
一般情况下,索引分词(写)应该按照最细粒度分词,搜索分词(读)可按照最粗粒度分词,即所谓的细写粗读
修改Settings(analysis 属于静态设置,和第 5 步一样,需要先 _close 索引,修改完成后再 _open)
PUT tangxue_test_index_20201222/_settings
{
"analysis":{
"analyzer":{
"ngram_analyzer1":{
"tokenizer":"my_tokenizer1"
},
"ngram_analyzer2":{
"tokenizer":"my_tokenizer2"
},
"ik_unsmart_analyzer":{
"filter":[
"lowercase"
],
"type":"custom",
"tokenizer":"ik_max_word"
}
},
"tokenizer":{
"my_tokenizer1":{
"filter":[
"lowercase"
],
"type":"ngram",
"min_gram":1,
"max_gram":3,
"token_chars":[
"letter",
"digit"
]
},
"my_tokenizer2":{
"filter":[
"lowercase"
],
"type":"ngram",
"min_gram":2,
"max_gram":3,
"token_chars":[
"letter",
"digit"
]
}
}
}
}
增加Mappings
PUT tangxue_test_index_20201222/_mapping/medicalrecord?update_all_types
{
"properties": {
"fullFieldName":{
"type":"keyword",
"fields":{
"ngramFullFieldName_new":{
"type":"text",
"analyzer":"ngram_analyzer1",
"search_analyzer":"ngram_analyzer2"
}
}
}
}
}
写入数据
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"姓名"
}
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"姓"
}
POST tangxue_test_index_20201222/medicalrecord
{
"fullFieldName":"姓名tang"
}
读取数据,注意要用新的mapping字段ngramFullFieldName_new
GET tangxue_test_index_20201222/_search
{
"query" : {
"bool" : {
"should" : [
{
"match": {
"fullFieldName.ngramFullFieldName_new": "姓名"
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
读取结果:由于搜索分词器 ngram_analyzer2 的 min_gram 为 2,“姓名”只会被切分成“姓名”这一个词,所以返回结果中不会包含内容只有“姓”的文档
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.49191087,
"hits": [
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXaI8FkysDP0SDHXyJsp",
"_score": 0.49191087,
"_source": {
"fullFieldName": "姓名"
}
},
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXaI9ewmsDP0SDHXyJsr",
"_score": 0.28004453,
"_source": {
"fullFieldName": "姓名tang"
}
}
]
}
}
7. 查看特定分词器下的分词结果
GET tangxue_test_index_20201222/_analyze
{
"analyzer":"ngram_analyzer1",
"text":"姓名美美 !*1"
}
结果为:
{
"tokens": [
{
"token": "姓",
"start_offset": 0,
"end_offset": 1,
"type": "word",
"position": 0
},
{
"token": "姓名",
"start_offset": 0,
"end_offset": 2,
"type": "word",
"position": 1
},
{
"token": "姓名美",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 2
},
{
"token": "名",
"start_offset": 1,
"end_offset": 2,
"type": "word",
"position": 3
},
{
"token": "名美",
"start_offset": 1,
"end_offset": 3,
"type": "word",
"position": 4
},
{
"token": "名美美",
"start_offset": 1,
"end_offset": 4,
"type": "word",
"position": 5
},
{
"token": "美",
"start_offset": 2,
"end_offset": 3,
"type": "word",
"position": 6
},
{
"token": "美美",
"start_offset": 2,
"end_offset": 4,
"type": "word",
"position": 7
},
{
"token": "美",
"start_offset": 3,
"end_offset": 4,
"type": "word",
"position": 8
},
{
"token": "1",
"start_offset": 7,
"end_offset": 8,
"type": "word",
"position": 9
}
]
}
8. 查看所有索引详细数据
GET /_cat/indices?v
9. 查看某一条数据的信息(例如通过字段 “_id” 获取)
GET tangxue_test_index_20201222/_search
{
"query": {
"term": {
"_id": {
"value": "AXapehbzsDP0SDHXyJyz"
}
}
}
}
结果为:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "tangxue_test_index_20201222",
"_type": "medicalrecord",
"_id": "AXapehbzsDP0SDHXyJyz",
"_score": 1,
"_source": {
"fullFieldName": "美美姓名思"
}
}
]
}
}
10. 清除ES某个索引的缓存
POST /tangxue_test_index_20201222/_cache/clear
网友评论