1.下载安装(略)
2.IK分词器
安装
# 注意:IK 插件版本必须与 ES 版本完全一致(本文使用的 ES 为 6.5.4)
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.5.4/elasticsearch-analysis-ik-6.5.4.zip
重启ES 集群
3.分词粒度:通过 ik_smart(最粗粒度)和 ik_max_word(最细粒度)两种分析器控制分词及查询匹配的粒度
# 最粗粒度
GET _analyze
{
"analyzer":"ik_smart",
"text":"中华人民共和国"
}
{
"tokens" : [
{
"token" : "中华人民共和国",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
}
]
}
# 最细粒度
GET _analyze
{
"analyzer":"ik_max_word",
"text":"中华人民共和国"
}
{
"tokens" : [
{
"token" : "中华人民共和国",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "中华人民",
"start_offset" : 0,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "中华",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "华人",
"start_offset" : 1,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "人民共和国",
"start_offset" : 2,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "人民",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 5
},
{
"token" : "共和国",
"start_offset" : 4,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 6
},
{
"token" : "共和",
"start_offset" : 4,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 7
},
{
"token" : "国",
"start_offset" : 6,
"end_offset" : 7,
"type" : "CN_CHAR",
"position" : 8
}
]
}
4.自定义词条
# 未添加新词条前,被断开
GET _analyze
{
"analyzer": "ik_smart",
"text":"洪荒之力"
}
{
"tokens" : [
{
"token" : "洪荒",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "之力",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 1
}
]
}
# 添加新词条
cd /opt/softwares/elasticsearch-6.5.4/config/analysis-ik
mkdir custom
vim custom/new_word.dic
-----------------------------
洪荒之力
-----------------------------
vim IKAnalyzer.cfg.xml   # 通过相对路径(相对于 IKAnalyzer.cfg.xml 所在目录)指定扩展词典的位置
-----------------------------
<entry key="ext_dict">custom/new_word.dic</entry>
-----------------------------
重启ES,Kibana
# 新词条注册成功,"洪荒之力" 不再被断开
GET _analyze
{
"analyzer": "ik_smart",
"text":"洪荒之力"
}
{
"tokens" : [
{
"token" : "洪荒之力",
"start_offset" : 0,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 0
}
]
}
网友评论