导语
比如用户搜索“住宿”,但是没有一家酒店的名字中包含“住宿”,结果给用户返回了一个空白页。除了给“住宿”添加同义词“酒店”外,这个问题还可以这样解决:为每一个 category_id 维护一组关键词,这组映射关系保存在一个 Map 中。当一个 keyword 进入 Service 层后,先对它做 Analyze(分词),再用得到的每个 token 去 Map 中查找对应的 category_id;然后可以将这个 category_id 放在 Query 里影响召回,也可以放在 function 的 filter 中影响排序。一般的做法是:先影响排序,如果得不到结果,再影响召回。
category_id 和关键词的映射
// Keywords registered under each category_id (key: category_id, value: that category's keyword list)
private Map<Integer,List<String>> categoryWorkMap = new HashMap<>();
/**
 * Fills categoryWorkMap with the keyword lists for category 1 and category 2.
 */
@PostConstruct
public void init(){
    // Keywords mapped to category_id 1
    List<String> categoryOneWords = new ArrayList<>();
    categoryOneWords.add("吃饭");
    categoryOneWords.add("下午茶");
    categoryWorkMap.put(1, categoryOneWords);

    // Keywords mapped to category_id 2
    List<String> categoryTwoWords = new ArrayList<>();
    categoryTwoWords.add("休息");
    categoryTwoWords.add("睡觉");
    categoryTwoWords.add("住宿");
    categoryWorkMap.put(2, categoryTwoWords);
}
查询 keyword 对应的 category_id
/**
 * Sends the keyword to the {@code _analyze} endpoint of the {@code shop} index (using the
 * {@code name} field) and maps each returned token to the category_id it is registered under.
 *
 * <p>Equivalent console request:
 * <pre>
 * GET /shop/_analyze
 * {
 *   "field": "name",
 *   "text": "凯悦"
 * }
 * </pre>
 *
 * @param keyword the raw search keyword; {@code null} or empty yields an empty map
 *                without issuing a request
 * @return a map from token to its category_id (stored as an Integer); tokens that match
 *         no category are omitted
 * @throws IOException if performing the request or reading the response fails
 */
private Map<String,Object> analyzeCategoryKeyword(String keyword) throws IOException {
    Map<String,Object> res = new HashMap<>();
    if (keyword == null || keyword.isEmpty()) {
        // Nothing to analyze, so skip the round trip.
        return res;
    }

    // Build the body with JSONObject instead of string concatenation so that quotes,
    // backslashes and control characters inside keyword are escaped correctly.
    JSONObject body = new JSONObject();
    body.put("field", "name");
    body.put("text", keyword);

    Request request = new Request("GET","/shop/_analyze");
    request.setJsonEntity(body.toJSONString());
    Response response = highLevelClient.getLowLevelClient().performRequest(request);
    String responseStr = EntityUtils.toString(response.getEntity());

    JSONObject jsonObject = JSONObject.parseObject(responseStr);
    JSONArray jsonArray = jsonObject == null ? null : jsonObject.getJSONArray("tokens");
    if (jsonArray == null) {
        // The response carried no "tokens" array; treat it as "no tokens".
        return res;
    }

    for (int i = 0; i < jsonArray.size(); i++) {
        String token = jsonArray.getJSONObject(i).getString("token");
        Integer categoryId = getCategoryIdByToken(token);
        if (categoryId != null) {
            res.put(token, categoryId);
        }
    }
    return res;
}
/**
 * Looks up the category whose keyword list contains the given token.
 *
 * @param token a single token to look up
 * @return the first category_id (in map iteration order) whose keyword list contains
 *         the token, or {@code null} when no list contains it
 */
private Integer getCategoryIdByToken(String token){
    for (Map.Entry<Integer, List<String>> entry : categoryWorkMap.entrySet()) {
        if (entry.getValue().contains(token)) {
            return entry.getKey();
        }
    }
    return null;
}
影响召回的 Query DSL
- 使用 bool 查询的 should 子句;
GET /shop/_search
{
"_source": "*",
"script_fields": {
"distance": {
"script": {
"source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
"lang": "expression",
"params": {"lat":31.23916171,"lon":121.48789949}
}
}
},
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{"match": {"name": {"query": "住宿","boost": 0.1}}},
{"term":{"category_id":2}}
]
}
},
{"term": {"seller_disabled_flag": 0}}
]
}
},
"functions": [
{
"gauss": {
"location": {
"origin": "31.23916171,121.48789949",
"scale": "100km",
"offset": "0km",
"decay": 0.5
}
},
"weight": 9
},
{
"field_value_factor": {
"field": "remark_score"
},
"weight": 0.2
},
{
"field_value_factor": {
"field": "seller_remark_score"
},
"weight": 0.1
}
],
"score_mode": "sum",
"boost_mode": "sum"
}
},
"sort": [
{
"_score": {
"order":"desc"
}
}
]
}
影响排序的 Query DSL
- 在 Function Score Query 的 functions 中添加一个带 filter 的 function:
GET /shop/_search
{
"_source": "*",
"script_fields": {
"distance": {
"script": {
"source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
"lang": "expression",
"params": {"lat":31.23916171,"lon":121.48789949}
}
}
},
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{"match": {"name": {"query": "住宿","boost": 0.1}}}
]
}
},
{"term": {"seller_disabled_flag": 0}}
]
}
},
"functions": [
{
"gauss": {
"location": {
"origin": "31.23916171,121.48789949",
"scale": "100km",
"offset": "0km",
"decay": 0.5
}
},
"weight": 9
},
{
"field_value_factor": {
"field": "remark_score"
},
"weight": 0.2
},
{
"field_value_factor": {
"field": "seller_remark_score"
},
"weight": 0.1
},
{
"filter": {"term":{"category_id": 2}},
"weight": 0.2
}
],
"score_mode": "sum",
"boost_mode": "sum"
}
},
"sort": [
{
"_score": {
"order":"desc"
}
}
]
}
网友评论