美文网首页
ES Suggester 搜索自动补全

ES Suggester 搜索自动补全

作者: KICHUN | 来源:发表于2020-11-12 17:36 被阅读0次

需求

将商品表数据全量更新至ES索引
商品索引支持Suggester自动补全,支持过滤商品enable和delete_status状态,只筛选启用且未删除的商品
Suggester与普通搜索的区别:ES将Suggest及其依赖的字段数据放在堆内存中,从而实现近实时的搜索提示功能

es安装ik分词插件

./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.7.0/elasticsearch-analysis-ik-7.7.0.zip

配置logstash配置文件

input {
    # Console input; whatever is typed on stdin becomes an event whose text is in "message".
    stdin {
    }
    jdbc {
      # JDBC URL: target host and database, UTF-8 encoding, SSL disabled, auto-reconnect enabled
      jdbc_connection_string => "jdbc:mysql://{你的MySQL}:3306/shop-server?characterEncoding=UTF-8&useSSL=false&autoReconnect=true"
      # Database account and password
      jdbc_user => "{你的用户}"
      jdbc_password => "{你的密码}"
      # JDBC driver jar; an absolute path is recommended
      jdbc_driver_library => "/data/logstash-6.4.0/bin/mysql/mysql-connector-java-5.1.22-bin.jar"
      # Driver class; normally left unchanged
      jdbc_driver_class => "com.mysql.jdbc.Driver"
      # Fetch the result set page by page, 2000 rows per page
      jdbc_paging_enabled => "true"
      jdbc_page_size => "2000"

      # Avoid garbled Chinese characters
      codec => plain { charset => "UTF-8"}

      # Track a column value instead of the last run time
      use_column_value => true
      # Column whose value is tracked
      # NOTE(review): the statement below never references :sql_last_value, so the
      # tracked value is recorded but not used to filter rows; every run reloads the table.
      tracking_column => app_goods_id
      record_last_run => true
      # Full export of the goods table joined with brand data.
      # - "suggest" duplicates goods_name so it can feed the completion field.
      # - price_score / brand_score are ranking scores derived from price band and brand level.
      # - No LIMIT here: paging above already bounds each fetch, and a LIMIT inside the
      #   statement would cap the whole import instead of one page.
      # - ORDER BY keeps the row order stable from one page to the next.
        statement => "SELECT
        g.app_goods_id,
        g.goods_id,
        g.goods_name,
        g.goods_name AS suggest,
        g.collect_count,
        g.sale,
        g.alone_price,
        g.enable,
        g.delete_status,
         (SELECT GROUP_CONCAT( goods_type_id ) FROM db_goods_type_link WHERE goods_id = g.goods_id ) AS goods_type_id,
        CASE
                WHEN g.alone_price > 301 THEN
                100
                WHEN g.alone_price > 101 THEN
                300
                WHEN g.alone_price > 51 THEN
                500
                WHEN g.alone_price > 0 THEN
                400 ELSE 0
        END price_score,
        CASE
                b.brand_level
                WHEN 1 THEN
                500
                WHEN 2 THEN
                300
                WHEN 3 THEN
                100
                WHEN 4 THEN
                0 ELSE 0
        END brand_score
        FROM
        db_app_goods g
        LEFT JOIN db_brand b ON g.brand_id = b.brand_id
        ORDER BY g.app_goods_id"

      # File that stores the last sql_last_value; the initial value of the tracked
      # column must be written into this file beforehand
      last_run_metadata_path => "/data/logstash-6.4.0/bin/mysql/goods.log"

      jdbc_default_timezone => "Asia/Shanghai"

      #statement_filepath => "mysql/jdbc.sql"


      # Whether to discard the record kept in last_run_metadata_path; when true every
      # run starts again from the beginning of the table
      clean_run => false

      # Cron-style schedule for repeating the import; the first field is minutes.
      # NOTE(review): with "*" in the minute field this appears to fire every minute —
      # confirm that a full reload that often is intended.
      schedule => "* */1 * * *"
      type => "jdbc"
    }
}


filter {
    # Decode the JSON text carried in the event's "message" field;
    # "message" itself is listed for removal.
    json {
        remove_field => ["message"]
        source => "message"
    }
}


output {
    elasticsearch {
        # Elasticsearch host to write to
        hosts => "127.0.0.1:9200"
        # Name of the target index
        index => "goods"
        # Type name (comparable to a database table name)
        #document_type => "appgood"
        # Document id (comparable to a primary key), taken from the app_goods_id column
        document_id => "%{app_goods_id}"
        # Elasticsearch credentials; quoted so that the placeholders are string values,
        # matching jdbc_user / jdbc_password in the input section
        user => "{你的ES用户}"
        password => "{你的ES密码}"
        # Path relative to the directory Logstash is started from; this file holds the
        # index template (mapping) used for the output index
        template => "mysql/goods_mapping.json"
        template_name => "goods"
        template_overwrite => true

    }

    stdout {
        # Print each event as one JSON line
        codec => json_lines
    }
}

创建商品索引映射goods_mapping.json

{
  "index_patterns": ["goods"],
  "settings": {
    "index.refresh_interval": "1s"
  },
  "mappings": {
    "properties": {
      "suggest": {
        "type": "completion",
        "analyzer": "ik_smart",
        "search_analyzer": "ik_smart",
        "contexts": [
          {
            "name": "enable_cat",
            "type": "category",
            "path": "enable"
          },
          {
            "name": "delete_status_cat",
            "type": "category",
            "path": "delete_status"
          }
        ]
      },
      "goods_name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart"
      },
      "goods_type_id": {
        "type": "keyword"
      },
      "app_goods_id": {
        "type": "long"
      },
      "goods_id": {
        "type": "long"
      },
      "collect_count": {
        "type": "integer"
      },
      "sale": {
        "type": "integer"
      },
      "alone_price": {
        "type": "double"
      },
      "price_score": {
        "type": "long"
      },
      "brand_score": {
        "type": "integer"
      },
      "enable": {
        "type": "keyword"
      },
      "delete_status": {
        "type": "keyword"
      }
    }
  }
}

使用logstash -f {goods配置文件}启动logstash,logstash将自动从数据库查询并以Mapping创建索引

查看索引是否映射成功
GET goods_dev/_mapping
结果:

{
  "goods_dev" : {
    "mappings" : {
      "properties" : {
        "@timestamp" : {
          "type" : "date"
        },
        "@version" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "alone_price" : {
          "type" : "double"
        },
        "app_goods_id" : {
          "type" : "long"
        },
        "brand_score" : {
          "type" : "integer"
        },
        "collect_count" : {
          "type" : "integer"
        },
        "delete_status" : {
          "type" : "keyword"
        },
        "enable" : {
          "type" : "keyword"
        },
        "goods_id" : {
          "type" : "long"
        },
        "goods_name" : {
          "type" : "text",
          "analyzer" : "ik_max_word",
          "search_analyzer" : "ik_smart"
        },
        "goods_type_id" : {
          "type" : "keyword"
        },
        "price_score" : {
          "type" : "long"
        },
        "sale" : {
          "type" : "integer"
        },
        "suggest" : {
          "type" : "completion",
          "analyzer" : "ik_smart",
          "preserve_separators" : true,
          "preserve_position_increments" : true,
          "max_input_length" : 50,
          "contexts" : [
            {
              "name" : "enable_cat",
              "type" : "CATEGORY",
              "path" : "enable"
            },
            {
              "name" : "delete_status_cat",
              "type" : "CATEGORY",
              "path" : "delete_status"
            }
          ]
        },
        "type" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}

使用带上下文(Context)的 Completion Suggester 进行前缀联想词查询

POST goods_dev/_search?pretty
{
  "_source": "suggest",
  "suggest": {
    "my-suggest":{
      "prefix":"肌肤",
      "completion":{
        "field":"suggest",
        "skip_duplicates":true,
        "size":10,
        "contexts":{
          "enable_cat":{
             "context":1
          },
          "delete_status_cat":{
            "context":0
          }
        }
      }
    }
  }
}

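结果(注意:下面两条返回项的 contexts 分别只命中了 enable_cat 或 delete_status_cat 其中之一,看起来多个 context 之间是“或”的关系而不是“且”;如果必须同时满足“启用且未删除”,可以考虑把两个状态合并为一个 context 字段后再过滤):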

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 0,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "suggest" : {
    "my-suggest" : [
      {
        "text" : "肌肤",
        "offset" : 0,
        "length" : 2,
        "options" : [
          {
            "text" : "“肌肤吸尘器123”香港CHICELAN槿念烟酰胺磨砂沐浴露温和双效去角质一瓶=沐浴露+磨砂膏",
            "_index" : "goods_dev",
            "_type" : "_doc",
            "_id" : "1323938082604585019",
            "_score" : 1.0,
            "_source" : {
              "suggest" : "“肌肤吸尘器123”香港CHICELAN槿念烟酰胺磨砂沐浴露温和双效去角质一瓶=沐浴露+磨砂膏"
            },
            "contexts" : {
              "enable_cat" : [
                "1"
              ]
            }
          },
          {
            "text" : "“肌肤吸尘器”香港CHICELAN槿念烟酰胺磨砂沐浴露温和双效去角质一瓶=沐浴露+磨砂膏",
            "_index" : "goods_dev",
            "_type" : "_doc",
            "_id" : "1323894411586834447",
            "_score" : 1.0,
            "_source" : {
              "suggest" : "“肌肤吸尘器”香港CHICELAN槿念烟酰胺磨砂沐浴露温和双效去角质一瓶=沐浴露+磨砂膏"
            },
            "contexts" : {
              "delete_status_cat" : [
                "0"
              ]
            }
          }
        ]
      }
    ]
  }
}

使用SpringDataElasticsearch进行JavaAPI查询

新建一个固定suggest查询对象,该对象构建一个查询context,类似于上面的

"contexts":{
          "enable_cat":{
             "context":1
          },
          "delete_status_cat":{
            "context":0
          }
        }
/**
     * Fixed query contexts sent with every completion-suggest request.
     *
     * <p>Holds one category context per context name: {@code enable_cat} with category
     * {@code "1"} and {@code delete_status_cat} with category {@code "0"}. The map is built
     * once and exposed read-only, because the same instance is handed to every request.
     *
     * <p>NOTE(review): the sample response in the accompanying write-up shows each hit
     * reporting only one of the two contexts, so the contexts may be combined with OR
     * rather than AND — confirm against the Elasticsearch context suggester documentation.
     */
    private static final Map<String, List<? extends ToXContent>> SUGGESTION_CONTEXT;
    static {
        Map<String, List<? extends ToXContent>> contexts = new HashMap<>();

        List<CategoryQueryContext> enableContexts = new ArrayList<>(1);
        enableContexts.add(CategoryQueryContext.builder().setCategory("1").build());
        contexts.put("enable_cat", enableContexts);

        List<CategoryQueryContext> deleteStatusContexts = new ArrayList<>(1);
        deleteStatusContexts.add(CategoryQueryContext.builder().setCategory("0").build());
        contexts.put("delete_status_cat", deleteStatusContexts);

        // Fully qualified so this block does not depend on an extra import.
        SUGGESTION_CONTEXT = java.util.Collections.unmodifiableMap(contexts);
    }

根据关键字联想查询方法

    /**
     * Returns completion suggestions for the given keyword prefix.
     *
     * <p>Builds a completion suggestion on the {@code suggest} field (duplicates skipped,
     * at most 10 options, {@code SUGGESTION_CONTEXT} as query contexts), runs it against
     * {@code goodsIndexName}, and returns the option texts of the first suggestion entry.
     *
     * @param keyword prefix to complete; {@code null} yields an empty list
     * @return the suggested texts, or an empty list when the response carries no suggest
     *         section, no {@code goodsNameSuggest} suggestion, or no entries
     */
    @Override
    public List<String> associate(String keyword) {
        if (keyword == null) {
            return Collections.emptyList();
        }

        // Build the suggest request for title completion
        CompletionSuggestionBuilder suggest = SuggestBuilders.completionSuggestion("suggest")
                .prefix(keyword)
                .skipDuplicates(true)
                .size(10)
                .contexts(SUGGESTION_CONTEXT);
        SuggestBuilder suggestBuilder = new SuggestBuilder();
        suggestBuilder.addSuggestion("goodsNameSuggest", suggest);

        // Execute the query
        SearchResponse goodsNameSuggestResp = elasticsearchRestTemplate.suggest(suggestBuilder, goodsIndexName);
        Suggest suggestSection = goodsNameSuggestResp.getSuggest();
        if (suggestSection == null) {
            return Collections.emptyList();
        }
        Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> goodsNameSuggest =
                suggestSection.getSuggestion("goodsNameSuggest");
        if (goodsNameSuggest == null) {
            return Collections.emptyList();
        }

        // Only the first entry is read, as before; an absent entry now yields an empty
        // list instead of a NoSuchElementException.
        return goodsNameSuggest.getEntries().stream()
                .findFirst()
                .map(entry -> entry.getOptions().stream()
                        .map(option -> option.getText().toString())
                        .collect(Collectors.toList()))
                .orElse(Collections.emptyList());
    }

接口测试

接口传递keyword关键字"韩版",返回

{
  "code": 200,
  "msg": "操作成功",
  "timestamp": "1605173186443",
  "data": [
    "韩版时尚中长款衬衫 Because-t2041",
    "韩版淑女纯色休闲裤 Holicholic-sp26265",
    "韩版简约时尚连衣裙 Happy10-ds1009573",
    "韩版简约纯色衬衫 Holicholic-t26070",
    "韩版简约经典半身裙 Maybe-baby-sp30591",
    "韩版简约经典女裤套装 Holicholic-ds26260",
    "韩版纯色休闲T恤 Holicholic-t26074"
  ]
}

相关文章

网友评论

      本文标题:ES Suggester 搜索自动补全

      本文链接:https://www.haomeiwen.com/subject/vutpbktx.html