美文网首页
PHP  Elasticsearch 6.7初步学习实践

PHP  Elasticsearch 6.7初步学习实践

作者: 鸿雁长飞光不度 | 来源:发表于2019-05-14 19:57 被阅读0次

    1.Mac电脑安装

    brew install elasticsearch
    
    #最后几行可以看到相关的配置目录
    
    Data:    /usr/local/var/lib/elasticsearch/
    Logs:    /usr/local/var/log/elasticsearch/elasticsearch_guodong.log
    Plugins: /usr/local/var/elasticsearch/plugins/
    Config:  /usr/local/etc/elasticsearch/
    
    

    可以安装
    https://github.com/NLPchina/elasticsearch-sql

    可以用SQL语句查询ES的内容。

    2.安装中文分词插件

    https://github.com/medcl/elasticsearch-analysis-ik

    根据es版本选择分词插件的版本,刚刚安装的6.7。

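    # 插件需要解压到 plugins 下的独立目录,只把zip移动过去不会生效;解压后重启ES
    mkdir -p /usr/local/var/elasticsearch/plugins/ik
    unzip elasticsearch-analysis-ik-6.7.0.zip -d /usr/local/var/elasticsearch/plugins/ik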
    
    

    3. 初步使用

    3.1测试分词器

    使用默认的分词器

    curl -X POST "localhost:9200/_analyze" -H 'Content-Type: application/json' -d'
    {
      "analyzer": "standard",
      "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog\u0027s bone."
    }'
    

    返回结果如下

    {"tokens":[{"token":"the","start_offset":0,"end_offset":3,"type":"<ALPHANUM>","position":0},{"token":"2","start_offset":4,"end_offset":5,"type":"<NUM>","position":1},{"token":"quick","start_offset":6,"end_offset":11,"type":"<ALPHANUM>","position":2},{"token":"brown","start_offset":12,"end_offset":17,"type":"<ALPHANUM>","position":3},{"token":"foxes","start_offset":18,"end_offset":23,"type":"<ALPHANUM>","position":4},{"token":"jumped","start_offset":24,"end_offset":30,"type":"<ALPHANUM>","position":5},{"token":"over","start_offset":31,"end_offset":35,"type":"<ALPHANUM>","position":6},{"token":"the","start_offset":36,"end_offset":39,"type":"<ALPHANUM>","position":7},{"token":"lazy","start_offset":40,"end_offset":44,"type":"<ALPHANUM>","position":8},{"token":"dog's","start_offset":45,"end_offset":50,"type":"<ALPHANUM>","position":9},{"token":"bone","start_offset":51,"end_offset":55,"type":"<ALPHANUM>","position":10}]}%    
    

    可以看出默认的分词器standard对英文分词有着不错的效果。

    3.1 创建一个索引

    一个索引相当于一个数据库,每个索引都可以有自己单独的配置。

    中文测试

    curl -X POST "localhost:9200/_analyze" -H 'Content-Type: application/json' -d'       
    {
      "analyzer": "standard",
      "text": "创业邦,帮助创业者走向成功的平台"                              
    }
    '
    
    {"tokens":[{"token":"创","start_offset":0,"end_offset":1,"type":"<IDEOGRAPHIC>","position":0},{"token":"业","start_offset":1,"end_offset":2,"type":"<IDEOGRAPHIC>","position":1},{"token":"邦","start_offset":2,"end_offset":3,"type":"<IDEOGRAPHIC>","position":2},{"token":"帮","start_offset":4,"end_offset":5,"type":"<IDEOGRAPHIC>","position":3},{"token":"助","start_offset":5,"end_offset":6,"type":"<IDEOGRAPHIC>","position":4},{"token":"创","start_offset":6,"end_offset":7,"type":"<IDEOGRAPHIC>","position":5},{"token":"业","start_offset":7,"end_offset":8,"type":"<IDEOGRAPHIC>","position":6},{"token":"者","start_offset":8,"end_offset":9,"type":"<IDEOGRAPHIC>","position":7},{"token":"走","start_offset":9,"end_offset":10,"type":"<IDEOGRAPHIC>","position":8},{"token":"向","start_offset":10,"end_offset":11,"type":"<IDEOGRAPHIC>","position":9},{"token":"成","start_offset":11,"end_offset":12,"type":"<IDEOGRAPHIC>","position":10},{"token":"功","start_offset":12,"end_offset":13,"type":"<IDEOGRAPHIC>","position":11},{"token":"的","start_offset":13,"end_offset":14,"type":"<IDEOGRAPHIC>","position":12},{"token":"平","start_offset":14,"end_offset":15,"type":"<IDEOGRAPHIC>","position":13},{"token":"台","start_offset":15,"end_offset":16,"type":"<IDEOGRAPHIC>","position":14}]}%  
    

    结果会把每个汉字都分词,效果不好。下面采用ik分词器进行分词测试。

     curl -X POST "localhost:9200/_analyze" -H 'Content-Type: application/json' -d'
    {
      "analyzer": "ik_max_word",
      "text": "创业邦,帮助创业者走向成功的平台"
    }
    '
    

    分词良好

    {"tokens":[{"token":"创业","start_offset":0,"end_offset":2,"type":"CN_WORD","position":0},{"token":"邦","start_offset":2,"end_offset":3,"type":"CN_CHAR","position":1},{"token":"帮助","start_offset":4,"end_offset":6,"type":"CN_WORD","position":2},{"token":"创业者","start_offset":6,"end_offset":9,"type":"CN_WORD","position":3},{"token":"创业","start_offset":6,"end_offset":8,"type":"CN_WORD","position":4},{"token":"业者","start_offset":7,"end_offset":9,"type":"CN_WORD","position":5},{"token":"走向","start_offset":9,"end_offset":11,"type":"CN_WORD","position":6},{"token":"成功","start_offset":11,"end_offset":13,"type":"CN_WORD","position":7},{"token":"的","start_offset":13,"end_offset":14,"type":"CN_CHAR","position":8},{"token":"平台","start_offset":14,"end_offset":16,"type":"CN_WORD","position":9}]}% 
    

    3.2 创建索引并使用分词器

     curl -X PUT "localhost:9200/cyzone" -H 'Content-Type: application/json' -d'
    {
        "settings":{
            "index":{
                "number_of_shards":3,
                "number_of_replicas":2
            }
        },
        "mappings":{
            "goods":{ 
                "properties":{  
                    "name":{
                        "type":"text",
                        "analyzer": "ik_max_word",
                        "search_analyzer": "ik_smart"
                    },                               
                    "content":{
                        "type":"text",
                        "analyzer": "ik_max_word",
                        "search_analyzer": "ik_smart"
                    },                               
                    "id":{
                        "type":"long"
                    }                
                }
            }
        }
    }'
    
    

    3.3给索引的type为goods的添加一些数据

    ➜  ~ curl -X POST "localhost:9200/cyzone/goods" -H 'Content-Type: application/json' -d'
    {
        "id" : "36",
        "name" : "创业邦会员 — 找人、找钱、找项目!",
        "content" : "这其实是内容,但是中间有双引号和单引号,我暂时不拼接了"
    }'
    {"_index":"cyzone","_type":"goods","_id":"1qtDo2oBACchbOnLTGLF","_version":1,"result":"created","_shards":{"total":3,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}%
    ➜  ~ curl -X POST "localhost:9200/cyzone/goods" -H 'Content-Type: application/json' -d'
    {
        "id" : "143",
        "name" : "创业邦会员 — 优惠价格 happy",       
        "content" : "just for test vip,创业"                               
    }'
    {"_index":"cyzone","_type":"goods","_id":"16tEo2oBACchbOnLZmK4","_version":1,"result":"created","_shards":{"total":3,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}%               
    

    可以指定id,也可以不指定id。指定id时请求地址是cyzone/goods/具体id号,一般把POST改为PUT;不指定id时,ES会自动生成一个随机的唯一字符串作为_id(例如上面返回的1qtDo2oBACchbOnLTGLF),并不是自增的数字。

    3.4测试搜索功能是否好用

    简单搜索
    https://www.elastic.co/guide/en/elasticsearch/reference/6.7/search-uri-request.html

    ➜  ~ curl -X GET "localhost:9200/cyzone/_search?q=id:143" 
    
    {"took":1,"timed_out":false,"_shards":{"total":3,"successful":3,"skipped":0,"failed":0},"hits":{"total":1,"max_score":1.0,"hits":[{"_index":"cyzone","_type":"goods","_id":"16tEo2oBACchbOnLZmK4","_score":1.0,"_source":
    {
        "id" : "143",
        "name" : "创业邦会员 — 优惠价格 happy",
        "content" : "just for test vip,创业"
    }}]}}%  
    

    复杂的搜索

    https://www.elastic.co/guide/en/elasticsearch/reference/6.7/search-request-body.html

    ➜  ~ curl -XPOST "http://localhost:9200/cyzone/_search?pretty"  -H 'Content-Type:application/json' -d'
    {
        "query" : { "match" : { "content" : "创业" }}
    }
    '
    {
      "took" : 3,
      "timed_out" : false,
      "_shards" : {
        "total" : 3,
        "successful" : 3,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 1,
        "max_score" : 0.2876821,
        "hits" : [
          {
            "_index" : "cyzone",
            "_type" : "goods",
            "_id" : "16tEo2oBACchbOnLZmK4",
            "_score" : 0.2876821,
            "_source" : {
              "id" : "143",
              "name" : "创业邦会员 — 优惠价格 happy",
              "content" : "just for test vip,创业"
            }
          }
        ]
      }
    }
    

    再次尝试搜索name

    ➜  ~ curl -XPOST "http://localhost:9200/cyzone/_search?pretty"  -H 'Content-Type:application/json' -d'
    {
        "query" : { "match" : { "name" : "创业" }}
    }
    '
    {
      "took" : 3,
      "timed_out" : false,
      "_shards" : {
        "total" : 3,
        "successful" : 3,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 2,
        "max_score" : 0.2876821,
        "hits" : [
          {
            "_index" : "cyzone",
            "_type" : "goods",
            "_id" : "16tEo2oBACchbOnLZmK4",
            "_score" : 0.2876821,
            "_source" : {
              "id" : "143",
              "name" : "创业邦会员 — 优惠价格 happy",
              "content" : "just for test vip,创业"
            }
          },
          {
            "_index" : "cyzone",
            "_type" : "goods",
            "_id" : "1qtDo2oBACchbOnLTGLF",
            "_score" : 0.2876821,
            "_source" : {
              "id" : "36",
              "name" : "创业邦会员 — 找人、找钱、找项目!",
              "content" : "这其实是内容,但是中间有双引号和单引号,我暂时不拼接了"
            }
          }
        ]
      }
    }
    

    4.导入数据库已有的数据

    使用logstash工具把数据库中的数据同步到ES。可用的同步工具不只有logstash,但它被推荐得比较多,而且是官方提供的工具。
    https://www.elastic.co/cn/products/logstash

    ➜  Desktop brew install logstash     
    Updating Homebrew...
    ==> Auto-updated Homebrew!
    Updated 2 taps (homebrew/core and homebrew/cask).
    ==> New Formulae
    imapsync
    ==> Updated Formulae
    aliyun-cli      conan           i2pd            joplin          lxc             postgresql@9.6  socat           wtf
    calicoctl       doctl           imagemagick     juju            netpbm          pygobject       ttyd            yarn
    cfn-lint        glooctl         istioctl        kotlin          oniguruma       serverless      ucloud          youtube-dl
    checkbashisms   gtk-doc         jhipster        kubeprod        opencoarrays    skaffold        utf8proc
    
    ==> Downloading https://artifacts.elastic.co/downloads/logstash/logstash-oss-7.0.1.tar.gz
    ######################################################################## 100.0%
    ==> Caveats
    Configuration files are located in /usr/local/etc/logstash/
    
    To have launchd start logstash now and restart at login:
      brew services start logstash
    Or, if you don't want/need a background service you can just run:
      logstash
    ==> Summary
      /usr/local/Cellar/logstash/7.0.1: 12,515 files, 283.9MB, built in 22 minutes 49 seconds
    

    进入logstash 相关目录下安装插件logstash-input-jdbc

    cd  /usr/local/Cellar/logstash/7.0.1/bin
    ./logstash-plugin install logstash-input-jdbc
    

    插件使用文档:https://www.elastic.co/guide/en/logstash/7.0/plugins-inputs-jdbc.html

    input {
          jdbc {
            type=> 'goods'
            jdbc_driver_library => "/usr/local/Cellar/logstash/7.0.1/bin/mysql-connector-java-5.1.45/mysql-connector-java-5.1.45-bin.jar"
            jdbc_driver_class => "com.mysql.jdbc.Driver"
            jdbc_connection_string => "jdbc:mysql://127.0.0.1:3307/topcms_cyzone_cn_test"
            jdbc_user => "localhost"
            jdbc_password => "123456"
            statement => "SELECT id,name,content from shop order by id desc"
            tracking_column => "id"
            jdbc_paging_enabled=>"true"
            jdbc_page_size => "5000"
            #tracking_column => "timestamp"
            #tracking_column_type=>numeric
            schedule => "* * * * *"
          }
        }
        filter {
        }
        output {
            if[type]=="goods"{
                elasticsearch {
                    hosts => ["127.0.0.1:9200"]
                    index => "cyzone"
                    document_id => "%{id}"
                                document_type=>"goods"
                }
            }
            stdout {
                codec => json_lines
            }
        }
    
    

    配置参数具体以官方文档为准,不同的版本有不同的设置效果,比如document_type这个字段文档说以后要废弃,但是我用的是7.0,这个地方还必须用。

    document_type参数在下一个大版本废弃.png

    重复之前的测试,发现数据确实导入了,这里只导入了goods表的数据。如果需要导入其他的数据,可以在上面脚本接着扩展,后面还要对参数具体研究,来实现数据库变化的时候自动同步数据到ES。

    5.安装Elasticsearch的PHP库

    https://github.com/elastic/elasticsearch-php

    自己根据实际情况封装ES管理类。

    1.基础抽象类,提供公共的增删改查方法。

    <?php
    
    namespace app\common\lib\es;
    
    use Elasticsearch\ClientBuilder;
    use app\common\es\lib\ESBaseDoc;
    
    /**
     * Base helper for one Elasticsearch index/type pair.
     *
     * Concrete subclasses set $type and $mappings; this class supplies the
     * shared mapping, CRUD and search helpers on top of the elasticsearch-php
     * client. Instances are obtained through getInstance(), one per concrete
     * subclass.
     *
     * @package app\common\lib\es
     */
    abstract class AbstractES
    {
        /**
         * Client produced by ClientBuilder::create()->...->build() in the constructor.
         * @var \Elasticsearch\Client
         */
        private $client;

        /**
         * Singleton registry keyed by concrete class name, so that every
         * subclass gets its own instance instead of sharing the first one created.
         * @var static[]
         */
        private static $instances = [];

        /**
         * Index name (comparable to a database).
         * @var string
         */
        protected $index = "cyzone";

        /**
         * Mapping type inside the index (comparable to a table).
         * @var string
         */
        protected $type = "";

        /**
         * Mapping definitions keyed by type name, supplied by subclasses.
         * @var array
         */
        protected $mappings = [

        ];

        /**
         * Defaults that are folded into the type mapping by putMappings().
         * @var array
         */
        private $defaultMappings = [

            '_default_' => [ // fallback settings used when a type does not override them
                '_all' => [   // disable the catch-all _all field
                    'enabled' => 'false'
                ],
                '_source' => [   // keep the original document
                    'enabled' => 'true'
                ],
            ]
        ];

        /**
         * Index settings intended for index creation.
         * NOTE(review): not referenced by any method in this class.
         * @var array
         */
        private $setting = [
            "index" => [
                "number_of_shards" => 3,
                "number_of_replicas" => 2
            ]
        ];

        /**
         * Builds the client against the local node; private so that callers go
         * through getInstance().
         */
        private function __construct()
        {
            $this->client = ClientBuilder::create()
                ->setHosts(['127.0.0.1:9200'])
                ->build();
        }

        /**
         * Returns the shared instance of the concrete class this is called on.
         *
         * @return static
         */
        public static function getInstance()
        {
            $class = static::class;
            if (!isset(self::$instances[$class])) {
                self::$instances[$class] = new static();
            }
            return self::$instances[$class];
        }

        /**
         * Resolves the fields to search: the explicit list when given, otherwise
         * the property names declared in the mapping of the current type.
         *
         * @param array $field explicit field list; returned untouched when non-empty
         * @param bool $exceptId when true, drops every property whose name contains "id"
         * @return array sequential list of field names
         */
        protected function getSearchFiled($field = [], $exceptId = true)
        {
            if ($field) {
                return $field;
            }
            $properties = $this->mappings[$this->type]['properties'] ?? [];
            if (empty($properties)) {
                return [];
            }
            // Build a fresh list instead of unset()-ing entries: gaps in the keys
            // would make json_encode emit an object instead of the array that
            // multi_match expects for "fields".
            $fields = [];
            foreach (array_keys($properties) as $name) {
                // Substring match kept from the original behaviour: it removes
                // "id" and "user_id", but also names such as "video".
                if ($exceptId && strpos((string) $name, "id") !== false) {
                    continue;
                }
                $fields[] = $name;
            }
            return $fields;
        }

        /**
         * Fetches the mappings of the index.
         *
         * @return array
         */
        public function getMappings()
        {
            $params = [
                'index' => $this->index
            ];
            $res = $this->client->indices()->getMapping($params);
            return $res;
        }

        /**
         * Updates the mapping of the current type.
         *
         * @return array
         */
        public function putMappings()
        {
            $merged = array_merge($this->defaultMappings, $this->mappings);
            // Fold the shared defaults into this type's own mapping; keys set by
            // the type itself win over the defaults.
            $typeMapping = array_merge(
                $merged['_default_'] ?? [],
                $merged[$this->type] ?? []
            );
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                // The body has to be one JSON object keyed by the type name. The
                // former `[$mappings]` wrapper serialised to a JSON array, and it
                // also carried "_default_" next to the type as a second root key.
                // NOTE(review): verify the merged defaults against the target ES version.
                'body' => [
                    $this->type => $typeMapping,
                ],
            ];

            return $this->client->indices()->putMapping($params);
        }

        /**
         * Indexes a single document; no id is passed in the request.
         *
         * @param ESBaseDoc $baseDoc
         * @return array
         */
        public function insertOneDoc(ESBaseDoc $baseDoc)
        {
            // The document itself is the body; wrapping it in a list would send
            // a JSON array instead of a JSON object.
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                'body' => $baseDoc->toArray(),
            ];
            return $this->client->index($params);
        }

        /**
         * Indexes several documents with one bulk request.
         * Does nothing when the list is empty.
         *
         * @param ESBaseDoc[] $docArray
         */
        public function postBulkDoc(array $docArray)
        {
            if (count($docArray) == 0) {
                return;
            }
            $params = [];
            // foreach also copes with arrays whose keys are not 0..n-1.
            foreach ($docArray as $doc) {
                // Bulk bodies alternate an action line and the document source.
                $params['body'][] = [
                    'index' => [
                        '_index' => $this->index,
                        '_type' => $this->type,
                    ]
                ];
                $params['body'][] = $doc->toArray();
            }
            $this->client->bulk($params);
        }

        /**
         * Loads a document by id.
         *
         * @param $id
         * @return array|bool the client response, or false when the client throws
         */
        public function getDocById($id)
        {
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                'id' => $id
            ];
            try {
                return $this->client->get($params);
            } catch (\Exception $exception) {
                return false;
            }
        }

        /**
         * Partially updates the document with the given id.
         *
         * @param $id
         * @param ESBaseDoc $baseDoc fields to merge into the stored document
         * @return array|bool the client response, or false when the client throws
         */
        public function updateDocById($id, ESBaseDoc $baseDoc)
        {
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                'id' => $id,
                'body' => [
                    // "doc" must be the partial document object itself, not a list holding it.
                    'doc' => $baseDoc->toArray(),
                ]
            ];
            try {
                return $this->client->update($params);
            } catch (\Exception $exception) {
                return false;
            }
        }

        /**
         * Deletes the document with the given id.
         *
         * @param $id
         * @return array|bool the client response, or false when the client throws
         */
        public function deleteDocById($id)
        {
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                'id' => $id
            ];
            try {
                return $this->client->delete($params);
            } catch (\Exception $exception) {
                return false;
            }
        }


        // Query parameters: https://www.elastic.co/guide/en/elasticsearch/reference/6.7/query-filter-context.html
        // https://es.xiaoleilu.com/054_Query_DSL/70_Important_clauses.html
        /**
         * Runs a multi_match query over several fields; usable as the default search.
         *
         * @param $keyWords text to search for
         * @param array $field fields to search; when empty, the fields come from getSearchFiled()
         * @return array
         */
        public function search($keyWords, $field = [])
        {
            $params = [
                'index' => $this->index,
                'type' => $this->type,
                'body' => [
                    'query' => [
                        'multi_match' => [
                            'query' => $keyWords,
                            "fields" => $this->getSearchFiled($field)
                        ]
                    ]
                ]
            ];

            return $this->client->search($params);
        }
    }
    
    2. 具体业务类

    商品相关的ES类

    <?php
    /**
     * Created by PhpStorm.
     * User: guodong
     * Date: 2019/5/12
     * Time: 下午5:04
     */
    
    namespace app\common\es\lib;
    
    /**
     * Base class for document objects passed to the ES helpers.
     *
     * The constructor copies matching array entries onto public properties
     * declared by the concrete class and keeps the raw input for toArray().
     */
    class ESBaseDoc
    {

        /**
         * Raw constructor input, returned unchanged by toArray().
         * @var array
         */
        private $initParams;

        /**
         * @param array $param field name => value; keys without a matching
         *                     public property are not assigned but are still
         *                     part of toArray()
         */
        public function __construct(array $param)
        {
            // One reflector for the whole loop instead of one per key.
            $class = new \ReflectionClass(static::class);
            foreach ($param as $key => $value) {
                $name = (string) $key;
                // Check existence first: asking reflection for an undeclared
                // property throws ReflectionException, which made every extra
                // key in $param abort construction.
                if (!$class->hasProperty($name)) {
                    continue;
                }
                if ($class->getProperty($name)->isPublic()) {
                    $this->$key = $value;
                }
            }
            $this->initParams = $param;
        }

        /**
         * @return array the array that was passed to the constructor
         */
        public function toArray()
        {
            return $this->initParams;
        }
    }
    
    3. 具体的数据对应document对象。
    <?php
    /**
     * Created by PhpStorm.
     * User: guodong
     * Date: 2019/5/12
     * Time: 下午5:04
     */
    
    namespace app\common\es\lib;
    
    /**
     * Base class for document objects passed to the ES helpers.
     *
     * The constructor copies matching array entries onto public properties
     * declared by the concrete class and keeps the raw input for toArray().
     */
    class ESBaseDoc
    {

        /**
         * Raw constructor input, returned unchanged by toArray().
         * @var array
         */
        private $initParams;

        /**
         * @param array $param field name => value; keys without a matching
         *                     public property are not assigned but are still
         *                     part of toArray()
         */
        public function __construct(array $param)
        {
            // One reflector for the whole loop instead of one per key.
            $class = new \ReflectionClass(static::class);
            foreach ($param as $key => $value) {
                $name = (string) $key;
                // Check existence first: asking reflection for an undeclared
                // property throws ReflectionException, which made every extra
                // key in $param abort construction.
                if (!$class->hasProperty($name)) {
                    continue;
                }
                if ($class->getProperty($name)->isPublic()) {
                    $this->$key = $value;
                }
            }
            $this->initParams = $param;
        }

        /**
         * @return array the array that was passed to the constructor
         */
        public function toArray()
        {
            return $this->initParams;
        }
    }
    

    具体的业务类要继承这个类

    namespace app\common\lib\es\goods;
    
    use app\common\es\lib\ESBaseDoc;
    
    /**
     * Document object for goods entries.
     *
     * Declares the public fields id, name and content; everything else is
     * inherited from ESBaseDoc.
     */
    class ESGoodsDoc extends ESBaseDoc
    {
        /** @var mixed value stored under the "id" key */
        public $id;

        /** @var mixed value stored under the "name" key */
        public $name;

        /** @var mixed value stored under the "content" key */
        public $content;
    }
    
    

    参考文档

    https://www.cnblogs.com/ajianbeyourself/p/5529575.html
    https://help.aliyun.com/document_detail/58107.html?spm=a2c4g.11186623.6.543.778473bf6G1rrB
    https://www.elastic.co/guide/en/elasticsearch/reference/6.7/index.html
    https://es.xiaoleilu.com/052_Mapping_Analysis/45_Mapping.html

    相关文章

      网友评论

          本文标题:PHP  Elasticsearch 6.7初步学习实践

          本文链接:https://www.haomeiwen.com/subject/pukaoqtx.html