美文网首页
Elasticsearch7.1.0集成IK分词器实现oracl

Elasticsearch7.1.0集成IK分词器实现oracle动态词库实时更新

作者: lowkey港 | 来源:发表于2022-07-21 15:49 被阅读0次

    一.下载elasticsearch-analysis-ik源码

    image.png

    二.解压修改源码文件

    我这里使用idea开发

    修改maven依赖es版本号

    <!-- Elasticsearch version used for the plugin's Maven dependency -->
    <elasticsearch.version>7.1.0</elasticsearch.version>
    

    引入oracle驱动

            <!-- Oracle JDBC driver -->
            <dependency>
                <groupId>com.oracle</groupId>
                <artifactId>ojdbc7</artifactId>
                <version>12.1.0.2</version>
            </dependency>
    
    

    创建循环线程类

    package org.wltea.analyzer.dic;
    
    import org.apache.logging.log4j.Logger;
    import org.wltea.analyzer.help.ESPluginLoggerFactory;
    
    
    /**
     * Endless reload driver for the IK dictionary.
     *
     * <p>Despite its name this class is not a {@code Thread}: {@link #initial()} simply
     * loops in whichever thread invokes it, so callers are expected to hand it to a
     * worker thread of their own.
     *
     * @author HuGang 1042258937@qq.com (created 2022/7/19 10:06)
     */
    public class HotDictReloadThread {

        private static final Logger log =
                ESPluginLoggerFactory.getLogger(HotDictReloadThread.class.getName());

        /**
         * Repeatedly fetches the {@code Dictionary} singleton and calls its
         * {@code reLoadMainDict} method, logging one line before each call.
         * This method contains no exit condition of its own.
         */
        public void initial() {
            for (;;) {
                log.info("正在调用HotDictReloadThread...");
                // Look the singleton up on every pass rather than caching it.
                final Dictionary dictionary = Dictionary.getSingleton();
                dictionary.reLoadMainDict();
            }
        }
    }
    
    
    image.png

    在项目中找到Dictionary类修改initial方法

                        // Start the custom hot-reload thread
                        pool.execute(() -> new HotDictReloadThread().initial());
    
    image.png

    准备数据库文件

    -- Lexicon table holding the custom IK extension words and stop words.
    -- Create table
    create table T_DSJFX_IK_LEXICON
    (
      TEXT             VARCHAR2(15) not null,
      TYPE             INTEGER default 0,
      STATUS           INTEGER default 0,
      SEARCH_COUNT     INTEGER default 0,
      LAST_SEARCH_TIME DATE default sysdate
    );
    -- Add comments to the table
    comment on table T_DSJFX_IK_LEXICON
      is 'ES ik分词器自定义词条';
    -- Add comments to the columns
    comment on column T_DSJFX_IK_LEXICON.TEXT
      is '词条';
    comment on column T_DSJFX_IK_LEXICON.TYPE
      is '0扩展词库 1停用词库';
    comment on column T_DSJFX_IK_LEXICON.STATUS
      is '词条状态 0正常 1暂停使用';
    comment on column T_DSJFX_IK_LEXICON.SEARCH_COUNT
      is '搜索次数';
    comment on column T_DSJFX_IK_LEXICON.LAST_SEARCH_TIME
      is '最后搜索时间';
    -- Create/Recreate primary, unique and foreign key constraints
    -- NOTE: the tablespace name below is environment-specific; replace it with one
    -- that exists in the target database.
    alter table T_DSJFX_IK_LEXICON
      add constraint PK_LEXICON_TEXT primary key (TEXT)
      using index
      tablespace TS_JCJMR_DATA
      pctfree 10
      initrans 2
      maxtrans 255
      storage
      (
        initial 64K
        next 1M
        minextents 1
        maxextents unlimited
      );
    

    然后我们在项目的根路径的config目录下新建配置文件jdbc-reload.properties,内容如下

    #  Company address
    # Database URL
    jdbc.url=jdbc:oracle:thin:@127.0.0.1:1521/hnkcdb
    # Database user name
    jdbc.user=user
    # Database password
    jdbc.password=password
    # SQL statement that queries the extension-word lexicon
    jdbc.reload.sql=select text as word from t_dsjfx_ik_lexicon t where t.type = '0' and t.status = '0'
    # SQL statement that queries the stop-word lexicon
    jdbc.reload.stopword.sql=select text as word from t_dsjfx_ik_lexicon t where t.type = '1' and t.status = '0'
    # Database polling interval: query once every 60 seconds
    jdbc.reload.interval=60
    

    在Dictionary类中创建获取数据库连接的方法

        /**
         * Opens a new Oracle JDBC connection using the settings in
         * {@code jdbc-reload.properties}, located under the dictionary root.
         *
         * <p>The properties file is re-read into {@code props} on every call, and the
         * loaded keys are logged with the password masked.
         *
         * @return a new connection, or {@code null} when the properties file cannot be
         *         read, the driver class cannot be loaded, or the connection attempt
         *         fails; the cause is logged in each case
         */
        private Connection getConn() {
            Connection conn = null;
            // Load the configuration file
            Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
            // try-with-resources: the stream was previously never closed, leaking one
            // file handle per reload cycle.
            try (FileInputStream in = new FileInputStream(file.toFile())) {
                props.load(in);
                logger.info("[==========]jdbc-reload.properties");
                for (Object key : props.keySet()) {
                    String name = String.valueOf(key);
                    // Never write the database password to the log in plain text.
                    String shown = "jdbc.password".equals(name) ? "******" : props.getProperty(name);
                    logger.info("[==========]" + name + "=" + shown);
                }
                // Load the driver class explicitly before asking DriverManager for a connection.
                Class.forName("oracle.jdbc.driver.OracleDriver");
                conn = DriverManager.getConnection(
                        props.getProperty("jdbc.url"),
                        props.getProperty("jdbc.user"),
                        props.getProperty("jdbc.password"));
            } catch (SQLException e) {
                logger.error("[==========]failed to open JDBC connection", e);
            } catch (Exception e) {
                logger.error("[==========]failed to load jdbc-reload.properties or the Oracle JDBC driver", e);
            }
            return conn;
        }
    

    找到Dictionary类的reLoadMainDict方法,可以看到在方法里面有2个方法tmpDict.loadMainDict()和tmpDict.loadStopWordDict(),分别维护扩展词库和停用词库,我们先看一下对扩展词库的维护;

    在方法tmpDict.loadMainDict()中,我们在最后一行“加载远程自定义词库”的后面新增一个方法调用this.loadMyDbExtDict(),用于加载oracle词库;在加载oracle词库之前,需先准备好oracle相关的配置以及sql语句,并在数据库中新建一张表,用于维护扩展词和停用词,表结构见上文的建表语句

    从oracle中加载动态词库

        /**
         * Loads the custom extension words from the database into the main dictionary,
         * then pauses for the configured reload interval.
         *
         * <p>The query is taken from the {@code jdbc.reload.sql} property and must expose
         * the word under the column alias {@code word}. Any failure is logged and the
         * method still pauses afterwards, so an unreachable database cannot turn the
         * unconditional reload loop in {@code HotDictReloadThread} into a hot spin.
         */
        private void loadMyDbExtDict() {
            // getConn() returns null on failure; try-with-resources skips closing a null resource.
            try (Connection conn = getConn()) {
                if (conn == null) {
                    logger.error("[==========]no database connection, skipping custom IK extension dictionary reload");
                } else {
                    String sql = props.getProperty("jdbc.reload.sql");
                    try (Statement st = conn.createStatement();
                         ResultSet rs = st.executeQuery(sql)) {
                        while (rs.next()) {
                            String theWord = rs.getString("word");
                            // A null or blank row carries nothing to index; skip it instead of
                            // letting it abort the rest of the reload.
                            if (theWord == null || theWord.trim().isEmpty()) {
                                continue;
                            }
                            logger.info("[==========]正在加载自定义IK扩展词库词条: " + theWord);
                            _MainDict.fillSegment(theWord.trim().toCharArray());
                        }
                    }
                }
            } catch (Exception e) {
                logger.error("[==========]failed to load custom IK extension dictionary from database", e);
            }
            // Sleep only after the JDBC resources are closed, so no connection is held
            // open for the whole interval.
            sleepUntilNextDbReload();
        }

        /** Sleeps for {@code jdbc.reload.interval} seconds, or 60 if the value is missing, non-numeric or not positive. */
        private void sleepUntilNextDbReload() {
            long seconds = 60L;
            String raw = (props == null) ? null : props.getProperty("jdbc.reload.interval");
            if (raw != null) {
                try {
                    long parsed = Long.parseLong(raw.trim());
                    if (parsed > 0) {
                        seconds = parsed;
                    }
                } catch (NumberFormatException e) {
                    logger.error("[==========]invalid jdbc.reload.interval '" + raw + "', using 60 seconds", e);
                }
            }
            try {
                Thread.sleep(seconds * 1000L);
            } catch (InterruptedException e) {
                // The interrupt flag is deliberately not restored: the reload loop that
                // drives this method runs unconditionally, and a set flag would make
                // every later sleep return immediately.
                logger.warn("[==========]dictionary reload sleep interrupted", e);
            }
        }
    
    // Load the database-backed extension words; the method is named loadMyDbExtDict.
    this.loadMyDbExtDict();
    
    image.png

    停用词类似 依葫芦画瓢

    oracle依赖包可直接在打包配置中添加(不添加的话,也可自行将jar拷贝到相应目录)

            <!-- Include the Oracle JDBC driver artifact (com.oracle:ojdbc7) in the packaged output -->
            <dependencySet>
                <outputDirectory/>
                <useProjectArtifact>true</useProjectArtifact>
                <useTransitiveFiltering>true</useTransitiveFiltering>
                <includes>
                    <include>com.oracle:ojdbc7</include>
                </includes>
            </dependencySet>
    
    image.png

    安装IK分词器插件

    完成上述步骤并打包后,拿到elasticsearch-analysis-ik-7.1.0.zip插件,在ES安装目录下的plugins目录下新建一个ik文件夹,将插件解压到ik文件夹下


    image.png

    三.常见问题

    异常1

    java.sql.SQLException: Column 'word' not found.
    此异常是因为编写sql时,查询的数据库字段需要起别名为 word,修改一下sql即可解决这个问题;

    异常2 困扰了我一个星期,找了各种答案都没能解决,重点要记笔记的

    java.security.AccessControlException: access denied ("java.lang.management.ManagementPermission" "control")

    [2022-07-22T13:42:21,186][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [node-1] fatal error in thread [elasticsearch[node-1][clusterApplierService#updateTask][T#1]], exiting
    java.lang.ExceptionInInitializerError: null
        at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource.<clinit>(BlockSource.java:402) ~[?:?]
        at oracle.jdbc.driver.BlockSource.createBlockSource(BlockSource.java:80) ~[?:?]
        at oracle.jdbc.driver.BlockSource.createBlockSource(BlockSource.java:70) ~[?:?]
        at oracle.jdbc.driver.PhysicalConnection.setBlockSource(PhysicalConnection.java:593) ~[?:?]
        at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:631) ~[?:?]
        at oracle.jdbc.driver.T4CConnection.<init>(T4CConnection.java:398) ~[?:?]
        at oracle.jdbc.driver.T4CDriverExtension.getConnection(T4CDriverExtension.java:31) ~[?:?]
        at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:566) ~[?:?]
        at java.sql.DriverManager.getConnection(DriverManager.java:677) ~[java.sql:?]
        at java.sql.DriverManager.getConnection(DriverManager.java:228) ~[java.sql:?]
        at org.wltea.analyzer.dic.Dictionary.getConn(Dictionary.java:686) ~[?:?]
        at org.wltea.analyzer.dic.Dictionary.loadMyDbExtDict(Dictionary.java:589) ~[?:?]
        at org.wltea.analyzer.dic.Dictionary.loadMainDict(Dictionary.java:398) ~[?:?]
        at org.wltea.analyzer.dic.Dictionary.initial(Dictionary.java:151) ~[?:?]
        at org.wltea.analyzer.cfg.Configuration.<init>(Configuration.java:40) ~[?:?]
        at org.elasticsearch.index.analysis.IkTokenizerFactory.<init>(IkTokenizerFactory.java:15) ~[?:?]
        at org.elasticsearch.index.analysis.IkTokenizerFactory.getIkSmartTokenizerFactory(IkTokenizerFactory.java:23) ~[?:?]
        at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.index.analysis.AnalysisRegistry.buildTokenizerFactories(AnalysisRegistry.java:174) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:159) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.index.IndexService.<init>(IndexService.java:165) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:398) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:544) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:493) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:161) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.indices.cluster.IndicesClusterStateService.createIndices(IndicesClusterStateService.java:498) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.indices.cluster.IndicesClusterStateService.applyClusterState(IndicesClusterStateService.java:268) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.cluster.service.ClusterApplierService.lambda$callClusterStateAppliers$5(ClusterApplierService.java:478) ~[elasticsearch-7.1.0.jar:7.1.0]
        at java.lang.Iterable.forEach(Iterable.java:75) ~[?:?]
        at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateAppliers(ClusterApplierService.java:476) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:459) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:413) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:164) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:252) ~[elasticsearch-7.1.0.jar:7.1.0]
        at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:215) ~[elasticsearch-7.1.0.jar:7.1.0]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]
        at java.lang.Thread.run(Thread.java:835) [?:?]
    Caused by: java.security.AccessControlException: access denied ("java.lang.management.ManagementPermission" "control")
        at java.security.AccessControlContext.checkPermission(AccessControlContext.java:472) ~[?:?]
        at java.security.AccessController.checkPermission(AccessController.java:1042) ~[?:?]
        at java.lang.SecurityManager.checkPermission(SecurityManager.java:408) ~[?:?]
        at sun.management.Util.checkAccess(Util.java:77) ~[?:?]
        at sun.management.Util.checkControlAccess(Util.java:85) ~[?:?]
        at sun.management.MemoryPoolImpl.setCollectionUsageThreshold(MemoryPoolImpl.java:213) ~[?:?]
        at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener$1.run(BlockSource.java:376) ~[?:?]
        at java.security.AccessController.doPrivileged(AccessController.java:310) ~[?:?]
        at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener.<init>(BlockSource.java:374) ~[?:?]
        at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener.<clinit>(BlockSource.java:348) ~[?:?]
        ... 39 more
    

    解决方法:在plugin-security.policy文件中添加

    // Needed by the Oracle JDBC driver: its BlockSource initializer calls
    // MemoryPoolImpl.setCollectionUsageThreshold, which checks this permission.
    permission java.lang.management.ManagementPermission "control";
    
    image.png

    异常3:AccessControlException: access denied ("java.net.SocketPermission" "127.0.0.1:1521" "connect,resolve")

    在D:\tool\ELK\elasticsearch-7.1.0\jdk\conf\security\java.policy文件中添加

    // Allow outbound socket connections (needed for the JDBC connection to the database)
    permission java.net.SocketPermission "*", "connect,resolve";
    // Add each of the following only if the corresponding access-denied error is reported.
    // (Policy files accept only // and /* */ comments; a leading # is a syntax error.)
    permission java.lang.RuntimePermission "accessClassInPackage.sun.security.krb5";
    permission java.lang.RuntimePermission "accessDeclaredMembers";
    permission javax.management.MBeanServerPermission "createMBeanServer";
    
    image.png

    四、测试

    GET _analyze
    {
      "analyzer": "ik_max_word",
      "text": "微信"
    }
    
    image.png

    未添加词条前无法分词;在数据库中添加“微信”后等待60秒


    image.png

    成功!!!

    MySQL版本请查看:Elasticsearch7.8.0集成IK分词器改源码实现MySql5.7.2实现动态词库实时更新

    相关文章

      网友评论

          本文标题:Elasticsearch7.1.0集成IK分词器实现oracl

          本文链接:https://www.haomeiwen.com/subject/lammirtx.html