美文网首页
Lucene入门1:索引的增、查

Lucene入门1:索引的增、查

作者: 半帅气 | 来源:发表于2018-03-29 19:38 被阅读0次

    因工作需要接触Lucene,今天对Lucene索引的增删改查有了初步了解,本文先记录其中的新增与搜索两部分。思路是先用起来,再根据需要逐步深入了解其原理。

    1. 新增索引

    步骤:

    1)创建Directory,指定索引存放目录
    2)创建索引写入配置对象,指定分词器
    3)创建索引写入对象IndexWriter
    4)创建Document对象,存储索引
    5)为Document添加Field
    6)添加Document到Index

    /**
     * Demo utility that builds a small Lucene index from three hard-coded sample documents.
     *
     * <p>Each sample document gets an {@code id}, an {@code author}, an {@code article} text
     * and a {@code content} field whose text is read from a file on disk.
     */
    public class luceneIndexUtil {

        // Parallel arrays: element i of every array describes sample document i.
        private final int[] ids = {1, 2, 3};
        private final String[] authors = {"jason", "neo", "rzexin"};
        private final String[] articles = {"vim can do everything!", "go go", "blockchain"};
        private final String[] file_paths = {"/tmp/lucene/a.txt", "/tmp/lucene/b.txt", "/tmp/lucene/c.txt"};

        // Directory in which the index files are written.
        private static final String INDEX_PATH = "/tmp/lucene/index.1";

        /**
         * Creates (or appends to) the index under {@code INDEX_PATH}, adding one document per
         * entry of the sample arrays.
         *
         * @throws IOException if the index directory cannot be opened or written, or if one of
         *     the content files cannot be read
         */
        public void createIndex() throws IOException {
            // try-with-resources closes the writer, the analyzer and the directory (in that
            // order) even when indexing fails part-way through.
            // 1) Open the Directory that holds the index files.
            // 2) Configure the writer with an analyzer. The sample content contains Chinese,
            //    so the Chinese analyzer is used instead of the default StandardAnalyzer
            //    (pom.xml needs the lucene-analyzers-smartcn dependency).
            // 3) Create the IndexWriter.
            try (FSDirectory dir = FSDirectory.open(Paths.get(INDEX_PATH));
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                 IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {

                for (int i = 0; i < ids.length; ++i) {
                    // 4) Create the Document that carries the fields to index.
                    Document doc = new Document();

                    // 5) Add the fields.
                    // IntPoint indexes the int value for querying but does not store it ...
                    doc.add(new IntPoint("id", ids[i]));
                    // ... so a StoredField is added to keep the value retrievable.
                    doc.add(new StoredField("id", ids[i]));

                    // StringField: indexed as a single token (not analyzed).
                    // Field.Store.YES: the original value is stored and can be read back.
                    doc.add(new StringField("author", authors[i], Field.Store.YES));

                    // TextField: analyzed (tokenized) and indexed.
                    // Field.Store.NO: searchable, but the original text cannot be read back.
                    doc.add(new TextField("article", articles[i], Field.Store.NO));

                    // Read the file explicitly as UTF-8 instead of the platform default charset,
                    // so Chinese text is indexed the same way on every machine.
                    // NOTE(review): assumes the content files are UTF-8 encoded - confirm.
                    try (java.io.Reader contentReader = java.nio.file.Files.newBufferedReader(
                            Paths.get(file_paths[i]), java.nio.charset.StandardCharsets.UTF_8)) {
                        // Reader-based TextField: indexed but not stored.
                        doc.add(new TextField("content", contentReader));

                        // 6) Add the Document to the index while the reader is still open.
                        indexWriter.addDocument(doc);
                    }
                }
            }
        }
    }
    

    测试:

        /** Smoke test: builds the sample index; it passes as long as no exception is thrown. */
        @Test
        public void testCreateIndex() throws IOException {
            new luceneIndexUtil().createIndex();
        }
    
    执行后,会在索引目录(/tmp/lucene/index.1)下生成索引文件。

    2. 搜索

    步骤:

    1)创建Directory
    2)创建IndexReader
    3)创建IndexSearcher
    4)创建搜索的Query
    5)返回TopDocs
    6)获取ScoreDoc对象
    7)获取具体Document对象
    8)从Document对象中获取需要值

        /**
         * Searches the index under {@code INDEX_PATH} for documents whose {@code content}
         * field matches the keyword "天空" and prints up to ten hits to standard output.
         *
         * <p>Only stored fields can be read back from a hit. NOTE(review): {@code article} and
         * {@code content} are indexed without being stored by the indexing code in this
         * article, so they are expected to print as {@code null} - confirm.
         *
         * @throws IOException if the index cannot be opened or read
         * @throws ParseException if the query string cannot be parsed
         */
        public void searchIndex() throws IOException, ParseException {
            // try-with-resources releases the analyzer, the reader and the directory even when
            // parsing or searching throws.
            // 1) Open the Directory.
            // 2) Open an IndexReader on it.
            try (FSDirectory dir = FSDirectory.open(Paths.get(INDEX_PATH));
                 IndexReader indexReader = DirectoryReader.open(dir);
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {

                // 3) Create the IndexSearcher.
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                // 4) Build the Query: documents whose "content" contains the keyword.
                // Alternative from the original tutorial: search the "article" field with
                // new QueryParser("article", new StandardAnalyzer()) and parser.parse("Go").
                QueryParser parser = new QueryParser("content", analyzer);
                Query query = parser.parse("天空");

                // 5) Run the search, keeping at most the 10 best hits.
                TopDocs topDocs = indexSearcher.search(query, 10);

                // 6) Walk the ScoreDoc entries of the result.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    // 7) Load the Document behind the hit.
                    Document doc = indexSearcher.doc(scoreDoc.doc);

                    // 8) Read the wanted values from the Document.
                    System.out.printf("id:%s, author:%s, article:%s, content:%s\n",
                            doc.get("id"), doc.get("author"),
                            doc.get("article"), doc.get("content"));
                }
            }
        }
    

    测试:

        /** Smoke test: runs the sample search; it passes as long as no exception is thrown. */
        @Test
        public void testSearchIndex() throws IOException, ParseException {
            new luceneIndexUtil().searchIndex();
        }
    

    相关文章

      网友评论

          本文标题:Lucene入门1:索引的增、查

          本文链接:https://www.haomeiwen.com/subject/cydvcftx.html