lucene全文检索,还是直接看代码吧
lucene依赖
<!--lucene核心及其依赖-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.6.0</version>
</dependency>
<!--中文分词器-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>7.6.0</version>
</dependency>
IndexReader与IndexSearcher工厂类
新版里面都是用 DirectoryReader 生成IndexReader呢,这里跟老版本很不一样,网上很多都说的是lucene老版本
package cn.wgd.zmx.utils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import java.io.File;
import java.io.IOException;
/**
 * Maintains one shared {@link DirectoryReader} and one shared {@link IndexSearcher}.
 *
 * <p>Opening a reader is expensive, so the reader is created once from the
 * {@link IndexWriter} and afterwards refreshed with
 * {@link DirectoryReader#openIfChanged(DirectoryReader)}, which reuses the
 * unchanged parts of the old reader.
 *
 * <p>All access is serialised on a private lock. When the index has changed the
 * previous reader is closed, so callers should fetch the reader/searcher again
 * for each operation instead of holding on to an old instance.
 */
public class LuceneFactory {
    /** Private monitor; avoids contending with unrelated code on a shared class object. */
    private static final Object LOCK = new Object();

    private static DirectoryReader indexReader = null;
    private static IndexSearcher indexSearcher = null;

    /**
     * Returns the shared reader, opening it on first use and refreshing it when
     * the index has changed since the last call.
     *
     * @param indexWriter writer the reader is opened from on first use
     * @return the current reader, or {@code null} if the very first open failed
     */
    public static DirectoryReader getIndexReader(IndexWriter indexWriter) {
        synchronized (LOCK) {
            try {
                if (indexReader == null) {
                    indexReader = DirectoryReader.open(indexWriter);
                } else {
                    // openIfChanged returns null when nothing changed; in that
                    // case the existing reader is still current and must stay open.
                    DirectoryReader refreshed = DirectoryReader.openIfChanged(indexReader);
                    if (refreshed != null) {
                        indexReader.close();
                        indexReader = refreshed;
                    }
                }
            } catch (IOException e) {
                // Keep whatever reader we had; a failed refresh must not lose it.
                e.printStackTrace();
            }
            return indexReader;
        }
    }

    /**
     * Returns a searcher over the current shared reader. A new searcher is built
     * only when there is none yet or the underlying reader was replaced.
     *
     * @param indexWriter writer the reader is opened from on first use
     * @return a searcher bound to the current reader
     * @throws IllegalStateException if no reader could be opened
     */
    public static IndexSearcher getIndexSearch(IndexWriter indexWriter) {
        synchronized (LOCK) {
            DirectoryReader current = getIndexReader(indexWriter);
            if (current == null) {
                throw new IllegalStateException("Unable to open an IndexReader for the given IndexWriter");
            }
            // Rebuild only when the reader instance changed; the old searcher
            // would otherwise point at a reader that has been closed.
            if (indexSearcher == null || indexSearcher.getIndexReader() != current) {
                indexSearcher = new IndexSearcher(current);
            }
            return indexSearcher;
        }
    }
}
官网上有说到,使用 DirectoryReader.open(indexWriter) 创建IndexReader更快;而IndexReader每次重新创建的开销非常大,所以上面使用了单例模式。当索引发生变化时,我们也不直接重新创建,而是使用官网推荐的方法 DirectoryReader.openIfChanged:它会先判断索引是否发生变化,如果有变化,就在旧reader的基础上创建新的IndexReader(没有变化时返回null,此时继续使用旧的reader即可),然后再通过IndexReader创建IndexSearcher,这样开销会比较小。
package cn.wgd.zmx.utils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Static helpers for adding, searching, updating and deleting documents in a
 * single on-disk Lucene index.
 *
 * <p>One {@link IndexWriter} is shared for the lifetime of the class. Write
 * operations commit but do NOT close the writer, so they can be called
 * repeatedly; call {@link #close()} once when the application shuts down.
 *
 * <p>Possible optimisation: buffer index updates in memory and flush them to
 * disk in batches instead of committing after every change.
 */
public class LuceneUtils {
    /** Index location, relative to the current working directory. */
    private static final String INDEX_PATH = "indexDir/";

    /** Directory the index is stored in. */
    private static Directory directory;

    /** Chinese analyzer used when indexing. */
    private static SmartChineseAnalyzer smartChineseAnalyzer = new SmartChineseAnalyzer();

    /** Writer configuration built around the Chinese analyzer. */
    private static IndexWriterConfig indexWriterConfig = new IndexWriterConfig(smartChineseAnalyzer);

    /** Shared writer; stays open until {@link #close()} is called. */
    private static IndexWriter indexWriter;

    static {
        try {
            directory = FSDirectory.open(Paths.get(INDEX_PATH));
            indexWriter = new IndexWriter(directory, indexWriterConfig);
        } catch (IOException e) {
            // Fail fast: every method below needs a working writer.
            throw new ExceptionInInitializerError(e);
        }
    }

    public LuceneUtils() {
    }

    /**
     * Adds a document to the index and commits it.
     *
     * @param doc document to index
     * @throws IOException if writing or committing fails
     */
    public static void addIndex(Document doc) throws IOException {
        indexWriter.addDocument(doc);
        indexWriter.commit();
    }

    /**
     * Runs a term query and concatenates the requested field of every hit.
     *
     * @param term  term to search for
     * @param n     maximum number of hits to return
     * @param field name of the field to read from each hit
     * @return the concatenated field representations, or an empty string when nothing matches
     * @throws IOException    if the index cannot be read
     * @throws ParseException never thrown here; kept so existing callers still compile
     */
    public static String findIndex(Term term, Integer n, String field) throws IOException, ParseException {
        // Ask the factory on every call rather than caching a searcher at class load.
        IndexSearcher searcher = LuceneFactory.getIndexSearch(indexWriter);
        TopDocs hits = searcher.search(new TermQuery(term), n);
        ScoreDoc[] scoreDocs = hits.scoreDocs;
        if (scoreDocs == null || scoreDocs.length == 0) {
            System.out.println("索引不存在!");
            return "";
        }
        StringBuilder result = new StringBuilder();
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // NOTE(review): this appends the field object's toString(), not just its
            // stored value; kept as-is to preserve the existing return format.
            result.append(doc.getField(field));
        }
        return result.toString();
    }

    /**
     * Deletes every document containing the given term and commits the change.
     *
     * @param term term identifying the documents to delete
     * @throws IOException if deleting or committing fails
     */
    public static void delIndex(Term term) throws IOException {
        indexWriter.deleteDocuments(term);
        indexWriter.commit();
    }

    /**
     * Replaces the documents matching {@code query} with {@code doc} and commits.
     *
     * @param query term identifying the documents to replace
     * @param doc   replacement document
     * @throws IOException if updating or committing fails
     */
    public static void updateIndex(Term query, Document doc) throws IOException {
        indexWriter.updateDocument(query, doc);
        indexWriter.commit();
    }

    /**
     * Forces segments with deletions to be merged so deleted documents are
     * physically removed, then commits. Errors are reported but not rethrown.
     */
    public static void forceDelete() {
        try {
            indexWriter.forceMergeDeletes();
            indexWriter.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared writer and the index directory. Call once on shutdown;
     * no other method of this class may be used afterwards.
     *
     * @throws IOException if closing fails
     */
    public static void close() throws IOException {
        try {
            indexWriter.close();
        } finally {
            directory.close();
        }
    }
}
上面带有注释
接下来看下测试,将IndexReader维护成单例和没有维护成单例的时候
image.png image.png
从上面可以很明显地看出,第一次因为需要创建索引,两者速度差不多;但从后面几次读取的耗时可以明显看出,将IndexReader维护成单例之后速度有提升。
需要测试代码的私信我,不想在文章里面贴的太长,影响阅读
网友评论