创建索引库
/**
 * Creates (or opens) a Lucene index and adds a single document to it.
 *
 * <p>The index directory and the source file are placeholders and must be
 * replaced with real paths before running.
 *
 * @param args unused
 * @throws IOException if the index cannot be opened or written, or the
 *                     source file cannot be read
 */
public static void main(String[] args) throws IOException{
    // try-with-resources: the writer is closed (releasing the index lock)
    // even when addDocument throws.
    try (IndexWriter indexWriter = new IndexWriter(
            // Directory in which the index is created
            FSDirectory.open(Paths.get("索引存储路径")),
            // Index configuration, including the text analyzer
            new IndexWriterConfig(new StandardAnalyzer()))) {
        // Wrap the raw file's information into a Document (one Field per attribute)
        File file = new File("");
        Document doc = convertToDocument(file);
        // Write the Document into the index
        indexWriter.addDocument(doc);
    }
}
/**
 * Converts a file into a Lucene {@link Document}.
 *
 * <p>Fields added:
 * <ul>
 *   <li>{@code name} - analyzed and stored file name</li>
 *   <li>{@code path} - stored-only absolute path</li>
 *   <li>{@code size} - file length, indexed as a {@code LongPoint} and
 *       additionally stored so it can be read back from search hits</li>
 *   <li>{@code content} - analyzed file content supplied through a Reader
 *       (Reader-based text fields are not stored)</li>
 * </ul>
 *
 * @param file the file to convert
 * @return the populated document
 * @throws FileNotFoundException if {@code file} cannot be opened for reading
 */
private static Document convertToDocument(File file)throws FileNotFoundException{
    Document doc = new Document();
    doc.add(new TextField("name", file.getName(), Field.Store.YES));
    doc.add(new StoredField("path", file.getAbsolutePath()));
    long size = file.length();
    // LongPoint only indexes the value; a separate StoredField is required
    // for the value to be retrievable from a hit.
    doc.add(new LongPoint("size", size));
    doc.add(new StoredField("size", size));
    // The reader is consumed when the document is added to an IndexWriter,
    // so it must stay open until then and is intentionally not closed here.
    doc.add(new TextField("content", new BufferedReader(new java.io.FileReader(file))));
    return doc;
}
对文件夹下所有文件进行索引
public class LuceneDirectoryIndex{
private static final String DATA_PATH = "";
private static final String INDEX_PATH = "";
public static void main(String[] args) throws IOException{
//把目录里面的文件都遍历出来
File file = new File(DATA_PATH);
// File[] files = file.listFiles(); //列出目录下的所有文件
List<File> files = new ArrayList<>();
//遍历文件(遍历目录和子目录,速度非常快)
Files.walkFileTree(Paths.get(DATA_PATH),new SimpleFileVisitor<Path>){
//遍历到每个文件的时候,visitFile方法都会被调用
@Override
public FileVisitResult(Path file,BasicFileAttributes atte) throws IOException{
IF(file.getFileName().toString().endWith(".docx")); //只查询word文件
files.add(file.toFile());
return FileVisitResult.CONTINUE;
}
}
//对每个文件进行索引
IndexWriter indexWriter = createIndexWriter();
for(File f:files){
indexWriter.addDocument(convertToDocument(f));
}
indexWriter.close();
}
//创建一个索引写入器
private static IndexWriter createIndexWriter() throws IOException{
IndexWriter indexWriter = new IndexWriter(
FSDirectory.open(Paths.get("索引存储路径")),
new IndexWriterConfig(new StandardAnalyzer()));
return indexWriter;
}
//把原始的文档转化为Document对象
private static Document convertToDocument(File file)throws FileNotFoundException{
Document doc = new Document();
//把文件的每一个信息封装到Field对象里面去
//名字,路径,大小,内容
Field nameField = new TextField("name",file.getName(),Store.YES);
Field pathField = new StoreField("path",file.getAbsolutePath());
Field sizeField = new LongPoint("size",fiel.length());
Field contentField = new TextField("content",new BufferedReader(new File(file)));
doc.add(nameField);
doc.add(pathField);
doc.add(sizeField);
doc.add(contentField);
}
}
文档搜索
/**
 * Searches the Lucene index at {@link #INDEX_PATH} for documents whose
 * {@code name} field contains the term {@code spring} and prints the hits.
 */
public class LuceneSearcher{
    /** Directory in which the index is stored (placeholder: set before running). */
    private static final String INDEX_PATH = "";

    /**
     * Runs the query and prints up to ten matching documents.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources: the reader (and its file handles) is always released.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH)))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("name", "spring")), 10);
            String msg = "name: %s path: %s size: %s content: %s \n";
            for (ScoreDoc hit : topDocs.scoreDocs) {
                // Internal document number of the hit (comparable to a primary key).
                int id = hit.doc;
                Document doc = searcher.doc(id);
                // Document.get returns null for any field that was not stored
                // at index time.
                String name = doc.get("name");
                String path = doc.get("path");
                String size = doc.get("size");
                String content = doc.get("content");
                System.out.printf(msg, name, path, size, content);
            }
        }
    }
}
网友评论