lucene

作者: 冰与河豚鱼 | 来源:发表于2018-10-12 18:13 被阅读0次
    创建索引库
    /**
     * Opens the index directory and adds a single file to it as one document.
     *
     * <p>Both path strings below are placeholders and must be replaced with
     * real locations before running.
     *
     * @param args unused
     * @throws IOException if the index directory cannot be opened or written,
     *     or the source file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        // Raw file to index (placeholder path).
        File file = new File("");

        // try-with-resources so the writer (and its directory) are closed even
        // when addDocument throws.
        try (FSDirectory directory = FSDirectory.open(Paths.get("索引存储路径"));
                IndexWriter indexWriter = new IndexWriter(
                        directory,
                        // Index configuration: StandardAnalyzer tokenizes the text fields.
                        new IndexWriterConfig(new StandardAnalyzer()))) {
            // Wrap the file's metadata and content in a Document and write it to the index.
            Document doc = convertToDocument(file);
            indexWriter.addDocument(doc);
        }
    }
    
    //把原始的文档转化为Document对象
    /**
     * Builds a Lucene {@code Document} describing the given file.
     *
     * <p>Fields added:
     * <ul>
     *   <li>{@code name} - file name, tokenized and stored</li>
     *   <li>{@code path} - absolute path, stored only</li>
     *   <li>{@code size} - length in bytes, indexed as a point and also stored</li>
     *   <li>{@code content} - file text supplied through a reader</li>
     * </ul>
     *
     * @param file the file to describe
     * @return the populated document
     * @throws FileNotFoundException if {@code file} cannot be opened for reading
     */
    private static Document convertToDocument(File file) throws FileNotFoundException {
        Document doc = new Document();

        doc.add(new TextField("name", file.getName(), Store.YES));
        doc.add(new StoredField("path", file.getAbsolutePath()));

        // A LongPoint by itself is not retrievable from search hits, so the
        // value is added a second time as a stored field.
        long size = file.length();
        doc.add(new LongPoint("size", size));
        doc.add(new StoredField("size", size));

        // NOTE(review): the content is decoded as UTF-8 - confirm this matches the input files.
        doc.add(new TextField("content", new BufferedReader(
                new java.io.InputStreamReader(
                        new java.io.FileInputStream(file),
                        java.nio.charset.StandardCharsets.UTF_8))));

        return doc;
    }
    
    对文件夹下所有文件进行索引
    public class LuceneDirectoryIndex{
        private static final String DATA_PATH = "";
        private static final String INDEX_PATH = "";
        public static void main(String[] args) throws IOException{
        //把目录里面的文件都遍历出来
        File file = new File(DATA_PATH);
        // File[] files = file.listFiles();  //列出目录下的所有文件
    
      List<File> files = new ArrayList<>();
      
      //遍历文件(遍历目录和子目录,速度非常快)
        Files.walkFileTree(Paths.get(DATA_PATH),new SimpleFileVisitor<Path>){
           
           //遍历到每个文件的时候,visitFile方法都会被调用
           @Override
           public FileVisitResult(Path file,BasicFileAttributes atte) throws IOException{
               IF(file.getFileName().toString().endWith(".docx"));  //只查询word文件
               
               files.add(file.toFile());
    
               return FileVisitResult.CONTINUE;
           }
        }
    
        //对每个文件进行索引
        IndexWriter indexWriter = createIndexWriter();
        for(File f:files){
           indexWriter.addDocument(convertToDocument(f));
        }
           indexWriter.close();
        }
    
        //创建一个索引写入器
        private static IndexWriter createIndexWriter() throws IOException{
            IndexWriter indexWriter = new IndexWriter(
              FSDirectory.open(Paths.get("索引存储路径")),
              new IndexWriterConfig(new StandardAnalyzer()));
          return indexWriter;
        }
    
        //把原始的文档转化为Document对象
        private static Document convertToDocument(File file)throws FileNotFoundException{
        Document doc = new Document();
    
        //把文件的每一个信息封装到Field对象里面去
        //名字,路径,大小,内容
        Field nameField = new TextField("name",file.getName(),Store.YES);
        Field pathField = new StoreField("path",file.getAbsolutePath());
        Field sizeField = new LongPoint("size",fiel.length());
        Field contentField = new TextField("content",new BufferedReader(new File(file)));
    
        doc.add(nameField);
        doc.add(pathField);
        doc.add(sizeField);
        doc.add(contentField);
    }
    }
    
    文档搜索
    /**
     * Searches the index at {@code INDEX_PATH} for documents whose {@code name}
     * field contains the term {@code spring} and prints the top ten hits.
     *
     * <p>{@code INDEX_PATH} is an empty placeholder and must be set before running.
     */
    public class LuceneSearcher {

        private static final String INDEX_PATH = "";

        /**
         * Runs the query and prints one line per hit.
         *
         * @param args unused
         * @throws IOException if the index cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            // try-with-resources so the directory and reader are released on every path.
            try (FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
                    IndexReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);

                TopDocs topDocs = searcher.search(new TermQuery(new Term("name", "spring")), 10);

                String msg = "name: %s path: %s size: %s content: %s \n";
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    // Internal id of the hit, used to load its stored fields.
                    int id = hit.doc;
                    Document doc = searcher.doc(id);

                    // get() yields null for any field that was not stored at index time.
                    String name = doc.get("name");
                    String path = doc.get("path");
                    String size = doc.get("size");
                    String content = doc.get("content");

                    System.out.printf(msg, name, path, size, content);
                }
            }
        }
    }

    相关文章

      网友评论

          本文标题:lucene

          本文链接:https://www.haomeiwen.com/subject/dywgaftx.html