美文网首页
smm 引入Lucene

smm 引入Lucene

作者: 晴天M雨天 | 来源:发表于2019-10-16 14:14 被阅读0次

全文检所估计大家在平时的开发中也遇到过,我这里整理了一个小demo
方便大家参考:
加入对应的jar


<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>{lucene.version}</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>{lucene.version}</version>
</dependency>

<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>{lucene.version}</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-smartcn</artifactId> <version>{lucene.version}</version>
</dependency>


<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${lucene.version}</version>
</dependency>
2编写news类
创建索引并加入
package lc.ssm.lucene;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import static java.nio.file.Paths.*;

/**

  • Lucene 创建索引
  • @author moonxy

*/
public class CreateIndex {

public static void main(String[] args) {

    // 创建3个News对象
    News news1 = new News();
    news1.setId(1);
    news1.setTitle("安倍晋三本周会晤特朗普 将强调日本对美国益处");
    news1.setContent("日本首相安倍晋三计划2月10日在华盛顿与美国总统特朗普举行会晤时提出加大日本在美国投资的设想");
    news1.setReply(672);

    News news2 = new News();
    news2.setId(2);
    news2.setTitle("北大迎4380名新生 农村学生700多人近年最多");
    news2.setContent("昨天,北京大学迎来4380名来自全国各地及数十个国家的本科新生。其中,农村学生共700余名,为近年最多...");
    news2.setReply(995);

    News news3 = new News();
    news3.setId(3);
    news3.setTitle("特朗普宣誓(Donald Trump)就任美国第45任总统");
    news3.setContent("当地时间1月20日,唐纳德·特朗普在美国国会宣誓就职,正式成为美国第45任总统。");
    news3.setReply(1872);

    // 开始时间
    Date start = new Date();
    System.out.println("**********开始创建索引**********");
    // 创建IK分词器
    SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
    IndexWriterConfig icw = new IndexWriterConfig(analyzer);

// CREATE 表示先清空索引再重新创建
icw.setOpenMode(OpenMode.CREATE);
Directory dir = null;
IndexWriter inWriter = null;
// 存储索引的目录
Path indexPath = get("c://indexdir");

    try {
        if (!Files.isReadable(indexPath)) {
            System.out.println("索引目录 '" + indexPath.toAbsolutePath() + "' 不存在或者不可读,请检查");
            System.exit(1);
        }
        dir = FSDirectory.open(indexPath);
        inWriter = new IndexWriter(dir, icw);

        // 设置新闻ID索引并存储
        FieldType idType = new FieldType();
        idType.setIndexOptions(IndexOptions.DOCS);
        idType.setStored(true);

        // 设置新闻标题索引文档、词项频率、位移信息和偏移量,存储并词条化
        FieldType titleType = new FieldType();
        titleType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        titleType.setStored(true);
        titleType.setTokenized(true);

        FieldType contentType = new FieldType();
        contentType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        contentType.setStored(true);
        contentType.setTokenized(true);
        contentType.setStoreTermVectors(true);
        contentType.setStoreTermVectorPositions(true);
        contentType.setStoreTermVectorOffsets(true);

        Document doc1 = new Document();
        doc1.add(new Field("id", String.valueOf(news1.getId()), idType));
        doc1.add(new Field("title", news1.getTitle(), titleType));
        doc1.add(new Field("content", news1.getContent(), contentType));
        doc1.add(new IntPoint("reply", news1.getReply()));
        doc1.add(new StoredField("reply_display", news1.getReply()));

        Document doc2 = new Document();
        doc2.add(new Field("id", String.valueOf(news2.getId()), idType));
        doc2.add(new Field("title", news2.getTitle(), titleType));
        doc2.add(new Field("content", news2.getContent(), contentType));
        doc2.add(new IntPoint("reply", news2.getReply()));
        doc2.add(new StoredField("reply_display", news2.getReply()));

        Document doc3 = new Document();
        doc3.add(new Field("id", String.valueOf(news3.getId()), idType));
        doc3.add(new Field("title", news3.getTitle(), titleType));
        doc3.add(new Field("content", news3.getContent(), contentType));
        doc3.add(new IntPoint("reply", news3.getReply()));
        doc3.add(new StoredField("reply_display", news3.getReply()));

        inWriter.addDocument(doc1);
        inWriter.addDocument(doc2);
        inWriter.addDocument(doc3);

        inWriter.commit();

        inWriter.close();
        dir.close();

    } catch (IOException e) {
        e.printStackTrace();
    }
    Date end = new Date();
    System.out.println("索引文档用时:" + (end.getTime() - start.getTime()) + " milliseconds");
    System.out.println("**********索引创建完成**********");
}

}
此时磁盘下已经有对应的索引
package lc.ssm.lucene;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.ParseException;

public class QueryParseTest {

public static void main(String[] args) throws ParseException, IOException, org.apache.lucene.queryparser.classic.ParseException {
    // 搜索单个字段

// String field = "title";
// 搜索多个字段时使用数组
String[] fields = { "title", "content" };

    Path indexPath = Paths.get("c://indexdir");
    Directory dir = FSDirectory.open(indexPath);
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
    //QueryParser parser = new QueryParser(field, analyzer);
    String[] stringQuery = { "1", "学生" };
    BooleanClause.Occur[] occ = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };


    // 多域搜索
    Query query = MultiFieldQueryParser.parse(stringQuery, fields, occ, analyzer);

    // 关键字同时成立使用 AND, 默认是 OR
    //parser.setDefaultOperator(QueryParser.Operator.AND);
    // 查询语句
    System.out.println("Query:" + query.toString());

    // 返回前10条
    TopDocs tds = searcher.search(query, 10);
    for (ScoreDoc sd : tds.scoreDocs) {
        // Explanation explanation = searcher.explain(query, sd.doc);
        // System.out.println("explain:" + explanation + "\n");
        Document doc = searcher.doc(sd.doc);
        System.out.println("DocID:" + sd.doc);
        System.out.println("id:" + doc.get("id"));
        System.out.println("title:" + doc.get("title"));
        System.out.println("content:" + doc.get("content"));
        System.out.println("文档评分:" + sd.score);
    }
    dir.close();
    reader.close();
}

}
最后是编写查询类 这个也是比较简单的。

相关文章

网友评论

      本文标题:smm 引入Lucene

      本文链接:https://www.haomeiwen.com/subject/ggtumctx.html