美文网首页
Solr中文分词器配置及数据检索

Solr中文分词器配置及数据检索

作者: 孝为先 | 来源:发表于2016-05-29 11:36 被阅读0次

    <blockquote><h4>导入MySQL数据</h4></blockquote>

    *首先:在检索数据之前,先按照 http://www.jianshu.com/p/7ce281b2be30 中的步骤将MySQL数据导入到Solr中。

    *然后:这次我用的是article这张表,所以在以前的配置上要改相应的配置文件(schema.xml、data-config.xml),将IKAnalyzer2012_FF_hf1.jar拷贝到/solr/WEB-INF/lib,注意:必须是IKAnalyzer2012_FF_hf1.jar,别的jar包好像不怎么好使,我试了好几个jar包。

    schema.xml:
    
    <!-- Field type "text_ik": text field whose analyzer is the IK Analyzer class named below. -->
    <fieldType name="text_ik" class="solr.TextField">
        <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer" />
       </fieldType>
       <!-- Field "text_ikarticle" uses the text_ik type; it is indexed, stored and required. -->
       <field name="text_ikarticle" type="text_ik" indexed="true" stored="true" required="true" /> 
    

    其中"text_ikarticle"这个字段必须是你要中文分词的那个字段

    data-config.xml:
    
    <!-- Data import configuration: reads rows over JDBC from the MySQL database "zhangxp" on 127.0.0.1:3306. -->
    <dataConfig>  
        <dataSource type="JdbcDataSource"  
       driver="com.mysql.jdbc.Driver"  
       url="jdbc:mysql://127.0.0.1:3306/zhangxp"  
       user="root"  
       password="123456"/>  
        <!-- The "article" entity selects articleID and content from the article table. -->
        <document name="search_object">   
          <entity name="article"   query="select articleID, content  from article">   
            <!-- Column-to-field mapping: articleID -> id, content -> text_ikarticle. -->
            <field column="articleID" name="id"/>
            <field column="content" name="text_ikarticle"/>
          </entity>    
       </document> 
    </dataConfig>
    

    其中field元素用于名称映射:column是数据库表的列名,name是该列导入Solr后对应的字段名(例如content列对应text_ikarticle字段)
    *其次:导入数据,导入数据如图:


    dataimport.png

    使用分词查询,结果如下

    分词查询.png

    <blockquote><h4>基于WEB全文检索</h4></blockquote>
    基于Web查询数据,关键词用红字标注
    SolrArticleController.java

    package com.xx.controller;
    
    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.Map;
    
    import javax.servlet.http.HttpServletRequest;
    
    import org.apache.commons.lang.StringUtils;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;
    import org.apache.solr.common.SolrDocumentList;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.springframework.stereotype.Controller;
    import org.springframework.ui.Model;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.servlet.ModelAndView;
    
    import com.xx.other.bean.Article;
    
    
    /**
     * Spring MVC controller that runs a highlighted search against the Solr core at
     * {@link #USER_URL} and hands the hits to the {@code solrArticle} view.
     */
    @Controller
    @RequestMapping("/solr")
    public class SolrArticleController {
        private static final String USER_URL = "http://localhost:8188/solr/article";
        private static final Logger LOG = LoggerFactory.getLogger(SolrArticleController.class);
        /** Name of the Solr field that is queried and highlighted. */
        private static final String CONTENT_FIELD = "text_ikarticle";
        /** Single client instance reused by every request. */
        private static final HttpSolrServer solrServer = new HttpSolrServer(USER_URL);

        /**
         * Searches {@code text_ikarticle} for the request parameter {@code param} and
         * exposes the hits to the view as the model attribute {@code list}.
         *
         * <p>When {@code param} is missing or empty the query term defaults to {@code *}.
         * Matching terms are wrapped in {@code <font color="red">…</font>} by Solr.
         *
         * @param request current request; the search term is read from parameter {@code param}
         * @param model   model that receives the {@code list} attribute
         * @return the logical view name {@code solrArticle}
         * @throws Exception if re-decoding the parameter or the Solr query fails
         */
        @RequestMapping(value="/query")
        public String queryArticle(HttpServletRequest request, Model model) throws Exception {
            String reqparam = request.getParameter("param");
            // getParameter returns null when the parameter is absent; re-decode only a
            // real value so the "*" default below is reachable instead of an NPE.
            if (reqparam != null) {
                // NOTE(review): assumes the container decoded the query string as
                // ISO-8859-1 and the client sent UTF-8 — confirm the container's URI encoding.
                reqparam = new String(reqparam.getBytes("iso-8859-1"), "utf-8");
            }
            if (StringUtils.isEmpty(reqparam)) {
                reqparam = "*";
            }

            SolrQuery query = new SolrQuery();
            query.setHighlight(true);
            query.addHighlightField(CONTENT_FIELD);
            query.setHighlightSimplePre("<font color=\"red\">");
            query.setHighlightSimplePost("</font>");
            query.setHighlightSnippets(1);
            query.setHighlightFragsize(100);
            // NOTE(review): the user's text is passed through unescaped, so Solr query
            // syntax in the parameter is interpreted by Solr — confirm this is intended.
            query.set("q", CONTENT_FIELD + ":" + reqparam);

            QueryResponse response = solrServer.query(query);
            SolrDocumentList docList = response.getResults();
            Map<String, Map<String, List<String>>> highmap = response.getHighlighting();
            LOG.info("一共有 {} 条记录", docList.getNumFound());
            LOG.info("==================");

            List<Article> list = new ArrayList<Article>();
            for (SolrDocument doc : docList) {
                for (String field : doc.getFieldNames()) {
                    LOG.info("{}:{}", field, doc.get(field));
                }
                LOG.info("==================");
                String content = resolveContent(highmap, doc);
                LOG.info(content);
                list.add(new Article(String.valueOf(doc.get("id")), content));
            }
            model.addAttribute("list", list);
            return "solrArticle";
        }

        /**
         * Returns the text to display for one hit: the highlighted snippet list rendered
         * with {@code List.toString()} (same format as before), or the stored field value
         * when the response carries no snippet for this document.
         */
        private static String resolveContent(Map<String, Map<String, List<String>>> highmap, SolrDocument doc) {
            Object id = doc.get("id");
            if (highmap != null && id != null) {
                Map<String, List<String>> byField = highmap.get(id.toString());
                List<String> snippets = (byField == null) ? null : byField.get(CONTENT_FIELD);
                if (snippets != null && !snippets.isEmpty()) {
                    return snippets.toString();
                }
            }
            // No highlighting available for this hit: fall back to the stored value.
            Object stored = doc.get(CONTENT_FIELD);
            return stored == null ? "" : stored.toString();
        }
    }
    

    solrArticle.jsp

    <%-- Renders the model attribute "list" as an unordered list, one item per article. --%>
    <%@ page language="java" contentType="text/html; charset=UTF-8"
        pageEncoding="UTF-8"%>
    <%@ taglib prefix="c"  uri="http://java.sun.com/jsp/jstl/core" %>
    <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    <html>
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    <title>Insert title here</title>
    </head>
    <body>
    <ul>
    <%-- content is written without escaping; NOTE(review): presumably so embedded highlight markup renders as HTML — confirm the content is trusted. --%>
    <c:forEach items="${list}" var="article" varStatus="vs">  
    <li>${article.articleID}===>${article.content}</li>
    </c:forEach>    
    </ul>
    </body>
    </html>
    

    检索成功,显示数据如图所示:

    检索一半.png 检索你的话.png

    相关文章

      网友评论

          本文标题:Solr中文分词器配置及数据检索

          本文链接:https://www.haomeiwen.com/subject/ztkqdttx.html