美文网首页
java lire es cedd 图片相似搜索

java lire es cedd 图片相似搜索

作者: 乘以零 | 来源:发表于2022-06-15 16:10 被阅读0次

    我最喜欢直接贴代码

    Elasticsearch 8.2.2,Maven 依赖如下:
    
    <dependency>
                <groupId>co.elastic.clients</groupId>
                <artifactId>elasticsearch-java</artifactId>
                <version>8.2.2</version>
            </dependency>
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-client</artifactId>
                <version>8.2.2</version>
            </dependency>
            <dependency>
                <groupId>org.elasticsearch</groupId>
                <artifactId>elasticsearch-x-content</artifactId>
                <version>8.2.2</version>
            </dependency>
            <dependency>
                <groupId>com.github.zengde</groupId>
                <artifactId>lire</artifactId>
                <version>1.0b2</version>
            </dependency>
    
        // 获取 ES client
        /**
         * Builds an {@code ElasticsearchClient} that talks plain HTTP to the hard-coded node
         * {@code 10.6.30.139:9200}, using a Jackson-based JSON mapper for the transport.
         *
         * <p>The newly built client is printed to standard output before it is returned.
         *
         * @return a new client; each call builds a new underlying {@code RestClient}
         */
        private static ElasticsearchClient createElasticsearchClient() {
            HttpHost node = new HttpHost("10.6.30.139", 9200, "http");
            RestClient lowLevelClient = RestClient.builder(node).build();
            ElasticsearchClient esClient =
                    new ElasticsearchClient(new RestClientTransport(lowLevelClient, new JacksonJsonpMapper()));
            System.out.println(esClient);
            return esClient;
        }
    
    
    // 新建索引 dims指定144维度
    /**
     * Creates the index {@code INDEX_NAME} with two fields: {@code imageVector}, an indexed
     * 144-dimension {@code dense_vector}, and {@code imageUrl}, a {@code text} field.
     *
     * <p>The create-index response is printed to standard output.
     *
     * @throws IOException if the request to Elasticsearch fails
     */
    private static void createIndex() throws IOException {
        // "index": true is set explicitly because _knn_search needs Elasticsearch to build an
        // extra data structure for the field (currently an HNSW graph).
        // "similarity" selects the vector similarity: l2_norm, dot_product or cosine.
        // dot_product is only accepted for unit-length vectors, and imageVector() returns the raw
        // CEDD histogram without normalizing it, so cosine is used here. It also matches the
        // cosineSimilarity script used by search().
        // NOTE(review): cosine rejects zero-magnitude vectors — confirm CEDD never yields an all-zero histogram.
        String mapping = "{" +
                "  \"mappings\": {" +
                "      \"properties\": {" +
                "        \"imageVector\": {" +
                "          \"type\": \"dense_vector\"," +
                "          \"dims\": 144," +
                "          \"index\": true,\n" +
                "          \"similarity\": \"cosine\"" +
                "        }," +
                "        \"imageUrl\": {" +
                "          \"type\": \"text\"" +
                "        }" +
                "      }" +
                "  }" +
                "}";
        CreateIndexRequest request = new CreateIndexRequest.Builder()
                .index(INDEX_NAME)
                .withJson(IOUtils.toInputStream(mapping, StandardCharsets.UTF_8))
                .build();
        CreateIndexResponse response = client.indices().create(request);
        System.out.println(response);
    }
    
    //根据图片地址返回144维度cedd向量
    /**
     * Loads the image at the given address and returns its CEDD feature histogram as doubles.
     *
     * <p>The address is first passed through {@code OssUtils.processPic} with a resize directive
     * ({@code h_250,w_250}), so the descriptor is extracted from the resized image. The processed
     * URL and both the byte and double histograms are printed for debugging.
     *
     * @param url address of the source image
     * @return the CEDD histogram converted to a {@code double[]}
     *         (presumably 144 values, matching the index mapping — verify against LIRE)
     * @throws IOException if the image cannot be read or decoded
     * @throws Exception   for any other failure raised by the helpers used here
     */
    private static double[] imageVector(String url) throws Exception {
        url = OssUtils.processPic(url, "?x-oss-process=image/resize,m_lfit,h_250,w_250");
        System.out.println(url);
        BufferedImage img = ImageIO.read(new URL(url));
        if (img == null) {
            // ImageIO.read returns null instead of throwing when no registered reader can decode
            // the data; fail with a clear message rather than an NPE inside the extractor.
            throw new IOException("Unable to decode image from " + url);
        }
        CEDD lireFeature = new CEDD();
        lireFeature.extract(img);
        byte[] byteHistogram = lireFeature.getByteHistogram();
        printArray(byteHistogram);
        double[] ds = SerializationUtils.castToDoubleArray(byteHistogram);
        printArray(ds);
        return ds;
    }
    
    //根据图片地址返回144维度cedd向量(List<Double> 形式,供查询脚本参数使用)
        /**
         * Returns the CEDD vector of the image at {@code url} as a list of boxed doubles.
         *
         * <p>This is {@code imageVector(url)} copied element by element into a new list, in order.
         *
         * @param url address of the source image
         * @return a new mutable list holding the vector components
         * @throws Exception if {@code imageVector} fails
         */
        private static List<Double> imageVectorList(String url) throws Exception {
            double[] raw = imageVector(url);
            List<Double> boxed = new ArrayList<>(raw.length);
            for (int i = 0; i < raw.length; i++) {
                boxed.add(raw[i]);
            }
            return boxed;
        }
    
    // 增加索引
        /**
         * Indexes one image into {@code INDEX_NAME}, using the product id as the document id.
         *
         * <p>The document has two fields: {@code imageVector}, the CEDD vector from
         * {@code imageVector(url)}, and {@code imageUrl}, the address passed through
         * {@code OssUtils.processPic(url, "")}.
         *
         * @param url       address of the image to index
         * @param productId product identifier; its string form becomes the document id
         * @throws Exception if feature extraction or the index request fails
         */
        public static void index(String url, Long productId) throws Exception {
            Map<String, Object> doc = new HashMap<>();
            doc.put("imageVector", imageVector(url));
            doc.put("imageUrl", OssUtils.processPic(url, ""));
            IndexRequest<Object> request = new IndexRequest.Builder<>()
                    .index(INDEX_NAME)
                    .id(String.valueOf(productId))
                    .document(doc)
                    .build();
            // The response is not used; failures surface as exceptions.
            client.index(request);
        }
    
    
    //搜索
    /**
     * Finds indexed images similar to the image at {@code url} and returns one page of results.
     *
     * <p>A {@code script_score} query over {@code match_all} scores every document with
     * {@code cosineSimilarity(query_vector, imageVector) + 1.0}. Each hit's source map is
     * extended with its document {@code id} and {@code score} before being added to the page.
     *
     * @param url      address of the query image
     * @param pageNum  page number handed to {@code Pager}
     * @param pageSize page size handed to {@code Pager}
     * @return the pager populated with the hits of the requested page
     * @throws Exception if feature extraction or the search request fails
     */
    public static Pager<HashMap> search(String url, Integer pageNum, Integer pageSize) throws Exception {
        Pager<HashMap> page = new Pager(pageNum, pageSize);
        List<Double> queryVector = imageVectorList(url);

        // Script that scores a document by its similarity to the query vector.
        Script similarityScript = new Script.Builder()
                .inline(new InlineScript.Builder()
                        .source("cosineSimilarity(params.query_vector, 'imageVector') + 1.0")
                        .params("query_vector", JsonData.of(queryVector))
                        .build())
                .build();

        ScriptScoreQuery scoreQuery = new ScriptScoreQuery.Builder()
                .script(similarityScript)
                .query(new MatchAllQuery.Builder().build()._toQuery())
                .build();

        SearchRequest searchRequest = new SearchRequest.Builder()
                .index(INDEX_NAME)
                .from(page.getFirstResult())
                .size(page.getPageSize())
                .query(scoreQuery._toQuery())
                .build();

        SearchResponse<HashMap> result = client.search(searchRequest, HashMap.class);

        // Add the hit id and score to each source map.
        List<HashMap> rows = new ArrayList<>();
        for (Hit<HashMap> match : result.hits().hits()) {
            HashMap row = match.source();
            row.put("id", match.id());
            row.put("score", match.score());
            rows.add(row);
        }
        page.setDatas(rows);
        return page;
    }
    
    

    相关文章

      网友评论

          本文标题:java lire es cedd 图片相似搜索

          本文链接:https://www.haomeiwen.com/subject/oeuxvrtx.html