美文网首页
elasticSearch 父子模型

elasticSearch 父子模型

作者: 念䋛 | 来源:发表于2022-05-23 08:58 被阅读0次

创建一个带子属性的映射

127.0.0.1:9200/user_index/_mapping   put方法
{
        "properties": {
            "login_name": {
                "type": "keyword"
            },
            "age": {
                "type": "short"
            },
            "address": { //地址 创建了子属性
                "properties": {
                    "province": { //省份
                        "type": "keyword"
                    },
                    "city": { //城市
                        "type": "keyword"
                    },
                    "street": { //街道
                        "type": "keyword"
                    }
                }
            }
        }
}

创建数据

127.0.0.1:9200/user_index/_doc/1  put方法
{
    "login_name": "jack",
    "age": 25,
    "address": [
        {
            "province": "北京",
            "city": "北京",
            "street": "枫林三路"
        },
        {
            "province": "天津",
            "city": "天津",
            "street": "华夏路"
        }
    ]
}
127.0.0.1:9200/user_index/_doc/2 put方法
{
    "login_name": "rose",
    "age": 21,
    "address": [
        {
            "province": "河北",
            "city": "廊坊",
            "street": "燕郊经济开发区"
        },
        {
            "province": "天津",
            "city": "天津",
            "street": "华夏路"
        }
    ]
}

获取数据

127.0.0.1:9200/user_index/_search
{
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "address.province": "北京"  //注意这里的 address.province
                    }
                },
                {
                    "match": {
                        "address.city": "天津"
                    }
                }
            ]
        }
    }
}
结果
{
    "took": 820,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1.2401118,
        "hits": [
            {
                "_index": "user_index",
                "_type": "_doc",
                "_id": "1",
                "_score": 1.2401118,
                "_source": {
                    "login_name": "jack",
                    "age": 25,
                    "address": [
                        {
                            "province": "北京",
                            "city": "北京",
                            "street": "枫林三路"
                        },
                        {
                            "province": "天津",
                            "city": "天津",
                            "street": "华夏路"
                        }
                    ]
                }
            },
            {
                "_index": "user_index",
                "_type": "_doc",
                "_id": "2",
                "_score": 0.5469647,
                "_source": {
                    "login_name": "rose",
                    "age": 21,
                    "address": [
                        {
                            "province": "河北",
                            "city": "廊坊",
                            "street": "燕郊经济开发区"
                        },
                        {
                            "province": "天津",
                            "city": "天津",
                            "street": "华夏路"
                        }
                    ]
                }
            }
        ]
    }
}

注意这里的结果并不是想要的结果:查询的条件是省份为北京、城市为天津。
在实际生产中,我们是想要“省份是北京、城市是天津”这两个条件作为一个整体,同时作用在同一个地址对象上,
而不是只要省份是北京的,或者城市为天津的数据都展示(例如 jack 的“北京”和“天津”分别来自两个不同的地址对象,却依然被命中)。
普通的对象数组数据在ES中会被扁平化处理,处理方式如下:(如果字段需要分词,会将分词数据保存在对应的字段位置,当然实际应该是一个倒排索引,这里只是一个直观的案例)
jack的数据呈现为
{
"login_name" : [ "jack" ],
"address.province" : [ "北京", "天津" ],
"address.city" : [ "北京", "天津" ],
"address.street" : [ "华夏路", "枫林三路" ]
}
可以看到省份和城市并没有强的关联性,所以查询得到的结果和预想的不一样。

nested

可以使用nested方式得出想要的结果
创建映射

127.0.0.1:9200/user_index_nested/_mapping
{
    "properties": {
        "login_name": {
            "type": "keyword"
        },
        "age": {
            "type": "short"
        },
        "address": {
            "type": "nested", //增加nested 内嵌属性
            "properties": {
                "province": {
                    "type": "keyword"
                },
                "city": {
                    "type": "keyword"
                },
                "street": {
                    "type": "keyword"
                }
            }
        }
    }
}

添加数据和普通模式一样
获取数据

127.0.0.1:9200/user_index_nested/_search get 请求
{
    "query": {
        "bool": {
            "must": [
                {
                    "nested": {
                        "path": "address",
                        "query": {
                            "bool": {
                                "must": [
                                    {
                                        "match": {
                                            "address.province": "北京"
                                        }
                                    },
                                    {
                                        "match": {
                                            "address.city": "北京"
                                        }
                                    }
                                ]
                            }
                        }
                    }
                }
            ]
        }
    }
}

这样得到的结果就是预期的结果
nested模型存储结构为

{
  "login_name" : "jack"
}
{
  "address.province" : "北京",
  "address.city" : "北京",
  "address.street" : "枫林三路"
}
{
  "address.province" : "天津",
  "address.city" : "天津",
  "address.street" : "华夏路"
}

可以看到并没有扁平化

父子关系数据建模

nested object的建模,有个不好的地方,就是采取的是类似冗余数据的方式,将多个数据都放在一起了,维护成本就比较高
每次更新,需要重新索引整个对象(包括根对象和嵌套对象)
ES 提供了类似关系型数据库中 Join 的实现。使用 Join 数据类型实现,可以通过 Parent / Child 的关系,从而分离两个对象.父文档和子文档是两个独立的文档
更新父文档无需重新索引整个子文档。子文档被新增,更改和删除也不会影响到父文档和其他子文档。
1.每一个mapping下只能有一个join类型的字段。
2.父文档和子文档必须在同一个分片(shard)上。即: 增删改查一个子文档都必须和父文档使用相同的 routing key。
3.每个元素只能有一个父,但是可以存在多个子。
4.可以为一个已经存在的 join 字段增加新的关联关系。
5.可以为一个已经是父的元素增加一个子元素。

join数据类型在elasticsearch中不应该像关系型数据库那样使用。而且has_child和
has_parent都是比较消耗性能的。
只有当子文档的数量远远多于父文档的数量时,使用join才是有意义的。比如:一个博客下,有多个评论。

创建父映射

127.0.0.1:9200/my_blogs
{
    "mappings": {
        "properties": {
            "blog_comments_relation": { //名称可自己定义
                "type": "join",  //join类型
                "relations": {
                    "blog": "comment"  //父子的关联关系 blog为父
                }
            },
            "content": {
                "type": "text"
            },
            "title": {
                "type": "keyword"
            }
        }
    }
}

创建父数据

127.0.0.1:9200/my_blogs/_doc/blog1
{
    "title": "Learning Elasticsearch",
    "content": "learning ELK is happy",
    "blog_comments_relation": {  //关联的名称,这个要与映射对应
        "name": "blog" //父定义名称为blog,与映射对应
    }
}
127.0.0.1:9200/my_blogs/_doc/blog2
{
    "title": "Learning Hadoop",
    "content": "learning Hadoop",
    "blog_comments_relation": {
        "name": "blog"
    }
}

创建子数据

127.0.0.1:9200/my_blogs/_doc/comment1?routing=blog1  put请求 routing是为了和父数据路由到同一个分片中,还有要注意的是虽然是子数据,索引名称依然是my_blogs
{
    "comment": "I am learning ELK",
    "username": "Jack",
    "blog_comments_relation": {
        "name": "comment",
        "parent": "blog1"
    }
}
127.0.0.1:9200/my_blogs/_doc/comment2?routing=blog2
{
    "comment": "I like Hadoop!!!!!",
    "username": "Jack",
    "blog_comments_relation": {
        "name": "comment",
        "parent": "blog2"
    }
}
127.0.0.1:9200/my_blogs/_doc/comment3?routing=blog2
{
    "comment": "Hello Hadoop",
    "username": "Bob",
    "blog_comments_relation": {
        "name": "comment",
        "parent": "blog2"
    }
}

数据查询
has_child 通过子查询父数据

127.0.0.1:9200/my_blogs/_search
{
    "query": {
        "has_child": {
            "type": "comment", //子文档的关系名称,对应映射 relations 中定义的子类型 comment
            "query": {
                "match": {
                    "username": "Jack" //获取子数据中为jack所关联的主数据
                }
            }
        }
    }
}
得到结果
{
    "took": 16,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1.0,
        "hits": [
            {
                "_index": "my_blogs",
                "_type": "_doc",
                "_id": "blog1",
                "_score": 1.0,
                "_source": {
                    "title": "Learning Elasticsearch",
                    "content": "learning ELK is happy",
                    "blog_comments_relation": {
                        "name": "blog"
                    }
                }
            },
            {
                "_index": "my_blogs",
                "_type": "_doc",
                "_id": "blog2",
                "_score": 1.0,
                "_source": {
                    "title": "Learning Hadoop",
                    "content": "learning Hadoop",
                    "blog_comments_relation": {
                        "name": "blog"
                    }
                }
            }
        ]
    }
}
这里只得到主数据

has_parent 通过主数据获取子数据

127.0.0.1:9200/my_blogs/_search
{
    "query": {
        "has_parent": {
            "parent_type": "blog",  //映射关系
            "query": {
                "match": {
                    "title": "Learning Hadoop" //获取主数据title为Learning Hadoop关联的子数据
                }
            }
        }
    }
}
结果
{
    "took": 4,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 2,
            "relation": "eq"
        },
        "max_score": 1.0,
        "hits": [
            {
                "_index": "my_blogs",
                "_type": "_doc",
                "_id": "comment2",
                "_score": 1.0,
                "_routing": "blog2",
                "_source": {
                    "comment": "I like Hadoop!!!!!",
                    "username": "Jack",
                    "blog_comments_relation": {
                        "name": "comment",
                        "parent": "blog2"
                    }
                }
            },
            {
                "_index": "my_blogs",
                "_type": "_doc",
                "_id": "comment3",
                "_score": 1.0,
                "_routing": "blog2",
                "_source": {
                    "comment": "Hello Hadoop",
                    "username": "Bob",
                    "blog_comments_relation": {
                        "name": "comment",
                        "parent": "blog2"
                    }
                }
            }
        ]
    }
}
只能获取到子数据

post-man请求地址
git@gitee.com:zhangjijige/file.git

相关文章

网友评论

      本文标题:elasticSearch 父子模型

      本文链接:https://www.haomeiwen.com/subject/ihxlprtx.html