美文网首页
Ceph RGW:数据的存储及寻址

Ceph RGW:数据的存储及寻址

作者: 圣地亚哥_SVIP | 来源:发表于2019-02-12 10:54 被阅读0次

    RGW数据分布及寻址

    RGW是一个对象处理网关。数据实际存储在ceph集群中。利用librados的接口,与ceph集群通信。RGW主要存储三类数据:元数据(metadata)、索引数据(bucket index)、数据(data)。这三类数据一般存储在不同的pool中,元数据也分多种元数据,存在不同的ceph pool中。

    1、 Metadata
    元数据信息包括:user,bucket,以及bucket.instance。其中:
    user: 主要是对象存储的用户信息
    bucket:主要维护bucket name与bucket instance id之间的映射信息
    bucket.instance:维护了bucket instance信息

    查看user的元数据如下:
    radosgw-admin metadata list user:

    //user相关的数据信息
    [
        "ups3",
        "56789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01234",
        "testx$9876543210abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
        "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
        "test",
        "testid"
    ]
    

    radosgw-admin metadata get user:testid:

    //获取用户testid的数据信息
    {
        "key": "user:testid",
        "ver": {
            "tag": "_cAI-F3h-MuIUGNtXh2RgHlf",
            "ver": 1
        },
        "mtime": "2018-11-05 08:45:47.819290Z",
        "data": {
            "user_id": "testid",
            "display_name": "M. Tester",
            "email": "tester@ceph.com",
            "suspended": 0,
            "max_buckets": 1000,
            "auid": 0,
            "subusers": [],
            "keys": [
                {
                    "user": "testid",
                    "access_key": "0555b35654ad1656d804",
                    "secret_key": "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="
                }
            ],
            "swift_keys": [],
            "caps": [],
            "op_mask": "read, write, delete",
            "default_placement": "",
            "placement_tags": [],
            "bucket_quota": {
                "enabled": false,
                "check_on_raw": false,
                "max_size": -1,
                "max_size_kb": 0,
                "max_objects": -1
            },
            "user_quota": {
                "enabled": false,
                "check_on_raw": false,
                "max_size": -1,
                "max_size_kb": 0,
                "max_objects": -1
            },
            "temp_url_keys": [],
            "type": "rgw",
            "mfa_ids": [],
            "attrs": [
                {
                    "key": "user.rgw.idtag",
                    "val": ""
                }
            ]
        }
    }
    
    

    radosgw-admin metadata list bucket:

    //bucket相关的元数据
    [
        "first"
    ]
    

    radosgw-admin metadata get bucket:first:

    //bucket相关的元数据
    {
        "key": "bucket:first",
        "ver": {
            "tag": "_Hx7x_Mor001U3WM8zfUAH6C",
            "ver": 1
        },
        "mtime": "2018-11-05 09:14:01.135441Z",
        "data": {
            "bucket": {
                "name": "first",
                "marker": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
                "bucket_id": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
                "tenant": "",
                "explicit_placement": {
                    "data_pool": "",
                    "data_extra_pool": "",
                    "index_pool": ""
                }
            },
            "owner": "testid",
            "creation_time": "2018-11-05 09:13:59.491939Z",
            "linked": "true",
            "has_bucket_info": "false"
        }
    }
    

    radosgw-admin metadata list bucket.instance:

    //bucket.instance相关的元数据
    [
        "first:b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1"
    ]
    

    radosgw-admin metadata get bucket.instance:first:{bucket_id}

    //bucket.instance相关的元数据
    {
        "key": "bucket.instance:first:b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
        "ver": {
            "tag": "_CUSoooY8ectqQPoZELOd_BE",
            "ver": 1
        },
        "mtime": "2018-11-05 09:14:01.132899Z",
        "data": {
            "bucket_info": {
                "bucket": {
                    "name": "first",
                    "marker": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
                    "bucket_id": "b96b1b04-cb67-4f0d-81ba-dbb70fd80678.4161.1",
                    "tenant": "",
                    "explicit_placement": {
                        "data_pool": "",
                        "data_extra_pool": "",
                        "index_pool": ""
                    }
                },
                "creation_time": "2018-11-05 09:13:59.491939Z",
                "owner": "testid",
                "flags": 0,
                "zonegroup": "98906840-3c9c-4402-beec-1616bcf2e07c",
                "placement_rule": "default-placement",
                "has_instance_obj": "true",
                "quota": {
                    "enabled": false,
                    "check_on_raw": false,
                    "max_size": -1,
                    "max_size_kb": 0,
                    "max_objects": -1
                },
                "num_shards": 0,
                "bi_shard_hash_type": 0,
                "requester_pays": "false",
                "has_website": "false",
                "swift_versioning": "false",
                "swift_ver_location": "",
                "index_type": 0,
                "mdsearch_config": [],
                "reshard_status": 0,
                "new_bucket_instance_id": ""
            },
            "attrs": [
                {
                    "key": "user.rgw.acl",
                    "val": "AgKNAAAAAwIXAAAABgAAAHRlc3RpZAkAAABNLiBUZXN0ZXIEA2oAAAABAQAAAAYAAAB0ZXN0aWQPAAAAAQAAAAYAAAB0ZXN0aWQFAzsAAAACAgQAAAAAAAAABgAAAHRlc3RpZAAAAAAAAAAAAgIEAAAADwAAAAkAAABNLiBUZXN0ZXIAAAAAAAAAAAAAAAAAAAAA"
                },
                {
                    "key": "user.rgw.idtag",
                    "val": ""
                }
            ]
        }
    }
    

    2、Bucket Index
    bucket index主要维护的是一个bucket中object的索引信息。一个bucket对应一个或多个rados object(开启bucket shards下)。维护的是一个key-val的map结构,map存放在object的omap(rocksdb)中,key对应的rgw object,val是关于rgw object的一些元数据信息,检索bucket的存放的object时,需要这些信息。omap也包含一个Header,其存放的是bucket account info,如此bucket中Object的个数,总的size等。
    3、Data
    rgw object内容,存放在一个或多个rados object中。这些rados object分为head和tail两部分,head最多可以容纳4MB的数据(对应下文manifest中的max_head_size),如果一个rgw object的大小小于4MB,那么只有head。否则剩余的数据会按照集群rados object的大小条带化分割成多个rados object。

    数据检索路径

    • .rgw.root
    • {zone}.rgw.control
    • {zone}.rgw.meta
    • {zone}.rgw.log
    • {zone}.rgw.buckets.index
    • {zone}.rgw.buckets.data
    • {zone}.rgw.buckets.non-ec

    在Pool: {zone}.rgw.meta利用namespace隔离多个存储空间:

    • root: bucket及bucket-instance
    • users.keys: 用户key
    • users.email:用户Email,object的key值=email
    • users.swift: swift账号
    • users.uid: s3用户及用户的Bucket信息
    • roles:
    • heap:

    对于Pool: {zone}.rgw.log也包含多个namespace:

    • gc: 垃圾清理
    • lc: lifecycle
    • reshard:

    当检索对象存储中的一个object时,会包含三个要素:user,bucket,object。user主要是RGW用于获取user id验证ACL;bucket及object用于确定object在pool中的位置。

    User

    user数据存储在{zone}.rgw.meta:users.uid中,如下:

    [root@luminous1 ~]# rados -p upc.rgw.meta -N users.uid ls
    ups3
    ups3.buckets
    

    包含两部分: ups3: user本身信息; ups3.buckets: 用户所属的bucket。

    ups3: 用户的基本信息,及ACL/Bucket Quota/User Quota等;对应struct RGWUserInfo, 定义于rgw_common.h。
    ups3.buckets:用户所属的Buckets,key-value结构,存放于omap结构中;对应struct cls_user_bucket_entry,定义于cls/user/cls_user_types.h,数据操作如下:

    # rados -p upc.rgw.meta  -N users.uid listomapkeys ups3.buckets
    first
    # rados -p upc.rgw.meta  -N users.uid getomapval ups3.buckets first ups3_bucket
    Writing to ups3_bucket
    # ceph-dencoder import ups3_bucket type cls_user_bucket_entry decode dump_json
        {
            "bucket": {
                "name": "first",
                "marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4"
            },
            "size": 1887,
            "size_rounded": 4096,
            "creation_time": "2019-07-22 02:48:17.945890Z",
            "count": 1,
            "user_stats_sync": "true"
        }
    

    通过{uid}.buckets可以查到用户拥有哪些bucket,以及这些bucket的上述基本数据。

    Bucket

    Bucket信息存放在{zone}.rgw.meta:root中,如下:

    [root@luminous1 ~]# rados -p upc.rgw.meta  -N root ls
    .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
    first
    

    first: 记录了bucket与bucket_instance_id的对应关系,其对应于数据结构:struct RGWBucketEntryPoint
    .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4: bucket instance;寻址方式:.bucket.meta.{tenant}:{bucket.name}:{bucket_id};对应结构体:struct RGWBucketInfo。
    其中Bucket ACL及IAM Policy存放在bucket instance object的attr中。如下:

    # rados -p upc.rgw.meta  -N root get .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 bucket_instance
    通过读取bucket-instance object获取bucket的元数据信息
    # ceph-dencoder import bucket_instance type RGWBucketInfo decode dump_json
    {
        "bucket": {
            "name": "first",
            "marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
            "bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
            "tenant": "",
            "explicit_placement": {
                "data_pool": "",
                "data_extra_pool": "",
                "index_pool": ""
            }
        },
        "creation_time": "2019-07-22 02:48:17.945890Z",
        "owner": "ups3",
        "flags": 0,
        "zonegroup": "2e7ac7db-8e21-43ed-9f3c-5a061ce1c7e3",
        "placement_rule": "default-placement",
        "has_instance_obj": "true",
        "quota": {
            "enabled": false,
            "check_on_raw": false,
            "max_size": -1,
            "max_size_kb": 0,
            "max_objects": -1
        },
        "num_shards": 0,
        "bi_shard_hash_type": 0,
        "requester_pays": "false",
        "has_website": "false",
        "swift_versioning": "false",
        "swift_ver_location": "",
        "index_type": 0,
        "mdsearch_config": [],
        "reshard_status": 0,
        "new_bucket_instance_id": ""
    }
    

    获取Bucket ACL及IAM Policy数据如下:

    ACL:
    
    #rados -p upc.rgw.meta  -N root getxattr .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 user.rgw.acl > acl
    #ceph-dencoder type RGWAccessControlPolicy import acl decode dump_json
        {
            "acl": {
                "acl_user_map": [
                    {
                        "user": "ups3",
                        "acl": 15
                    }
                ],
                "acl_group_map": [],
                "grant_map": [
                    {
                        "id": "ups3",
                        "grant": {
                            "type": {
                                "type": 0
                            },
                            "id": "ups3",
                            "email": "",
                            "permission": {
                                "flags": 15
                            },
                            "name": "S3 User",
                            "group": 0,
                            "url_spec": ""
                        }
                    }
                ]
            },
            "owner": {
                "id": "ups3",
                "display_name": "S3 User"
            }
        }
    
    Bucket Policy:
    
    #rados -p upc.rgw.meta  -N root getxattr .bucket.meta.first:1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 user.rgw.iam-policy
    {
      "Version": "2012-10-17",
      "Statement": [{
        "Effect": "Allow",
        "Principal": {"AWS": ["arn:aws:iam::usfolks:user/fred"]},
        "Action": "s3:PutObjectAcl",
        "Resource": [
          "arn:aws:s3:::first/*"
        ]
      }]
    }
    

    Object

    • Bucket Index:

    Bucket Index: Bucket中包含的Object信息,都存放在一个或多个Object的 omap 中。此omap为一个key-value结构,key为object的名称,value对应struct rgw_bucket_dir_entry: cls_rgw_types.h
    Bucket Index Object:

    pool: '{zone}.rgw.buckets.index';
    Object名称:1. 无shard下,'.dir.{bucket_id}';2. shard,'.dir.{bucket_id}.{shard_id}'。
    

    如下:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.index ls
    .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
    

    在此bucket下,有一个object: ntp.conf:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.index listomapkeys .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4
    ntp.conf
    

    检索value:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.index getomapval .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 ntp.conf object_key
    Writing to object_key
    [root@luminous1 ~]# ceph-dencoder type rgw_bucket_dir_entry import object_key decode dump_json
    {
        "name": "ntp.conf",
        "instance": "",
        "ver": {
            "pool": 9,
            "epoch": 1
        },
        "locator": "",
        "exists": "true",
        "meta": {
            "category": 1,
            "size": 1887,
            "mtime": "2019-07-22 02:48:36.088530Z",
            "etag": "385c339c343a2495fd4479c992bfeb10",
            "owner": "ups3",
            "owner_display_name": "S3 User",
            "content_type": "text/plain",
            "accounted_size": 1887,
            "user_data": ""
        },
        "tag": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44192.174",
        "flags": 0,
        "pending_map": [],
        "versioned_epoch": 0
    }
    

    omap header记录了以下统计信息:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.index getomapheader .dir.1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4 index_object_header
    Writing to index_object_header
    [root@luminous1 ~]# ceph-dencoder type rgw_bucket_dir_header import index_object_header decode dump_json
    {
        "ver": 33,
        "master_ver": 0,
        "stats": [
            1,
            {
                "total_size": 1887,
                "total_size_rounded": 4096,
                "num_entries": 1,
                "actual_size": 1887
            },
            3,
            {
                "total_size": 0,
                "total_size_rounded": 0,
                "num_entries": 0,
                "actual_size": 0
            },
            {
                "reshard_status": "none",
                "new_bucket_instance_id": "",
                "num_shards": -1
            }
        ]
    }
    
    • Object Data:

    对象存储object的数据存放在pool: {zone}.rgw.buckets.data中。object的构成及寻址分为以下两类:

    一个RGW Object可以由一个或多个rados object构成。其中第一个 object 是此RGW Object的 head 对象,主要包含一些元数据信息,如manifest, ACLs, content type, ETag, and user-defined metadata。这些metadata存放在此head 对象的xattr中。其中manifest描述了此rgw object的分布情况。同时,此head对象,最多可额外容纳4MB数据,如果RGW Object大小小于4MB,那么此 RGW Object就不会分片,只有此 head 对象。
    如下检索:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.data listxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf
    user.rgw.acl
    user.rgw.content_type
    user.rgw.etag
    user.rgw.idtag
    user.rgw.manifest
    user.rgw.pg_ver
    user.rgw.source_zone
    user.rgw.tail_tag
    user.rgw.x-amz-content-sha256
    user.rgw.x-amz-date
    user.rgw.x-amz-meta-s3cmd-attrs
    user.rgw.x-amz-storage-class
    
    1. 非multipart上传的object

    目前bucket下有一个ntp.conf<4MB。检索其manifest:

    # rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf user.rgw.manifest > manifest
    
    # ceph-dencoder type RGWObjManifest import manifest decode dump_json
    {
        "objs": [],
        "obj_size": 1887,
        "explicit_objs": "false",
        "head_size": 1887,
        "max_head_size": 4194304,
        "prefix": ".Y3GeEIYgfMSqzZKW6xUfX-dPtPSH50f_",
        "rules": [
            {
                "key": 0,
                "val": {
                    "start_part_num": 0,
                    "start_ofs": 4194304,
                    "part_size": 0,
                    "stripe_max_size": 4194304,
                    "override_prefix": ""
                }
            }
        ],
        "tail_instance": "",
        "tail_placement": {
            "bucket": {
                "name": "first",
                "marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "tenant": "",
                "explicit_placement": {
                    "data_pool": "",
                    "data_extra_pool": "",
                    "index_pool": ""
                }
            },
            "placement_rule": "default-placement"
        }
    }
    

    如上:
    max_head_size: 表示head对象最大size;
    head_size: 表示当前head 对象size;
    prefix: 用于在rados中分片object的寻址。

    RGW Object ACL:

    # rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf user.rgw.acl > object_acl
    # ceph-dencoder type RGWAccessControlPolicy import object_acl decode dump_json
    {
        "acl": {
            "acl_user_map": [
                {
                    "user": "ups3",
                    "acl": 15
                }
            ],
            "acl_group_map": [],
            "grant_map": [
                {
                    "id": "ups3",
                    "grant": {
                        "type": {
                            "type": 0
                        },
                        "id": "ups3",
                        "email": "",
                        "permission": {
                            "flags": 15
                        },
                        "name": "S3 User",
                        "group": 0,
                        "url_spec": ""
                    }
                }
            ]
        },
        "owner": {
            "id": "ups3",
            "display_name": "S3 User"
        }
    }
    

    上传一个>4MB的 RGW Object,检索其manifest信息:

    # dd if=/dev/zero of=./rgw_object bs=1024 count=13000
    # s3cmd put rgw_object s3://first
    # rados -p upc.rgw.buckets.data ls
        1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object
        1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_1
        1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_3
        1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_ntp.conf
        1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_2
    如上,Object分为4个对象:1个head和3个shadow分片。按head、shadow_1、shadow_2、shadow_3的顺序手动拼接(下面的命令省略了先将head对象get到rgw_object_down的步骤),检查md5值相同:
    
    # md5sum rgw_object 
        315e281f1e162ea635b56f7e0a2e25d8  rgw_object
    # rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_1 1
    # cat 1 >> rgw_object_down 
    # rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_2 2
    # cat 2 >> rgw_object_down 
    # rados -p upc.rgw.buckets.data get 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_.KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_3 3
    # cat 3 >> rgw_object_down 
    # md5sum rgw_object_down 
        315e281f1e162ea635b56f7e0a2e25d8  rgw_object_down
    

    Manifest信息:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object user.rgw.manifest > manifest
    [root@luminous1 ~]# ceph-dencoder type RGWObjManifest import manifest decode dump_json
    {
        "objs": [],
        "obj_size": 13312000,
        "explicit_objs": "false",
        "head_size": 4194304,
        "max_head_size": 4194304,
        "prefix": ".KaKMSwXh-SMiV-CYraCvEIUgz-_4tfD_",
        "rules": [
            {
                "key": 0,
                "val": {
                    "start_part_num": 0,
                    "start_ofs": 4194304,
                    "part_size": 0,
                    "stripe_max_size": 4194304,
                    "override_prefix": ""
                }
            }
        ],
        "tail_instance": "",
        "tail_placement": {
            "bucket": {
                "name": "first",
                "marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "tenant": "",
                "explicit_placement": {
                    "data_pool": "",
                    "data_extra_pool": "",
                    "index_pool": ""
                }
            },
            "placement_rule": "default-placement"
        }
    }
    

    根据manifest检索对象:

    - head: {bucket-id}_{object_name},最大容纳4MB大小数据;
    - shadow分片: {bucket-id}__shadow_{prefix}_{id} //id根据RGW Object大小及Rados Object(stripe_max_size)大小计算。
    
    2. multipart object

    对于一个大的RGW Object,会被切割成多个独立的RGW Object上传,称为multipart。multipart的优势是断点续传。s3cmd默认切割大小为15MB。

    在此,上传一个约46MB(48128000字节)大小的Object。

    [root@luminous1 ~]# s3cmd put rgw_object s3://first
    upload: 'rgw_object' -> 's3://first/rgw_object'  [part 1 of 4, 15MB] [1 of 1]
     15728640 of 15728640   100% in    1s    11.80 MB/s  done
    upload: 'rgw_object' -> 's3://first/rgw_object'  [part 2 of 4, 15MB] [1 of 1]
     15728640 of 15728640   100% in    1s    13.61 MB/s  done
    upload: 'rgw_object' -> 's3://first/rgw_object'  [part 3 of 4, 15MB] [1 of 1]
     15728640 of 15728640   100% in    1s    14.76 MB/s  done
    upload: 'rgw_object' -> 's3://first/rgw_object'  [part 4 of 4, 920kB] [1 of 1]
     942080 of 942080   100% in    0s     6.38 MB/s  done
    

    分成了四个部分上传,查看rados对象:

    [root@luminous1 ~]# rados -p upc.rgw.buckets.data ls  | sort
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__multipart_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.4
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_1
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_2
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.1_3
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_1
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_2
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.2_3
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_1
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_2
    1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4__shadow_rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd.3_3
    

    包含了三类对象,head,multipart,shadow

    - head: {bucket_id}_{object_name},只在xattr中存元数据,并不实际包含object data;
    - multipart: multipart分段首对象,{bucket_id}__multipart_{prefix}.{multipart_id},其中`multipart_id`根据`manifest`计算;
    - shadow: 从属于multipart的分段对象,{bucket_id}__shadow_{prefix}.{multipart_id}_{shadow_id},`shadow_id`:根据`manifest.rule.part_size`及 `manifest.rule.stripe_max_size`计算。
    

    multipart下的manifest:

    # rados -p upc.rgw.buckets.data getxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4_rgw_object user.rgw.manifest > manifest
    # ceph-dencoder type RGWObjManifest import manifest decode dump_json
    {
        "objs": [],
        "obj_size": 48128000,
        "explicit_objs": "false",
        "head_size": 0,
        "max_head_size": 0,
        "prefix": "rgw_object.2~BVWLOnCfNdXk4Tky2TB6hLD-VqgYbyd",
        "rules": [
            {
                "key": 0,
                "val": {
                    "start_part_num": 1,
                    "start_ofs": 0,
                    "part_size": 15728640,
                    "stripe_max_size": 4194304,
                    "override_prefix": ""
                }
            },
            {
                "key": 47185920,
                "val": {
                    "start_part_num": 4,
                    "start_ofs": 47185920,
                    "part_size": 942080,
                    "stripe_max_size": 4194304,
                    "override_prefix": ""
                }
            }
        ],
        "tail_instance": "",
        "tail_placement": {
            "bucket": {
                "name": "first",
                "marker": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "bucket_id": "1c60b268-0a5d-4718-ad02-e4b5bce824bf.44166.4",
                "tenant": "",
                "explicit_placement": {
                    "data_pool": "",
                    "data_extra_pool": "",
                    "index_pool": ""
                }
            },
            "placement_rule": "default-placement"
        }
    }
    

    所有的object的检索是根据上述manifest信息构建object index:

    - head_size: head对象大小,此处为0,表示无object data,只有xattr的元数据
    - obj_size: RGW Object的原始大小
    - prefix: object index前缀
    - rules: multipart 及 shadow object检索信息。如在此manifest表示有4个multipart,其中1-3的大小为15MB,第4个为920KB
    - rules.start_part_num: multipart 序号
    - rules.part_size: 此multipart的分段大小
    - rules.stripe_max_size: rados object的大小
    

    在以上的信息中,此RGW Object大小为48128000字节,分为4段,三段15MB,最后一段为920KB。同时每段存储在rados集群中的条带化大小为4MB。因此15MB大小的分段,也分为4个rados object,一个multipart首部,及3个shadow分片。920KB大小的分段只有一个multipart首部。

    .rgw.root:

    包含的都是zone,zonegroup,realm等信息

    # rados -p .rgw.root ls
        period_config.b30c383b-25d3-46aa-8ef1-aedbd0196579
        default.realm
        periods.383529f4-1566-4061-a4ed-5a39188845dc.latest_epoch
        default.zone.b30c383b-25d3-46aa-8ef1-aedbd0196579
        zone_names.upc
        periods.383529f4-1566-4061-a4ed-5a39188845dc.1
        realms.b30c383b-25d3-46aa-8ef1-aedbd0196579
        default.zonegroup.b30c383b-25d3-46aa-8ef1-aedbd0196579
        realms_names.sh
        zonegroup_info.2e7ac7db-8e21-43ed-9f3c-5a061ce1c7e3
        realms.b30c383b-25d3-46aa-8ef1-aedbd0196579.control
        zone_info.1c60b268-0a5d-4718-ad02-e4b5bce824bf
        zonegroups_names.pd

    相关文章

      网友评论

          本文标题:Ceph RGW:数据的存储及寻址

          本文链接:https://www.haomeiwen.com/subject/zgzpaftx.html