美文网首页
Ceph CrushMap及RGW Placement设置

Ceph CrushMap及RGW Placement设置

作者: 圣地亚哥_SVIP | 来源:发表于2019-08-08 15:36 被阅读0次

在前面的博文中介绍了Ceph集群及RGW的部署,同时提到:针对RGW,bucket-index对象性能要求高,且不同用户具有不同的存储性能要求。因此,这篇文章介绍如何设计crush map,使RGW能够支持不同存储性能要求的数据。

在现有集群中添加OSD

三个节点,各添加一个OSD。

#qemu-img create -f qcow2 lm1_journal2.qcow2 50G
#qemu-img create -f qcow2 lm2_journal2.qcow2 50G
#qemu-img create -f qcow2 lm3_journal2.qcow2 50G
#qemu-img create -f qcow2 lm1_osd3.qcow2 100G
#qemu-img create -f qcow2 lm2_osd3.qcow2 100G
#qemu-img create -f qcow2 lm3_osd3.qcow2 100G

界面挂载块设备

OSD盘设置:

#ansible ceph -m command -a "pvcreate /dev/vdf"
#ansible ceph -m command -a "vgcreate datavg3 /dev/vdf"
#ansible ceph -m command -a "lvcreate -n datalv3 -l 100%Free datavg3"

SSD盘设置(block.db, block.wal):

#ansible ceph -m command -a "parted /dev/vde mklabel gpt"
#ansible ceph -m command -a "parted /dev/vde mkpart primary 2048s 50%"
#ansible ceph -m command -a "parted /dev/vde mkpart primary 50% 100%"

#ansible ceph -m command -a "pvcreate /dev/vde1"
#ansible ceph -m command -a "pvcreate /dev/vde2"

#ansible ceph -m command -a "vgcreate block_db_vg3 /dev/vde1"
#ansible ceph -m command -a "vgcreate block_wal_vg3 /dev/vde2"

#ansible ceph -m command -a "lvcreate -n dblv3 -l 100%Free block_db_vg3"
#ansible ceph -m command -a "lvcreate -n wallv3 -l 100%Free block_wal_vg3"

Luminous1/2/3分别运行:

ceph-volume lvm prepare --data /dev/datavg3/datalv3 --block.wal block_wal_vg3/wallv3 --block.db block_db_vg3/dblv3
ceph-volume lvm activate {osd_id} {osd_fsid}

更新crush map

配置文件添加:

[osd]
osd crush update on start = false

重启所有OSD

添加虚拟节点:

#ceph osd crush add-bucket ssd-luminous1 host
#ceph osd crush add-bucket ssd-luminous2 host
#ceph osd crush add-bucket ssd-luminous3 host
#ceph osd crush add-bucket ssd-root root

将对应的OSD添加至指定的host中:

# ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME          STATUS REWEIGHT PRI-AFF 
    -11             0 host ssd-luminous3                         
    -10             0 host ssd-luminous2                         
     -9             0 host ssd-luminous1                         
     -1       0.58498 root default                               
     -3       0.19499     host luminous1                         
      0   hdd 0.09799         osd.0          up  1.00000 1.00000 
      1   hdd 0.09799         osd.1          up  1.00000 1.00000 
     -5       0.19499     host luminous2                         
      2   hdd 0.09799         osd.2          up  1.00000 1.00000 
      3   hdd 0.09799         osd.3          up  1.00000 1.00000 
     -7       0.19499     host luminous3                         
      4   hdd 0.09799         osd.4          up  1.00000 1.00000 
      5   hdd 0.09799         osd.5          up  1.00000 1.00000 
      6   hdd       0 osd.6                  up  1.00000 1.00000 
      7   hdd       0 osd.7                  up  1.00000 1.00000 
      8   hdd       0 osd.8                  up  1.00000 1.00000 

#ceph osd crush set osd.6 0.09798 root=ssd-root host=ssd-luminous1
#ceph osd crush set osd.7 0.09798 root=ssd-root host=ssd-luminous2
#ceph osd crush set osd.8 0.09798 root=ssd-root host=ssd-luminous3

# ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME          STATUS REWEIGHT PRI-AFF 
    -11       0.09798 host ssd-luminous3                         
      8   hdd 0.09798     osd.8              up  1.00000 1.00000 
    -10       0.09798 host ssd-luminous2                         
      7   hdd 0.09798     osd.7              up  1.00000 1.00000 
     -9       0.09798 host ssd-luminous1                         
      6   hdd 0.09798     osd.6              up  1.00000 1.00000 
     -1       0.58498 root default                               
     -3       0.19499     host luminous1                         
      0   hdd 0.09799         osd.0          up  1.00000 1.00000 
      1   hdd 0.09799         osd.1          up  1.00000 1.00000 
     -5       0.19499     host luminous2                         
      2   hdd 0.09799         osd.2          up  1.00000 1.00000 
      3   hdd 0.09799         osd.3          up  1.00000 1.00000 
     -7       0.19499     host luminous3                         
      4   hdd 0.09799         osd.4          up  1.00000 1.00000 
      5   hdd 0.09799         osd.5          up  1.00000 1.00000 

如上,添加了三个虚拟host。同时,将三个OSD添加至对应的虚拟host下。

创建新的rule set

#ceph osd getcrushmap -o /tmp/mycrushmap
#crushtool -d /tmp/mycrushmap > /tmp/mycrushmapcp

编辑/tmp/mycrushmapcp:

如下:

root ssd-root {
        id -15          # do not change unnecessarily
        id -16 class hdd                # do not change unnecessarily
        id -17 class ssd                # do not change unnecessarily
        # weight 0.000
        alg straw2
        hash 0  # rjenkins1
        item ssd-luminous1 weight 0.098
        item ssd-luminous2 weight 0.098
        item ssd-luminous3 weight 0.098
}

#ssd-rules
rule index_rule {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take ssd-root
        step chooseleaf firstn 0 type host
        step emit
}


#crushtool -c /tmp/mycrushmapcp -o /tmp/mycrushmapnew
#ceph osd setcrushmap -i /tmp/mycrushmapnew 

# ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME          STATUS REWEIGHT PRI-AFF 
    -15             0 root ssd-root                              
    -11       0.09798 host ssd-luminous3                         
      8   ssd 0.09798     osd.8              up  1.00000 1.00000 
    -10       0.09798 host ssd-luminous2                         
      7   ssd 0.09798     osd.7              up  1.00000 1.00000 
     -9       0.09798 host ssd-luminous1                         
      6   ssd 0.09798     osd.6              up  1.00000 1.00000 
     -1       0.58498 root default                               
     -3       0.19499     host luminous1                         
      0   hdd 0.09799         osd.0          up  1.00000 1.00000 
      1   hdd 0.09799         osd.1          up  1.00000 1.00000 
     -5       0.19499     host luminous2                         
      2   hdd 0.09799         osd.2          up  1.00000 1.00000 
      3   hdd 0.09799         osd.3          up  1.00000 1.00000 
     -7       0.19499     host luminous3                         
      4   hdd 0.09799         osd.4          up  1.00000 1.00000 
      5   hdd 0.09799         osd.5          up  1.00000 1.00000 

如上完成了一个基于SSD设备的rule set的创建。

测试

#ceph osd pool create test 32 32 index_rule
pool 'test' created

#ceph osd pool get test crush_rule
crush_rule: index_rule

# ceph pg dump | grep '^12\.' | awk 'BEGIN{print "PG_id","\t","copy_set"}{print $1,"\t",$17}' | less

    PG_id    copy_set
    dumped all
    12.1d    [7,6,8]
    12.1c    [6,7,8]
    12.1a    [8,7,6]
    ...

新的pg都落在指定的osd上。

基于定制的crush map设置不同的rgw placement策略

在上文中,我们利用定制的crush map创建了一个独立的rule set。此rule set可使数据落在独立的一批OSD中。

需求:

  1. index pool: 落在此定制的rule set中;
  2. data pool:不同用户可以选择不同性能的Pool。

index pool: 创建pool时可以直接指定对应的crush rule。

对于不同的用户使用不同的data pool,有两种方法:

创建一个data pool:

# ceph osd pool create upc.rgw.test.data 32 32 index_rule
    # ceph osd pool application enable upc.rgw.test.data rgw
  1. 基于placement策略,placement关联bucket

创建placement:

设置为默认的zone,此处应在部署时即设定
#radosgw-admin zone default --rgw-zone=upc --rgw-zonegroup=pd --rgw-realm=sh
#radosgw-admin period update --commit


#radosgw-admin zonegroup placement add  --rgw-zonegroup=pd  --placement-id=temp
#radosgw-admin zonegroup placement modify --rgw-zonegroup=pd --placement-id=temp --tags="Tag"
#radosgw-admin zone placement add --rgw-zone upc --placement-id temp --data-pool upc.rgw.test.data --index-pool upc.rgw.buckets.index --data-extra-pool upc.rgw.buckets.non-ec
#radosgw-admin period update --commit

配置用户的placement:

#radosgw-admin metadata get user:ups302 > ups3.json

编辑ups3.json:

    修改default_placement : temp,设置一个tag用于权限控制。
    "default_placement": "temp",
    "default_storage_class": "",
    "placement_tags": [
        "Tag"
    ],

更新用户元数据

#radosgw-admin metadata put user:ups302 < ups3.json

测试"storage class"及"compress"功能

创建一个bucket:

#s3cmd mb s3://second    //创建在user.default_placement,当前为temp
# s3cmd put ups3.json s3://second
upload: 'ups3.json' -> 's3://second/ups3.json'  [1 of 1]
 1199 of 1199   100% in    0s    25.21 kB/s  done

利用--bucket-location覆盖用户的default_placement
#s3cmd mb s3://second --bucket-location=":default-placement" 

可以发现数据实际存放在了我们指定的pool中:

# rados -p upc.rgw.test.data ls
1c60b268-0a5d-4718-ad02-e4b5bce824bf.136021.1_ups3.json
  2. 基于Storage Class,直接关联object data

在Ceph Nautilus版本(N版)的RGW中,提供了storage class的功能。基于storage class可以为不同的object设置不同的存储策略。

添加storage class,我们添加在default-placement中:

# radosgw-admin zonegroup placement add --rgw-zonegroup pd --placement-id default-placement --storage-class COLD
    [
        {
            "key": "default-placement",
            "val": {
                "name": "default-placement",
                "tags": [],
                "storage_classes": [
                    "COLD",
                    "STANDARD"
                ]
            }
        },
        {
            "key": "temp",
            "val": {
                "name": "temp",
                "tags": [
                    "Tag"
                ],
                "storage_classes": [
                    "STANDARD"
                ]
            }
        }
    ]

设置storage class 的data pool,并且设置压缩策略:

# radosgw-admin zone placement add --rgw-zone upc --placement-id default-placement --storage-class COLD --data-pool upc.rgw.test.data --compression zlib    
    ...
      "placement_pools": [
            {
                "key": "default-placement",
                "val": {
                    "index_pool": "upc.rgw.buckets.index",
                    "storage_classes": {
                        "COLD": {
                            "data_pool": "upc.rgw.test.data",
                            "compression_type": "zlib"
                        },
                        "STANDARD": {
                            "data_pool": "upc.rgw.buckets.data"
                        }
                    },
                    "data_extra_pool": "upc.rgw.buckets.non-ec",
                    "index_type": 0
                }
            },
    ...

#radosgw-admin period update --commit

测试

使用storage class,有两种方式:

  1. 编辑用户的元数据:

"default_storage_class": ""

  2. 添加http头部(x-amz-storage-class):

    #s3cmd --storage-class=COLD put cold_data s3://second

[root@luminous1 ~]# rados -p upc.rgw.test.data ls
1c60b268-0a5d-4718-ad02-e4b5bce824bf.136021.4__shadow_.6sYlWrtESVuQNVMTsCNSToM_MB293Ah_0
[root@luminous1 ~]# rados -p upc.rgw.buckets.data ls
1c60b268-0a5d-4718-ad02-e4b5bce824bf.136021.4_cold_data

发现其头部对象仍然在"STANDARD" class中,分片对象在对应"COLD" 的 data pool 中;但这里的头部对象并不存储数据。

# radosgw-admin bucket stats --bucket=second

"usage": {
    "rgw.main": {
        "size": 10485760,
        "size_actual": 10485760,
        "size_utilized": 10212,
        "size_kb": 10240,
        "size_kb_actual": 10240,
        "size_kb_utilized": 10,
        "num_objects": 1
    }
},

其中:size_utilized/size_kb_utilized 为实际占用的磁盘空间,可以看出压缩后的大小为10KB。原始大小"size": 10MB。

查看一下其头部对象的xattr:

# rados -p upc.rgw.buckets.data listxattr 1c60b268-0a5d-4718-ad02-e4b5bce824bf.136021.4_cold_data
user.rgw.acl
user.rgw.compression
user.rgw.content_type
user.rgw.etag
user.rgw.idtag
user.rgw.manifest
user.rgw.pg_ver
user.rgw.source_zone
user.rgw.storage_class
user.rgw.tail_tag
user.rgw.x-amz-content-sha256
user.rgw.x-amz-date
user.rgw.x-amz-meta-s3cmd-attrs

压缩信息存储在了xattr:user.rgw.compression。利用getxattr检索发现即是"zlib"(compress算法)。

相关文章

网友评论

      本文标题:Ceph CrushMap及RGW Placement设置

      本文链接:https://www.haomeiwen.com/subject/comxjctx.html