crush rule主要作用:
从OSD Map中的哪个节点开始查找
定位副本的搜索模式(广度优先 or 深度优先)
ceph osd crush rule dump
(ceph-mon)[root@node01 /]# ceph osd tree
-6 6.00000 root stat
9 1.00000 osd.9 up 1.00000 1.00000
10 1.00000 osd.10 up 1.00000 1.00000
11 1.00000 osd.11 up 1.00000 1.00000
12 1.00000 osd.12 up 1.00000 1.00000
13 1.00000 osd.13 up 1.00000 1.00000
14 1.00000 osd.14 up 1.00000 1.00000
-5 9.00000 root ssd
0 1.00000 osd.0 up 1.00000 1.00000
1 1.00000 osd.1 up 1.00000 1.00000
2 1.00000 osd.2 up 1.00000 1.00000
3 1.00000 osd.3 up 1.00000 1.00000
4 1.00000 osd.4 up 1.00000 1.00000
5 1.00000 osd.5 up 1.00000 1.00000
6 1.00000 osd.6 up 1.00000 1.00000
7 1.00000 osd.7 up 1.00000 1.00000
8 1.00000 osd.8 up 1.00000 1.00000
-1 15.00000 root default
-2 5.00000 host
0 1.00000 osd.0 up 1.00000 1.00000
2 1.00000 osd.2 up 1.00000 1.00000
4 1.00000 osd.4 up 1.00000 1.00000
11 1.00000 osd.11 up 1.00000 1.00000
13 1.00000 osd.13 up 1.00000 1.00000
-3 5.00000 host
1 1.00000 osd.1 up 1.00000 1.00000
3 1.00000 osd.3 up 1.00000 1.00000
5 1.00000 osd.5 up 1.00000 1.00000
10 1.00000 osd.10 up 1.00000 1.00000
14 1.00000 osd.14 up 1.00000 1.00000
-4 5.00000 host
6 1.00000 osd.6 up 1.00000 1.00000
7 1.00000 osd.7 up 1.00000 1.00000
8 1.00000 osd.8 up 1.00000 1.00000
9 1.00000 osd.9 up 1.00000 1.00000
12 1.00000 osd.12 up 1.00000 1.00000
这里是我已经配置好的osd信息,未配置之前是没有root stat,root ssd这2部分内容的,一共有15个osd,0–8是SSD磁盘,9–14是SATA磁盘。
获取crush map
(ceph-mon)[root@node01 /]# ceph osd getcrushmap -o crushmapgot
got crush map from osdmap epoch 380
反编译crush map
(ceph-mon)[root@node01 /]# crushtool -d crushmapgot -o decrushmap
修改crush map
(ceph-mon)[root@node01 /]# cat decrushmap
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host {
id -2 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.0 weight 1.000
item osd.2 weight 1.000
item osd.4 weight 1.000
item osd.11 weight 1.000
item osd.13 weight 1.000
host {
id -3 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.1 weight 1.000
item osd.3 weight 1.000
item osd.5 weight 1.000
item osd.10 weight 1.000
item osd.14 weight 1.000
host {
id -4 # do not change unnecessarily
# weight 5.000
alg straw
hash 0 # rjenkins1
item osd.6 weight 1.000
item osd.7 weight 1.000
item osd.8 weight 1.000
item osd.9 weight 1.000
item osd.12 weight 1.000
root default {
id -1 # do not change unnecessarily
# weight 15.000
alg straw
hash 0 # rjenkins1
item weight 5.000
item weight 5.000
item weight 5.000
root ssd {
id -5 # do not change unnecessarily
# weight 9.000
alg straw
hash 0 # rjenkins1
item osd.0 weight 1.000
item osd.1 weight 1.000
item osd.2 weight 1.000
item osd.3 weight 1.000
item osd.4 weight 1.000
item osd.5 weight 1.000
item osd.6 weight 1.000
item osd.7 weight 1.000
item osd.8 weight 1.000
root stat {
id -6 # do not change unnecessarily
# weight 6.000
alg straw
hash 0 # rjenkins1
item osd.9 weight 1.000
item osd.10 weight 1.000
item osd.11 weight 1.000
item osd.12 weight 1.000
item osd.13 weight 1.000
item osd.14 weight 1.000
# rules
rule replicated_ruleset {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
rule disks {
ruleset 1
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
rule ssd {
ruleset 2
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type osd
step emit
rule stat {
ruleset 3
type replicated
min_size 1
max_size 10
step take stat
step chooseleaf firstn 0 type osd
step emit
# end crush map
# rules
rule replicated_ruleset {
ruleset 0 #rule编号
type replicated #定义pool类型为replicated(还有erasure code模式)
min_size 1 #pool中最小指定的副本数量不能小于1
max_size 10 #pool中最大指定的副本数量不能大于10
step take default #定义pg查找副本的入口点
step chooseleaf firstn 0 type host #选叶子节点、深度优先、隔离host
step emit #结束
编译crush map
(ceph-mon)[root@node01 /]# crushtool -c decrushmap -o newcrushmap
注入crush map
(ceph-mon)[root@node01 /]# ceph osd setcrushmap -i newcrushmap
(ceph-mon)[root@node01 /]# ceph osd pool create ssd_pool2 256 256
pool 'ssd_pool2' created
(ceph-mon)[root@node01 /]# ceph osd pool create sata_pool2 256 256
pool 'sata_pool2' created
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 381 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 256 pgp_num 256 last_change 383 flags hashpspool stripe_width 0
注意:刚刚创建的两个资源池ssd_pool2 和sata_pool2 的 crush_ruleset 都是0,下面需要修改。
(ceph-mon)[root@node01 /]# ceph osd pool set ssd_pool2 crush_ruleset 2
set pool 34 crush_ruleset to 2
(ceph-mon)[root@node01 /]# ceph osd pool set sata_pool2 crush_ruleset 3
set pool 35 crush_ruleset to 3
(ceph-mon)[root@node01 /]# ceph osd dump|grep ssd_pool2
pool 34 'ssd_pool2' replicated size 3 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 256 pgp_num 256 last_change 385 flags hashpspool stripe_width 0
(ceph-mon)[root@node01 /]# ceph osd dump|grep sata_pool2
pool 35 'sata_pool2' replicated size 3 min_size 1 crush_ruleset 3 object_hash rjenkins pg_num 256 pgp_num 256 last_change 386 flags hashpspool stripe_width 0
验证前先看看ssd_pool2 和sata_pool2 里面是否有对象
(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2
是空的, 用rados命令 添加对象到两个资源池中
(ceph-mon)[root@node01 /]# rados -p ssd_pool2 put test_object1 /etc/hosts
(ceph-mon)[root@node01 /]# rados -p sata_pool2 put test_object2 /etc/hosts
(ceph-mon)[root@node01 /]# rados ls -p ssd_pool2
(ceph-mon)[root@node01 /]# rados ls -p sata_pool2
(ceph-mon)[root@node01 /]# ceph osd map ssd_pool2 test_object1
osdmap e392 pool 'ssd_pool2' (34) object 'test_object1' -> pg 34.d5066e42 (34.42) -> up ([7,2,1], p7) acting ([7,2,1], p7)
(ceph-mon)[root@node01 /]# ceph osd map sata_pool2 test_object2
osdmap e392 pool 'sata_pool2' (35) object 'test_object2' -> pg 35.c5cfe5e9 (35.e9) -> up ([12,14,10], p12) acting ([12,14,10], p12)
test_object1 的三个副本在osd.7,osd.2,osd.1上(均属于root ssd),test_object2 的三个副本在osd.12,osd.14,osd.10上(均属于root stat),说明两个资源池的数据分别落在了各自指定的磁盘上。