背景: kubespray部署的k8s集群,etcd单独用高可用性能型数据盘来维护
etcd替换为数据盘
etcd 共三个节点,逐个节点操作:先 docker stop 停止 etcd 容器,再将原来的数据目录备份(重命名为 /var/lib/etcd-bak),然后挂载新的卷并拷回数据,最后写入 /etc/fstab 使挂载永久生效
- 在 Horizon 中为每个节点创建一块 50G 的卷(类型选择 high),并挂载(attach)到对应节点
1.1 检查
# 1. 确定位置
[root@test-on-prem-k8s-master01 ~]# docker inspect 7df7d8aa2107 | grep -A 10 Binds
"Binds": [
"/etc/ssl/certs:/etc/ssl/certs:ro",
"/etc/ssl/etcd/ssl:/etc/ssl/etcd/ssl:ro",
"/var/lib/etcd:/var/lib/etcd:rw"
],
# 确定数据
[root@test-on-prem-k8s-master01 ~]# tree /var/lib/etcd/
/var/lib/etcd/
└── member
├── snap
│ ├── 0000000000000290-000000000069029e.snap
│ ├── 0000000000000290-00000000006929af.snap
│ ├── 0000000000000290-00000000006950c0.snap
│ ├── 0000000000000290-00000000006977d1.snap
│ ├── 0000000000000290-0000000000699ee2.snap
│ └── db
└── wal
├── 0000000000000054-0000000000655594.wal
├── 0000000000000055-00000000006650cb.wal
├── 0000000000000056-0000000000674ccd.wal
├── 0000000000000057-00000000006836e9.wal
├── 0000000000000058-000000000068faec.wal
└── 0.tmp
[root@test-on-prem-k8s-master01 ~]# netstat -tunlp | grep 2379
tcp 0 0 127.0.0.1:2379 0.0.0.0:* LISTEN 8120/etcd
tcp 0 0 10.220.3.59:2379 0.0.0.0:* LISTEN 8120/etcd
# 2. 操作前后确认集群ok,再开始下一步操作
hostname=`hostname`
export ETCDCTL_API=3
export ETCDCTL_CERT=/etc/ssl/etcd/ssl/admin-$hostname.pem
export ETCDCTL_KEY=/etc/ssl/etcd/ssl/admin-$hostname-key.pem
export ETCDCTL_CACERT=/etc/ssl/etcd/ssl/ca.pem
export ETCDCTL_ENDPOINTS=https://127.0.0.1:2379
etcdctl member list
1.2 挂载磁盘给etcd
[root@test-on-prem-k8s-master01 ~]# docker ps -a | grep etcd
7df7d8aa2107 cr.yealinkops.com/third_party/coreos/etcd:v3.4.3 "/usr/local/bin/etcd" 7 days ago Up 7 days etcd01
docker stop etcd1
docker stop etcd2
docker stop etcd3
# ls -l /var/lib/etcd/
mv /var/lib/etcd /var/lib/etcd-bak
mkdir /var/lib/etcd
mount /dev/vdb /var/lib/etcd
cp -r /var/lib/etcd-bak/* /var/lib/etcd
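# stop 后,切换需要在15s内完成,etcd会在15s后自动启动;如果未完成则需要修复数据,重新部署
# 注意:下面的 mkfs.xfs 和 /etc/fstab 两步必须在上面的 mount、cp 之前执行(实际顺序:mkfs.xfs → vi /etc/fstab → mount → cp,参见“问题”一节的历史命令),否则新盘无法正常挂载或已拷贝的数据会被破坏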
mkfs.xfs -f /dev/vdb
vi /etc/fstab
/dev/vdb /var/lib/etcd xfs defaults 0 0
docker start etcd1
docker start etcd2
docker start etcd3
# 按 1.1 的检查步骤再次确认(数据目录、端口监听、etcdctl member list)
# 进入容器内检查
docker exec -it etcd1 sh
docker exec -it etcd2 sh
docker exec -it etcd3 sh
df -Th /var/lib/etcd
Filesystem Type Size Used Avail Use% Mounted on
/dev/vda1 xfs 100G 5.9G 95G 6% /var/lib/etcd
# 确认pod ok
kubectl get pod -A -o wide
问题
- 注意:数据拷贝完成并确认 etcd 正常之前,千万不要删除备份目录 /var/lib/etcd-bak(见下面历史命令第 57 条的 rm -fr),否则数据丢失,etcd 无法启动
53 docker stop etcd03
54 mv /var/lib/etcd/ /var/lib/etcd-bak
55 mkdir /var/lib/etcd/
56 ls -lR /var/lib/etcd-bak
57 rm -fr /var/lib/etcd-bak # !!!!
58 mv /var/lib/etcd /var/lib/etcd-bak
59 ls -lR /var/lib/etcd-bak
60 ls -lR /var/lib/etcd-bak
61 ls -lR /var/lib/etcd
62 ls -lR /var/lib/etcd-bak
63 mkfs.xfs -f /dev/vdb
64 vi /etc/fstab
65 mount /dev/vdb /var/lib/etcd
66 \cp -r /var/lib/etcd-bak/* /var/lib/etcd
67 docker start etcd03
解决方式
# 先尝试重启 etcd 服务
# 若仍无法恢复,则把故障成员从集群中删除,再重新添加
[root@test-on-prem-k8s-master01 yealink]# etcdctl member list
4264f8469505722c, started, etcd3, https://10.118.35.143:2380, https://10.118.35.143:2379, false
ae71f6860b518f63, started, etcd2, https://10.118.34.26:2380, https://10.118.34.26:2379, false
eb40929757bb9421, started, etcd1, https://10.118.33.92:2380, https://10.118.33.92:2379, true
etcdctl member remove <MEMBER_ID>   # <MEMBER_ID> 为故障节点的 member ID,取自上面 member list 输出的第一列
etcdctl member add etcd1 --peer-urls=https://10.118.33.92:2380 --learner
# 一旦加入 好像docker就可以启动了,而且应该不用执行下面那行,下面那行可能用于添加第四个节点,而当前的场景是直接恢复
export ETCDCTL_ENDPOINTS=https://10.118.33.92:2379,https://10.118.34.26:2379,https://10.118.35.143:2379
etcdctl member promote eb40929757bb9421
# 注意:执行 promote 前需要配置 ETCDCTL_ENDPOINTS 环境变量,并使用正确的 member ID(以 member add 的输出或最新的 member list 为准),且该成员必须是 learner
## 否则会出现以下问题
{"level":"warn","ts":"2021-01-06T03:14:52.788Z","caller":"clientv3/retry_interceptor.go:61","msg":"retrying of unary invoker failed","target":"endpoint://client-fb6ad35b-3c1e-493a-bf22-8299bb37e9e9/127.0.0.1:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member"}
Error: etcdserver: can only promote a learner member
或者
docker stop etcd3
docker rm -f etcd3
# 删除之后k8s自动会起一个新的??
# 可能需要多删几次
rm -fr /var/lib/etcd/*
#
etcdctl member remove <MEMBER_ID>   # <MEMBER_ID> 为被删除节点的 member ID,可通过 etcdctl member list 查询
# 重新基于kubespray部署etcd
网友评论