Etcd Database Backup and Restore
kubeadm deployment
Backup
[root@k8smaster ~]# yum install etcd   # provides the etcdctl client tool
ETCDCTL_API=3 etcdctl \
snapshot save snap.db \
--endpoints=https://127.0.0.1:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/server.crt \
--key=/etc/kubernetes/pki/etcd/server.key
[root@k8smaster ~]# ls
snap.db
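Before relying on the snapshot, it can be verified; a minimal check with etcdctl's snapshot status subcommand (on etcd 3.5+ the same check is also available via etcdutl):
ETCDCTL_API=3 etcdctl snapshot status snap.db --write-out=table
# prints the snapshot's hash, revision, total keys, and size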
Restore
First stop the kube-apiserver and etcd containers (moving the manifests directory out stops the static pods):
[root@k8smaster ~]# mv /etc/kubernetes/manifests /etc/kubernetes/manifests.bak
[root@k8smaster ~]# mv /var/lib/etcd/ /var/lib/etcd.bak
Restore from the snapshot:
ETCDCTL_API=3 etcdctl \
snapshot restore snap.db \
--data-dir=/var/lib/etcd
Start the kube-apiserver and etcd containers again:
[root@k8smaster ~]# mv /etc/kubernetes/manifests.bak /etc/kubernetes/manifests
# At this point all services should come back up
[root@k8smaster ~]# kubectl get pod -n kube-system
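Once the static pods are back, etcd health can be confirmed with the same certificates used for the backup; a minimal sketch:
ETCDCTL_API=3 etcdctl endpoint health \
--endpoints=https://127.0.0.1:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/server.crt \
--key=/etc/kubernetes/pki/etcd/server.key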
Binary deployment
Backup
ETCDCTL_API=3 etcdctl \
snapshot save snap.db \
--endpoints=https://192.168.153.25:2379 \
--cacert=/opt/etcd/ssl/ca.pem \
--cert=/opt/etcd/ssl/server.pem \
--key=/opt/etcd/ssl/server-key.pem
[root@k8s-m1 ~]# ls
snap.db
# Do not run yum install etcd here; it would overwrite the existing configuration
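In practice the backup should be scheduled rather than run by hand. A minimal cron sketch, assuming a hypothetical /backup directory and date-stamped filenames:
# /etc/cron.d/etcd-backup (hypothetical) - daily snapshot at 02:00
0 2 * * * root ETCDCTL_API=3 /opt/etcd/bin/etcdctl \
snapshot save /backup/snap-$(date +\%F).db \
--endpoints=https://192.168.153.25:2379 \
--cacert=/opt/etcd/ssl/ca.pem \
--cert=/opt/etcd/ssl/server.pem \
--key=/opt/etcd/ssl/server-key.pem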
Restore
First stop kube-apiserver and etcd:
[root@k8s-m1 ~]# systemctl stop kube-apiserver
[root@k8s-m1 ~]# systemctl stop etcd
[root@k8s-m1 ~]# mv /var/lib/etcd/default.etcd /var/lib/etcd/default.etcd.bak
Restore on each etcd node (refer to the etcd configuration file):
# etcd configuration file
cat /opt/etcd/cfg/etcd.conf
#[Member]
ETCD_NAME="etcd-1"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="https://192.168.153.25:2380"
ETCD_LISTEN_CLIENT_URLS="https://192.168.153.25:2379"
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.153.25:2380"
ETCD_ADVERTISE_CLIENT_URLS="https://192.168.153.25:2379"
ETCD_INITIAL_CLUSTER="etcd-1=https://192.168.153.25:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
--------------------------------------------------------------------------------
[root@k8s-m1 bin]# cd /opt/etcd/bin
# Run:
ETCDCTL_API=3 /opt/etcd/bin/etcdctl snapshot restore snap.db \
--name etcd-1 \
--initial-cluster="etcd-1=https://192.168.153.25:2380" \
--initial-cluster-token=etcd-cluster \
--initial-advertise-peer-urls=https://192.168.153.25:2380 \
--data-dir=/var/lib/etcd/default.etcd
# ETCD_INITIAL_CLUSTER lists all cluster members; in a multi-node cluster it must include every node
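In a multi-node cluster, the restore is run on every node with the same snapshot file: each node uses its own --name and --initial-advertise-peer-urls, while --initial-cluster lists all members. A sketch assuming hypothetical peers etcd-2 and etcd-3 at 192.168.153.26/27:
# on etcd-2 (hypothetical second node)
ETCDCTL_API=3 /opt/etcd/bin/etcdctl snapshot restore snap.db \
--name etcd-2 \
--initial-cluster="etcd-1=https://192.168.153.25:2380,etcd-2=https://192.168.153.26:2380,etcd-3=https://192.168.153.27:2380" \
--initial-cluster-token=etcd-cluster \
--initial-advertise-peer-urls=https://192.168.153.26:2380 \
--data-dir=/var/lib/etcd/default.etcd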
Start etcd and kube-apiserver
systemctl start etcd
systemctl start kube-apiserver
ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://192.168.153.25:2379" endpoint health --write-out=table
+-----------------------------+--------+------------+-------+
| ENDPOINT | HEALTH | TOOK | ERROR |
+-----------------------------+--------+------------+-------+
| https://192.168.153.25:2379 | true | 7.818883ms | |
+-----------------------------+--------+------------+-------+
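Beyond a health check, endpoint status also reports the leader, raft term, and DB size, which helps confirm the restored data is actually present:
ETCDCTL_API=3 /opt/etcd/bin/etcdctl \
--cacert=/opt/etcd/ssl/ca.pem \
--cert=/opt/etcd/ssl/server.pem \
--key=/opt/etcd/ssl/server-key.pem \
--endpoints="https://192.168.153.25:2379" endpoint status --write-out=table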
Automatic Certificate Renewal
kubeadm
Control-plane node
# Check the expiration time of the existing certificates
[root@k8smaster ~]# kubeadm alpha certs check-expiration
CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY
admin.conf Oct 30, 2022 14:21 UTC 321d
apiserver Oct 30, 2022 14:21 UTC 321d ca
apiserver-etcd-client Oct 30, 2022 14:21 UTC 321d etcd-ca
apiserver-kubelet-client Oct 30, 2022 14:21 UTC 321d ca
controller-manager.conf Oct 30, 2022 14:21 UTC 321d
etcd-healthcheck-client Oct 30, 2022 14:21 UTC 321d etcd-ca
etcd-peer Oct 30, 2022 14:21 UTC 321d etcd-ca
etcd-server Oct 30, 2022 14:21 UTC 321d etcd-ca
front-proxy-client Oct 30, 2022 14:21 UTC 321d front-proxy-ca
scheduler.conf Oct 30, 2022 14:21 UTC 321d
[root@k8smaster ~]# kubeadm alpha certs renew all
# Schedule this to run periodically, e.g. once a month
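A minimal cron sketch for the monthly renewal. Assumptions: on kubeadm v1.20+ the command is kubeadm certs renew all (without alpha), and since renew all also regenerates admin.conf, the kubectl config is copied back afterwards:
# /etc/cron.d/kubeadm-cert-renew (hypothetical) - 1st of each month at 03:00
0 3 1 * * root kubeadm alpha certs renew all && cp /etc/kubernetes/admin.conf /root/.kube/config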
Worker nodes
Configure the kube-controller-manager component
Add the following two flags:
• --experimental-cluster-signing-duration=87600h0m0s: issue kubelet client certificates with a 10-year validity
• --feature-gates=RotateKubeletServerCertificate=true: enable issuance and rotation of kubelet serving certificates
vi /etc/kubernetes/manifests/kube-controller-manager.yaml
spec:
containers:
- command:
- kube-controller-manager
- --experimental-cluster-signing-duration=87600h0m0s
- --feature-gates=RotateKubeletServerCertificate=true
......
After configuring, recreate the pod for the change to take effect:
[root@k8smaster pki]# kubectl delete pod kube-controller-manager-k8smaster -n kube-system
pod "kube-controller-manager-k8smaster" deleted
Configure the kubelet component
# kubelet certificate rotation is enabled by default:
[root@k8smaster ~]# vi /var/lib/kubelet/config.yaml
rotateCertificates: true
Test
# Pick a node to test on; first check the validity of the current client certificate
[root@k8smaster ~]# cd /var/lib/kubelet/pki
[root@k8smaster pki]# openssl x509 -in kubelet-client-current.pem -noout -dates
notBefore=Oct 30 14:21:04 2021 GMT
notAfter=Oct 30 14:21:06 2022 GMT
# Change the server time to simulate the certificate nearing expiration
[root@k8smaster pki]# date -s "2022-10-29"
[root@k8smaster pki]# systemctl restart kubelet
# Check the validity again; the certificate has been rotated and is now valid for roughly ten years (until 2031):
[root@k8smaster pki]# openssl x509 -in kubelet-client-current.pem -noout -dates
notBefore=Oct 28 15:55:34 2022 GMT
notAfter=Oct 28 14:21:04 2031 GMT
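When the kubelet rotates its certificate it submits a CSR that the controller-manager signs; the rotation can be observed from the master:
[root@k8smaster ~]# kubectl get csr
# client CSRs are approved automatically; serving CSRs may stay Pending and need:
# kubectl certificate approve <csr-name>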
Binary deployment
# With a binary (custom) deployment, this can be checked on every node
[root@k8s-m1 ~]# cd /opt/kubernetes/ssl
[root@k8s-m1 ssl]# openssl x509 -in kubelet-client-current.pem -noout -dates
notBefore=Dec 12 05:40:24 2021 GMT
notAfter=Dec 11 05:27:00 2026 GMT
Common Troubleshooting Approaches for Kubernetes Clusters
First determine the deployment method
# kubeadm method (control-plane components run as static pods)
[root@k8smaster pki]# kubectl get pod -n kube-system
etcd-k8smaster
kube-apiserver-k8smaster
# Binary method
All components are managed by systemd
Cluster deployment issues
001 Network unreachable
002 Startup failure, usually caused by a configuration file or a dependent service; check the logs:
journalctl -u kube-apiserver -f
journalctl -u kubelet -f
......
003 Platform incompatibility
Application deployment issues
• View resource details: kubectl describe TYPE/NAME
• View container logs: kubectl logs TYPE/NAME [-c CONTAINER]
• Exec into a container: kubectl exec POD [-c CONTAINER] -- COMMAND [args...]
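A worked sequence for a hypothetical failing pod named web-5d8c7f9b6d-abcde:
# 1. events often reveal scheduling, image-pull, or probe failures
kubectl describe pod web-5d8c7f9b6d-abcde
# 2. application output; -p shows the previous container after a crash
kubectl logs web-5d8c7f9b6d-abcde -p
# 3. inspect from inside the container
kubectl exec -it web-5d8c7f9b6d-abcde -- sh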
Network issues, generally meaning the application cannot be reached from inside or outside the cluster
• Is the Pod working correctly?
• Is the Service associated with the Pod (do the selectors match)?
• Is the Service's targetPort correct?
• If accessing by name, is DNS working?
• Is kube-proxy working? Is it writing the iptables rules correctly?
• Is the CNI network plugin working?
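The checklist above in command form, assuming a hypothetical Service named web in the default namespace:
kubectl get endpoints web                        # empty ENDPOINTS means the selector matches no Pod
kubectl get svc web -o yaml | grep -A2 ports     # verify targetPort matches the container port
kubectl run dns-test --image=busybox:1.28 --rm -it --restart=Never -- nslookup web   # DNS check
iptables-save | grep web                         # on a node: kube-proxy rules for the Service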
-------------------------------------------------------
[root@k8smaster pod]# kubectl get pod -n kube-system -o wide
NAME                                      READY   STATUS    IP               NODE
calico-kube-controllers-97769f7c7-r9lsf   1/1     Running   10.244.249.2     k8snode1
calico-node-2vmxg                         1/1     Running   192.168.153.22   k8snode1
calico-node-xc6js                         1/1     Running   192.168.153.21   k8smaster
coredns-6d56c8448f-fz6f5                  1/1     Running   10.244.16.129    k8smaster
coredns-6d56c8448f-vzr4h                  1/1     Running   10.244.249.1     k8snode1
etcd-k8smaster                            1/1     Running   192.168.153.21   k8smaster
kube-apiserver-k8smaster                  1/1     Running   192.168.153.21   k8smaster
kube-controller-manager-k8smaster         1/1     Running   192.168.153.21   k8smaster
kube-proxy-nz6s9                          1/1     Running   192.168.153.21   k8smaster
kube-proxy-qv54k                          1/1     Running   192.168.153.22   k8snode1
kube-scheduler-k8smaster                  1/1     Running   192.168.153.21   k8smaster
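A quick CNI sanity check is cross-node pod connectivity; using the addresses above, ping a pod running on k8snode1 from the master:
[root@k8smaster ~]# ping -c 3 10.244.249.2
# replies confirm the Calico network routes pod traffic across nodes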
Kubernetes official documentation:
https://kubernetes.io/zh/docs/home/