美文网首页
K8s运维管理

K8s运维管理

作者: 小李飞刀_lql | 来源:发表于2021-12-13 17:08 被阅读0次

    Etcd数据库备份与恢复

    kubeadm部署方式

    备份

    [root@k8smaster ~]# yum install etcd 
    
    ETCDCTL_API=3 etcdctl \
    snapshot save snap.db \
    --endpoints=https://127.0.0.1:2379 \
    --cacert=/etc/kubernetes/pki/etcd/ca.crt \
    --cert=/etc/kubernetes/pki/etcd/server.crt \
    --key=/etc/kubernetes/pki/etcd/server.key 
    
    [root@k8smaster ~]# ls
    snap.db
    

    恢复

    先暂停kube-apiserver和etcd容器

    [root@k8smaster ~]# mv /etc/kubernetes/manifests /etc/kubernetes/manifests.bak
    [root@k8smaster ~]# mv /var/lib/etcd/ /var/lib/etcd.bak
    
    

    恢复

    ETCDCTL_API=3 etcdctl \
    snapshot restore snap.db \
    --data-dir=/var/lib/etcd
    
    

    启动kube-apiserver和etcd容器

    [root@k8smaster ~]# mv /etc/kubernetes/manifests.bak /etc/kubernetes/manifests
    
    #此时所有服务都将恢复
    [root@k8smaster ~]# kubectl get pod -n kube-system
    

    二进制部署方式

    备份

     
    
    ETCDCTL_API=3 etcdctl \
    snapshot save snap.db \
    --endpoints=https://192.168.153.25:2379 \
    --cacert=/opt/etcd/ssl/ca.pem \
    --cert=/opt/etcd/ssl/server.pem \
    --key=/opt/etcd/ssl/server-key.pem
    
    [root@k8s-m1 ~]# ls
    snap.db
    
    #不要执行 yum install etcd,否则会破坏原有配置
    

    恢复

    先暂停kube-apiserver和etcd

    [root@k8s-m1 ~]# systemctl stop kube-apiserver
    [root@k8s-m1 ~]# systemctl stop etcd
    [root@k8s-m1 ~]# mv /var/lib/etcd/default.etcd /var/lib/etcd/default.etcd.bak
    

    在每个etcd节点上执行恢复(参数参考etcd配置文件)

    #etcd配置文件
    cat  /opt/etcd/cfg/etcd.conf  
    #[Member]
    ETCD_NAME="etcd-1"
    ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
    ETCD_LISTEN_PEER_URLS="https://192.168.153.25:2380"
    ETCD_LISTEN_CLIENT_URLS="https://192.168.153.25:2379"
    
    #[Clustering]
    ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.153.25:2380"
    ETCD_ADVERTISE_CLIENT_URLS="https://192.168.153.25:2379"
    ETCD_INITIAL_CLUSTER="etcd-1=https://192.168.153.25:2380"
    ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
    ETCD_INITIAL_CLUSTER_STATE="new"
    --------------------------------------------------------------------------------
    
    [root@k8s-m1 bin]# cd /opt/etcd/bin
    #执行
    ETCDCTL_API=3 /opt/etcd/bin/etcdctl snapshot restore snap.db \
    --name etcd-1 \
    --initial-cluster="etcd-1=https://192.168.153.25:2380" \
    --initial-cluster-token=etcd-cluster \
    --initial-advertise-peer-urls=https://192.168.153.25:2380 \
    --data-dir=/var/lib/etcd/default.etcd
    
    
    #--initial-cluster 需列出集群的全部成员,与配置文件中的 ETCD_INITIAL_CLUSTER 保持一致
     
    

    启动kube-apiserver和etcd

    systemctl start etcd
    systemctl start kube-apiserver
    
    ETCDCTL_API=3 /opt/etcd/bin/etcdctl --cacert=/opt/etcd/ssl/ca.pem --cert=/opt/etcd/ssl/server.pem --key=/opt/etcd/ssl/server-key.pem --endpoints="https://192.168.153.25:2379" endpoint health --write-out=table
    
    +-----------------------------+--------+------------+-------+
    |          ENDPOINT           | HEALTH |    TOOK    | ERROR |
    +-----------------------------+--------+------------+-------+
    | https://192.168.153.25:2379 |   true | 7.818883ms |       |
    +-----------------------------+--------+------------+-------+
    

    证书自动续签

    kubeadm

    管理节点

    # 查看现有证书到期时间
    [root@k8smaster ~]# kubeadm alpha certs check-expiration
    
    CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
    admin.conf                 Oct 30, 2022 14:21 UTC   321d                               
    apiserver                  Oct 30, 2022 14:21 UTC   321d            ca                 
    apiserver-etcd-client      Oct 30, 2022 14:21 UTC   321d            etcd-ca             
    apiserver-kubelet-client   Oct 30, 2022 14:21 UTC   321d            ca     
    controller-manager.conf    Oct 30, 2022 14:21 UTC   321d             
    etcd-healthcheck-client    Oct 30, 2022 14:21 UTC   321d            etcd-ca   
    etcd-peer                  Oct 30, 2022 14:21 UTC   321d            etcd-ca  
    etcd-server                Oct 30, 2022 14:21 UTC   321d            etcd-ca    
    front-proxy-client         Oct 30, 2022 14:21 UTC   321d            front-proxy-ca  
    scheduler.conf             Oct 30, 2022 14:21 UTC   321d      
    
    [root@k8smaster ~]# kubeadm alpha certs renew all     
    
    #可配置定时任务(如cron),每个月执行一次上面的续签命令
    

    工作节点

    配置kube-controller-manager组件

    添加下述两个参数:
    • experimental-cluster-signing-duration=87600h0m0s 为kubelet客户端证书颁发有效期10年
    • feature-gates=RotateKubeletServerCertificate=true 启用server证书颁发
    
    vi /etc/kubernetes/manifests/kube-controller-manager.yaml
    
    spec:
      containers:
      - command:
        - kube-controller-manager
        - --experimental-cluster-signing-duration=87600h0m0s
        - --feature-gates=RotateKubeletServerCertificate=true
        ......
        
    配置完成后,重建pod使之生效:  
    [root@k8smaster pki]# kubectl delete pod kube-controller-manager-k8smaster  -n kube-system
    pod "kube-controller-manager-k8smaster" deleted
    

    配置kubelet组件

    #默认kubelet证书轮转已启用:
    
    [root@k8smaster ~]# vi /var/lib/kubelet/config.yaml 
    rotateCertificates: true
    

    测试

    #找一台节点测试,先查看现有客户端证书有效期
    [root@k8smaster ~]# cd /var/lib/kubelet/pki
    [root@k8smaster pki]# openssl x509 -in kubelet-client-current.pem -noout -dates
    notBefore=Oct 30 14:21:04 2021 GMT
    notAfter=Oct 30 14:21:06 2022 GMT
    
    #修改服务器时间,模拟证书即将到期
    [root@k8smaster pki]# date -s "2022-10-29"
    [root@k8smaster pki]#  systemctl restart kubelet
    
    #再查看证书有效期,可以看到新证书已续签到2031年(约九年,未满十年,推测是受CA证书有效期限制):
    [root@k8smaster pki]# openssl x509 -in kubelet-client-current.pem -noout -dates
    notBefore=Oct 28 15:55:34 2022 GMT
    notAfter=Oct 28 14:21:04 2031 GMT
    

    二进制

    #自定义方式,每个节点都可以查看
    [root@k8s-m1 ~]# cd /opt/kubernetes/ssl
    [root@k8s-m1 ssl]# openssl x509 -in kubelet-client-current.pem -noout -dates
    notBefore=Dec 12 05:40:24 2021 GMT
    notAfter=Dec 11 05:27:00 2026 GMT
    

    Kubernetes 集群常见故障排查思路

    先区分部署方式

    #kubeadm方式
    [root@k8smaster pki]# kubectl get pod -n kube-system
    etcd-k8smaster                            
    kube-apiserver-k8smaster
    
    #二进制
    所有组件均采用systemd管理
    

    集群部署类问题

    001 网络不通
    002 启动失败,一般是配置文件或者依赖服务的问题
    journalctl -u kube-apiserver -f
    journalctl -u kubelet -f
    ......
    003 平台不兼容
    
    

    应用部署类问题

    • 查看资源详情:kubectl describe TYPE/NAME
    • 查看容器日志:kubectl logs TYPE/NAME [-c CONTAINER]
    • 进入容器中:kubectl exec POD [-c CONTAINER] -- COMMAND [args...]
    

    网络类问题,一般指无法在集群内部或者外部访问应用

    • Pod正常工作吗?
    • Service是否关联Pod?
    • Service指定target-port端口是否正确?
    • 如果用名称访问,DNS是否正常工作?
    • kube-proxy正常工作吗?是否正常写iptables规则?
    • CNI网络插件是否正常工作?
    
    
    -------------------------------------------------------
    
    [root@k8smaster pod]# kubectl get pod -n kube-system -o wide
    NAME                                      READY   STATUS    RESTARTS   AGE     IP               NODE        NOMINATED NODE   READINESS GATES
    calico-kube-controllers-97769f7c7-r9lsf   1/1     Running    10.244.249.2     k8snode1 
    calico-node-2vmxg                         1/1     Running    192.168.153.22   k8snode1 
    calico-node-xc6js                         1/1     Running    192.168.153.21   k8smaster 
    coredns-6d56c8448f-fz6f5                  1/1     Running    10.244.16.129    k8smaster 
    coredns-6d56c8448f-vzr4h                  1/1     Running    10.244.249.1     k8snode1 
    etcd-k8smaster                            1/1     Running    192.168.153.21   k8smaster 
    kube-apiserver-k8smaster                  1/1     Running   192.168.153.21   k8smaster 
    kube-controller-manager-k8smaster         1/1     Running  192.168.153.21   k8smaster   
    kube-proxy-nz6s9                          1/1     Running  192.168.153.21   k8smaster   
    kube-proxy-qv54k                          1/1     Running  192.168.153.22   k8snode1   
    kube-scheduler-k8smaster                  1/1     Running  192.168.153.21   k8smaster   
    
    
    1639385581495.png

    k8s官网

    https://kubernetes.io/zh/docs/home/
    

    相关文章

      网友评论

          本文标题:K8s运维管理

          本文链接:https://www.haomeiwen.com/subject/aptsfrtx.html