keywords
1.tidb-scheduler节点处于Pending状态/有pod节点处于Pending状态
2.kube-scheduler镜像拉取失败,ImagePullBackOff
background
root@kubernets-master:~# kubectl get po -n tidb-admin
NAME READY STATUS RESTARTS AGE
tidb-controller-manager-7dd5c59f4f-whtf4 1/1 Running 2 99m
tidb-scheduler-5f5958d476-tmdjw 0/2 Running 2 99m
root@kubernets-master:~# kubectl get po -n tidb-admin -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
tidb-controller-manager-7dd5c59f4f-whtf4 1/1 Running 4 27h 10.244.102.120 kubernets-node2 <none> <none>
tidb-scheduler-5f5958d476-tmdjw 0/2 Running 5 27h 10.244.122.212 kubernets-node1 <none> <none>
analyze
可以发现tidb-scheduler 没起来,通过describe观察
root@kubernets-master:~# kubectl get po -n tidb-admin
NAME READY STATUS RESTARTS AGE
tidb-controller-manager-7dd5c59f4f-whtf4 1/1 Running 6 28h
tidb-scheduler-5f5958d476-tmdjw 2/2 Running 6 28h
root@kubernets-master:~# kubectl describe po -n tidb-admin
Name: tidb-controller-manager-7dd5c59f4f-whtf4
Namespace: tidb-admin
Priority: 0
Node: kubernets-node2/192.168.16.136
Start Time: Mon, 09 Mar 2020 19:52:08 +0800
Labels: app.kubernetes.io/component=controller-manager
app.kubernetes.io/instance=tidb-operator
app.kubernetes.io/name=tidb-operator
pod-template-hash=7dd5c59f4f
Annotations: cni.projectcalico.org/podIP: 10.244.102.120/32
Status: Running
IP: 10.244.102.120
IPs:
IP: 10.244.102.120
Controlled By: ReplicaSet/tidb-controller-manager-7dd5c59f4f
Containers:
tidb-operator:
Container ID: docker://c5eb677aa30e4919755ca13924269b5d82d242729a3b13c2314a12cd24274651
Image: pingcap/tidb-operator:v1.1.0-beta.2
Image ID: docker-pullable://pingcap/tidb-operator@sha256:6ae9c87b80e442f13a03d493807db61b0ed753b9d313b91f629fdca4be8efaeb
Port: <none>
Host Port: <none>
Command:
/usr/local/bin/tidb-controller-manager
-tidb-backup-manager-image=pingcap/tidb-backup-manager:v1.1.0-beta.2
-tidb-discovery-image=pingcap/tidb-operator:v1.1.0-beta.2
-cluster-scoped=true
-auto-failover=true
-pd-failover-period=5m
-tikv-failover-period=5m
-tidb-failover-period=5m
-v=2
State: Running
Started: Tue, 10 Mar 2020 23:27:17 +0800
Last State: Terminated
Reason: Error
Exit Code: 255
Started: Tue, 10 Mar 2020 23:11:06 +0800
Finished: Tue, 10 Mar 2020 23:27:14 +0800
Ready: True
Restart Count: 6
Limits:
cpu: 250m
memory: 150Mi
Requests:
cpu: 80m
memory: 50Mi
Environment:
NAMESPACE: tidb-admin (v1:metadata.namespace)
TZ: UTC
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from tidb-controller-manager-token-sb6px (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
tidb-controller-manager-token-sb6px:
Type: Secret (a volume populated by a Secret)
SecretName: tidb-controller-manager-token-sb6px
Optional: false
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
node.kubernetes.io/unreachable:NoExecute for 300s
Events: <none>
Name: tidb-scheduler-5f5958d476-tmdjw
Namespace: tidb-admin
Priority: 0
Node: kubernets-node1/192.168.16.137
Start Time: Mon, 09 Mar 2020 19:52:08 +0800
Labels: app.kubernetes.io/component=scheduler
app.kubernetes.io/instance=tidb-operator
app.kubernetes.io/name=tidb-operator
pod-template-hash=5f5958d476
Annotations: cni.projectcalico.org/podIP: 10.244.122.212/32
Status: Running
IP: 10.244.122.212
IPs:
IP: 10.244.122.212
Controlled By: ReplicaSet/tidb-scheduler-5f5958d476
Containers:
tidb-scheduler:
Container ID: docker://afb8730e08d95508fcae4e6bc625b732adadb9bb6190d2605e4e5eb3136e0796
Image: pingcap/tidb-operator:v1.1.0-beta.2
Image ID: docker-pullable://pingcap/tidb-operator@sha256:6ae9c87b80e442f13a03d493807db61b0ed753b9d313b91f629fdca4be8efaeb
Port: <none>
Host Port: <none>
Command:
/usr/local/bin/tidb-scheduler
-v=2
-port=10262
State: Running
Started: Tue, 10 Mar 2020 22:58:34 +0800
Last State: Terminated
Reason: Error
Exit Code: 255
Started: Mon, 09 Mar 2020 20:59:19 +0800
Finished: Tue, 10 Mar 2020 22:57:48 +0800
Ready: True
Restart Count: 2
Limits:
cpu: 250m
memory: 150Mi
Requests:
cpu: 80m
memory: 50Mi
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from tidb-scheduler-token-kkfb6 (ro)
kube-scheduler:
Container ID: docker://8f3e2fcf82770871660529fbf4927db7726621fb260228b8ce1c4c0ff6c8d93b
Image: registry.aliyuncs.com/google_containers/kube-scheduler:v1.17.0
Image ID: docker-pullable://registry.aliyuncs.com/google_containers/kube-scheduler@sha256:e35a9ec92da008d88fbcf97b5f0945ff52a912ba5c11e7ad641edb8d4668fc1a
Port: <none>
Host Port: <none>
Command:
kube-scheduler
--port=10261
--leader-elect=true
--lock-object-name=tidb-scheduler
--lock-object-namespace=tidb-admin
--scheduler-name=tidb-scheduler
--v=2
--policy-configmap=tidb-scheduler-policy
--policy-configmap-namespace=tidb-admin
State: Running
Started: Tue, 10 Mar 2020 23:11:08 +0800
Last State: Terminated
Reason: Error
Exit Code: 255
Started: Tue, 10 Mar 2020 23:03:47 +0800
Finished: Tue, 10 Mar 2020 23:10:41 +0800
Ready: True
Restart Count: 4
Limits:
cpu: 250m
memory: 150Mi
Requests:
cpu: 80m
memory: 50Mi
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from tidb-scheduler-token-kkfb6 (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
tidb-scheduler-token-kkfb6:
Type: Secret (a volume populated by a Secret)
SecretName: tidb-scheduler-token-kkfb6
Optional: false
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
node.kubernetes.io/unreachable:NoExecute for 300s
Events:
FirstSeen LastSeen Count From SubObjectPath Type Reason Message
--------- -------- ----- ---- ------------- -------- ------ -------
51m 51m 1 kubelet, ubuntu-k8s-3 Normal NodeNotSchedulable Node ubuntu-k8s-3 status is now: NodeNotSchedulable
9d 51m 49428 kubelet, ubuntu-k8s-3 Warning EvictionThresholdMet Attempting to reclaim nodefs
5m 5m 1 kubelet, ubuntu-k8s-3 Normal Starting Starting kubelet.
5m 5m 2 kubelet, ubuntu-k8s-3 Normal NodeHasSufficientDisk Node ubuntu-k8s-3 status is now: NodeHasSufficientDisk
5m 5m 2 kubelet, ubuntu-k8s-3 Normal NodeHasSufficientMemory Node ubuntu-k8s-3 status is now: NodeHasSufficientMemory
5m 5m 2 kubelet, ubuntu-k8s-3 Normal NodeHasNoDiskPressure Node ubuntu-k8s-3 status is now: NodeHasNoDiskPressure
5m 5m 1 kubelet, ubuntu-k8s-3 Normal NodeAllocatableEnforced Updated Node Allocatable limit across pods
5m 5m 1 kubelet, ubuntu-k8s-3 Normal NodeHasDiskPressure Node ubuntu-k8s-3 status is now: NodeHasDiskPressure
5m 14s 23 kubelet, ubuntu-k8s-3 Warning EvictionThresholdMet Attempting to reclaim nodefs
####### events 前面部分类似上面这样,当时没有完整粘贴信息,但如下关键信息保留了下来
Warning Failed 99s (x270 over 65m) kubelet, kubernets-node2 kube-scheduler not found ImagePullBackOff #这段没有完整记录下来,大意是拉取不到 kube-scheduler 的镜像
Warning Failed 99s (x270 over 65m) kubelet, kubernets-node2 Error: ImagePullBackOff
可以发现 node2 节点拉取不到 kube-scheduler 镜像,导致 ImagePullBackOff
solve
vi charts/tidb-operator/values.yaml
# 找到第 75 行的 kubeSchedulerImageName 配置项(默认为 k8s.gcr.io/kube-scheduler),修改为如下内容
kubeSchedulerImageName: registry.aliyuncs.com/google_containers/kube-scheduler
原先镜像写的是:registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler
改成这个镜像:registry.aliyuncs.com/google_containers/kube-scheduler
重新init charts软件包的tidb-operator
helm install charts/tidb-operator --name=tidb-operator --namespace=tidb-admin --version=v1.0.1
网友评论