1. Monitoring metrics
- Cluster monitoring: node resource utilization, node count, running Pods
- Pod monitoring: container metrics (CPU and memory usage, busy/idle), application metrics (status)
2. Monitoring platform
Prometheus + Grafana
(1) Prometheus - open source
- Monitoring, alerting, and a time-series database in one
- Periodically scrapes the state of monitored components over HTTP
- No complex integration needed: exposing an HTTP metrics endpoint is enough (see the example below)
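Any component that serves metrics in Prometheus's plain-text exposition format over HTTP can be scraped without extra glue. A hypothetical /metrics response might look like this (the metric name and labels are invented for illustration):

# HELP myapp_http_requests_total Total number of HTTP requests handled.
# TYPE myapp_http_requests_total counter
myapp_http_requests_total{method="GET",code="200"} 1027

Prometheus pulls this text on each scrape interval and stores the samples in its time-series database.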
(2) Grafana - an open-source data analysis and visualization tool
- Supports many data sources, including Prometheus
[root@hadoop102 ~]# mkdir pgmonitor
[root@hadoop102 ~]# cd pgmonitor
[root@hadoop102 pgmonitor]# git clone -b release-0.5 https://gitee.com/lonelyZhe/kube-prometheus.git
[root@hadoop102 pgmonitor]# cd kube-prometheus/
[root@hadoop102 kube-prometheus]# kubectl create -f manifests/setup
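Before creating the rest of the manifests, it can help to wait until the CustomResourceDefinitions registered by manifests/setup show as Established; otherwise the next step may fail with "no matches for kind" errors. The wait command below mirrors the upstream kube-prometheus quick start and is not part of the original transcript:

kubectl wait --for condition=Established --all CustomResourceDefinition --namespace=monitoring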
[root@hadoop102 kube-prometheus]# kubectl create -f manifests/
[root@hadoop102 kube-prometheus]# kubectl create -f manifests/grafana/
[root@hadoop102 kube-prometheus]# kubectl create -f manifests/alertmanager/
[root@hadoop102 kube-prometheus]# kubectl get pods -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 10m
alertmanager-main-1 2/2 Running 0 9m58s
alertmanager-main-2 2/2 Running 0 9m57s
grafana-5c55845445-nxx6s 1/1 Running 0 11m
kube-state-metrics-bdb8874fd-8wg9m 3/3 Running 0 14m
node-exporter-fpm8m 2/2 Running 0 14m
node-exporter-kdtvr 2/2 Running 0 14m
node-exporter-nkzl7 2/2 Running 0 14m
prometheus-adapter-5949969998-22jld 1/1 Running 0 14m
prometheus-k8s-0 3/3 Running 1 13m
prometheus-k8s-1 3/3 Running 1 13m
prometheus-operator-574fd8ccd9-8z2ns 2/2 Running 0 14m
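In this list, the three node-exporter pods come from a DaemonSet (one per cluster node), while alertmanager-main and prometheus-k8s are managed as StatefulSets. If any pod is not Running, generic troubleshooting commands such as the following usually reveal why (the pod name is a placeholder):

kubectl get daemonset,statefulset -n monitoring
kubectl describe pod <pod-name> -n monitoring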
[root@hadoop102 kube-prometheus]# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main ClusterIP 10.98.56.171 <none> 9093/TCP 12m
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 12m
grafana ClusterIP 10.108.153.175 <none> 3000/TCP 14m
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 15m
node-exporter ClusterIP None <none> 9100/TCP 15m
prometheus-adapter ClusterIP 10.105.59.54 <none> 443/TCP 15m
prometheus-k8s ClusterIP 10.105.64.22 <none> 9090/TCP 15m
prometheus-operated ClusterIP None <none> 9090/TCP 15m
prometheus-operator ClusterIP None <none> 8443/TCP 16m
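At this point every Service is ClusterIP (or headless), so nothing is reachable from outside the cluster. The kubectl edit below changes the grafana Service's spec.type from ClusterIP to NodePort so the dashboard can be reached on a node port. A non-interactive alternative, if you prefer to skip the editor (a sketch, not from the original transcript):

kubectl patch svc grafana -n monitoring -p '{"spec": {"type": "NodePort"}}'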
[root@hadoop102 kube-prometheus]# kubectl edit svc -n monitoring grafana
# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: "2023-05-05T02:34:45Z"
  labels:
    app: grafana
  managedFields:
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:labels:
          .: {}
          f:app: {}
      f:spec:
        f:ports:
          .: {}
          k:{"port":3000,"protocol":"TCP"}:
            .: {}
            f:name: {}
            f:port: {}
            f:protocol: {}
            f:targetPort: {}
        f:selector:
          .: {}
          f:app: {}
        f:sessionAffinity: {}
        f:type: {}
    manager: kubectl
    operation: Update
    time: "2023-05-05T02:34:45Z"
  name: grafana
  namespace: monitoring
  resourceVersion: "53012"
  selfLink: /api/v1/namespaces/monitoring/services/grafana
  uid: 2389706e-b7fc-44d6-9f56-c7f21acdbd82
spec:
  clusterIP: 10.108.153.175
  ports:
  - name: http
    port: 3000
    protocol: TCP
    targetPort: http
  selector:
    app: grafana
  sessionAffinity: None
  type: NodePort
status:
  loadBalancer: {}
"/tmp/kubectl-edit-q28r8.yaml" 53L, 1236C written
service/grafana edited
[root@hadoop102 kube-prometheus]# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-main ClusterIP 10.98.56.171 <none> 9093/TCP 16m
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 16m
grafana NodePort 10.108.153.175 <none> 3000:30991/TCP 18m
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 19m
node-exporter ClusterIP None <none> 9100/TCP 19m
prometheus-adapter ClusterIP 10.105.59.54 <none> 443/TCP 19m
prometheus-k8s ClusterIP 10.105.64.22 <none> 9090/TCP 19m
prometheus-operated ClusterIP None <none> 9090/TCP 19m
prometheus-operator ClusterIP None <none> 8443/TCP 20m
[root@hadoop102 kube-prometheus]# kubectl get svc -n monitoring -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
alertmanager-main ClusterIP 10.98.56.171 <none> 9093/TCP 17m alertmanager=main,app=alertmanager
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 17m app=alertmanager
grafana NodePort 10.108.153.175 <none> 3000:30991/TCP 19m app=grafana
kube-state-metrics ClusterIP None <none> 8443/TCP,9443/TCP 20m app.kubernetes.io/name=kube-state-metrics
node-exporter ClusterIP None <none> 9100/TCP 20m app.kubernetes.io/name=node-exporter,app.kubernetes.io/version=v0.18.1
prometheus-adapter ClusterIP 10.105.59.54 <none> 443/TCP 20m name=prometheus-adapter
prometheus-k8s ClusterIP 10.105.64.22 <none> 9090/TCP 20m app=prometheus,prometheus=k8s
prometheus-operated ClusterIP None <none> 9090/TCP 20m app=prometheus
prometheus-operator ClusterIP None <none> 8443/TCP 21m app.kubernetes.io/component=controller,app.kubernetes.io/name=prometheus-operator
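The SELECTOR column shows the pod labels each Service routes to; grafana, for example, targets pods labeled app=grafana. A quick way to verify the mapping (label taken from the output above):

kubectl get pods -n monitoring -l app=grafana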
Visit http://192.168.100.102:30991
192.168.100.102 is the IP of one of the Kubernetes nodes (the host machine); 30991 is the NodePort assigned to the grafana Service.
The default username and password are both admin.
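If the node IP or NodePort is blocked by a firewall, or you just want a quick local look, kubectl port-forward works as an alternative (not part of the original walkthrough):

kubectl port-forward -n monitoring svc/grafana 3000:3000

Then open http://localhost:3000. On the first login with admin/admin, Grafana typically asks you to set a new password.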
[root@hadoop102 kube-prometheus]# kubectl edit svc -n monitoring prometheus-k8s
# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: "2023-05-05T02:33:20Z"
  labels:
    prometheus: k8s
  managedFields:
  - apiVersion: v1
    fieldsType: FieldsV1
    fieldsV1:
      f:metadata:
        f:labels:
          .: {}
          f:prometheus: {}
      f:spec:
        f:ports:
          .: {}
          k:{"port":9090,"protocol":"TCP"}:
            .: {}
            f:name: {}
            f:port: {}
            f:protocol: {}
            f:targetPort: {}
        f:selector:
          .: {}
          f:app: {}
          f:prometheus: {}
        f:sessionAffinity: {}
        f:sessionAffinityConfig:
          .: {}
          f:clientIP:
            .: {}
            f:timeoutSeconds: {}
        f:type: {}
    manager: kubectl
    operation: Update
    time: "2023-05-05T02:33:19Z"
  name: prometheus-k8s
  namespace: monitoring
  resourceVersion: "52713"
  selfLink: /api/v1/namespaces/monitoring/services/prometheus-k8s
  uid: 2ca86618-b4cb-4c81-914b-d3b0aa60c98b
spec:
  clusterIP: 10.105.64.22
  ports:
  - name: web
    port: 9090
    protocol: TCP
    targetPort: web
  selector:
    app: prometheus
    prometheus: k8s
  sessionAffinity: ClientIP
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 10800
  type: NodePort
status:
  loadBalancer: {}
"/tmp/kubectl-edit-l6gph.yaml" 63L, 1499C written
service/prometheus-k8s edited
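After this edit, the prometheus-k8s Service is also exposed as a NodePort. The port is assigned from the 30000-32767 range, so look it up before opening the Prometheus web UI (node IP and port below are placeholders):

kubectl get svc -n monitoring prometheus-k8s
# then visit http://<node-ip>:<assigned-node-port>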