Prometheus 监控非 K8S 集群节点
在 k8s 中,通过 helm 部署的 prometheus-operator(kube-prometheus-stack)配合其自带的 ServiceMonitor 和 PrometheusRule 资源非常好用。本文介绍如何用它监控非 k8s 集群的节点,以及如何通过自己编写的 exporter 监控由 systemd 管理的二进制服务。
一、宿主机工作
1. 启动 node-exporter
为了方便省事,这里直接使用 docker 启动:
[root@pro-dcim-01 ~]# docker run -dit --name node-exporter \
--restart=always \
-p 9100:9100 \
-v "/proc:/host/proc:ro" \
-v "/sys:/host/sys:ro" \
-v "/:/host/root:ro" \
--net="host" \
quay.io/prometheus/node-exporter:v1.2.0 \
--path.procfs /host/proc \
--path.sysfs /host/sys \
--path.rootfs /host/root \
--web.listen-address=0.0.0.0:9100 \
--collector.filesystem.ignored-mount-points "^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)" \
--collector.filesystem.ignored-fs-types "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
查看运行状态
[root@pro-dcim-01 ~]# docker ps |grep node-exporter
bd2a6629826e quay.io/prometheus/node-exporter:v1.2.0 "/bin/node_exporter …" 32 hours ago Up 27 hours node-exporter
2. 启动 service-exporter
二进制命令:service-exporter.zip
此服务用来监控由 systemd 管理启动的二进制服务
[root@pro-dcim-01 ~]# cat /etc/systemd/system/service-exporter.service
[Unit]
Description=Service Exporter for Prometheus
After=network.target
[Service]
User=root
Group=root
ExecStart=/usr/local/bin/service-exporter --server=service-exporter,redis,mysqld,vminsert,vmselect,vmstorage,emqx
Restart=on-failure
Environment=GODEBUG=netdns=cgo
CPUAccounting=yes
MemoryAccounting=yes
[Install]
WantedBy=multi-user.target
查看运行状态
[root@pro-dcim-01 ~]# systemctl status service-exporter.service
● service-exporter.service - Service Exporter for Prometheus
Loaded: loaded (/etc/systemd/system/service-exporter.service; enabled; vendor preset: disabled)
Active: active (running) since Fri 2025-06-13 17:13:35 CST; 1 day 1h ago
Main PID: 16568 (service-exporte)
Tasks: 22
Memory: 31.0M
CGroup: /system.slice/service-exporter.service
└─16568 /usr/local/bin/service-exporter --server=service-exporter,redis,mysqld,vminsert,vmselect,vmstorage,emqx
Jun 13 17:13:35 pro-dcim-01 systemd[1]: Started Service Exporter for Prometheus.
Jun 13 17:13:35 pro-dcim-01 service-exporter[16568]: 2025/06/13 17:13:35 监控以下服务: [service-exporter redis mysqld vminsert vmselect vmstorage emqx]
Jun 13 17:13:35 pro-dcim-01 service-exporter[16568]: 2025/06/13 17:13:35 Exporter 已启动,监听端口 9105
二、k8s 端工作
为了让 prometheus-operator 能够自动发现宿主机上的 exporter(agent),需要完成以下工作:
1. 创建 node-exporter 的 service
$ cat pro-dcim-01-svc-node-exporter.yaml
apiVersion: v1
kind: Service
metadata:
labels:
app: pro-dcim-01-node-exporter
name: pro-dcim-01-node-exporter
namespace: monitoring
spec:
ports:
- name: pro-dcim-01-node-exporter
port: 9100
protocol: TCP
targetPort: 9100
type: ClusterIP
$ kubectl apply -f pro-dcim-01-svc-node-exporter.yaml
2. 创建 node-exporter 的 endpoints 资源
$ cat pro-dcim-01-endpoints-node-exporter.yaml
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: pro-dcim-01-node-exporter
name: pro-dcim-01-node-exporter
namespace: monitoring
subsets:
- addresses:
- ip: 192.168.198.31
ports:
- name: pro-dcim-01-node-exporter
port: 9100
protocol: TCP
$ kubectl apply -f pro-dcim-01-endpoints-node-exporter.yaml
3. 创建 service-exporter 的 service
$ cat pro-dcim-01-svc-service-exporter.yaml
apiVersion: v1
kind: Service
metadata:
labels:
app: pro-dcim-01-service-exporter
name: pro-dcim-01-service-exporter
namespace: monitoring
spec:
ports:
- name: pro-dcim-01-service-exporter
port: 9105
protocol: TCP
targetPort: 9105
type: ClusterIP
$ kubectl apply -f pro-dcim-01-svc-service-exporter.yaml
4. 创建 service-exporter 的 endpoints 资源
$ cat pro-dcim-01-endpoints-service-exporter.yaml
apiVersion: v1
kind: Endpoints
metadata:
labels:
app: pro-dcim-01-service-exporter
name: pro-dcim-01-service-exporter
namespace: monitoring
subsets:
- addresses:
- ip: 192.168.198.31
ports:
- name: pro-dcim-01-service-exporter
port: 9105
protocol: TCP
$ kubectl apply -f pro-dcim-01-endpoints-service-exporter.yaml
5. 创建 node-exporter 服务自动发现
$ cat pro-dcim-01-node-exporter-monitoring.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: pro-dcim-01-node-exporter
namespace: monitoring
labels:
release: kube-prometheus-stack # 确保与 Prometheus 的配置匹配
spec:
selector:
matchLabels:
app: pro-dcim-01-node-exporter # 查找标签为 pro-dcim-01-node-exporter 的 service
endpoints:
- port: pro-dcim-01-node-exporter # 查找 port 名称为 pro-dcim-01-node-exporter 的 endpoints
interval: 15s
path: /metrics
relabelings:
- sourceLabels: [__address__]
regex: '([^:]+):\d+'
targetLabel: instance
replacement: '${1}'
action: replace
# 静态标签
- targetLabel: region
replacement: "边缘机器"
- targetLabel: site
replacement: "北京-世纪互联-M6"
- targetLabel: role
replacement: "跳板机"
- targetLabel: nodename
replacement: "dcim-01"
namespaceSelector:
matchNames:
- monitoring
$ kubectl apply -f pro-dcim-01-node-exporter-monitoring.yaml
6. 创建 service-exporter 服务自动发现
$ cat pro-dcim-01-service-exporter-monitoring.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: pro-dcim-01-service-exporter
namespace: monitoring
labels:
release: kube-prometheus-stack # 确保与 Prometheus 的配置匹配
spec:
selector:
matchLabels:
app: pro-dcim-01-service-exporter # 查找标签为 pro-dcim-01-service-exporter 的 service
endpoints:
- port: pro-dcim-01-service-exporter # 查找 port 名称为 pro-dcim-01-service-exporter 的 endpoints
interval: 15s
path: /metrics
relabelings:
- sourceLabels: [__address__]
regex: '([^:]+):\d+'
targetLabel: instance
replacement: '${1}'
action: replace
# 静态标签
- targetLabel: region
replacement: "边缘机器"
- targetLabel: site
replacement: "北京-世纪互联-M6"
- targetLabel: role
replacement: "跳板机"
- targetLabel: nodename
replacement: "dcim-01"
namespaceSelector:
matchNames:
- monitoring
$ kubectl apply -f pro-dcim-01-service-exporter-monitoring.yaml
7. 检查 prometheus 页面

8. 告警规则
$ cat bastion-node-service-rules.yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
release: kube-prometheus-stack
app: prometheus-operator
name: edge-bastion-rules
namespace: monitoring
spec:
groups:
# ==================== 系统级监控规则 ====================
- name: edge-bastion-system-rules
rules:
- alert: 跳板机服务器宕机
expr: up{role="跳板机"} == 0
for: 5m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 跳板机节点失联 ({{ $labels.nodename }})"
description: |
边缘跳板机节点不可达!
位置:{{ $labels.site }}
节点:{{ $labels.nodename }} ({{ $labels.instance }})
离线时长:5分钟以上
- alert: 跳板机服务器CPU使用率高
expr: 100 - (avg by(instance, nodename) (irate(node_cpu_seconds_total{role="跳板机",mode="idle"}[5m])) * 100) > 80
for: 10m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] CPU负载过高 ({{ $labels.nodename }})"
description: |
跳板机CPU持续高负载!
当前使用率:{{ $value | printf "%.2f" }}%
阈值:>80%
持续时间:10分钟
- alert: 跳板机服务器内存不足
expr: (node_memory_MemAvailable_bytes{role="跳板机"} / node_memory_MemTotal_bytes{role="跳板机"}) * 100 < 15
for: 10m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 内存不足 (dcim-01)"
description: |
可用内存低于安全阈值!
当前可用:{{ $value | printf "%.2f" }}%
阈值:<15%
建议检查:内存泄漏或进程异常
# ==================== 网络监控规则 ====================
- name: edge-bastion-network-rules
rules:
- alert: 跳板机服务器网卡入站流量高
expr: rate(node_network_receive_bytes_total{role="跳板机",device!~"lo|veth.*"}[1m]) / 1024 / 1024 > 20 # 超过20MB/s入站流量
for: 5m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 入站流量过高 ({{ $labels.nodename }}@{{ $labels.device }})"
description: |
网络入站流量异常!
网卡:{{ $labels.device }}
当前速率:{{ $value | printf "%.2f" }} MB/s
阈值:>20 MB/s
可能原因:异常上传或网络攻击
- alert: 跳板机服务器出站流量高
expr: rate(node_network_transmit_bytes_total{role="跳板机",device!~"lo|veth.*"}[1m]) / 1024 / 1024 > 20 # 超过20MB/s出站流量
for: 5m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 出站流量过高 ({{ $labels.nodename }}@{{ $labels.device }})"
description: |
网络出站流量异常!
网卡:{{ $labels.device }}
当前速率:{{ $value | printf "%.2f" }} MB/s
阈值:>20 MB/s
可能原因:异常下载或数据泄露
- alert: 跳板机服务器流量异常
expr: rate(node_network_receive_errs_total{role="跳板机",device!~"lo|veth.*"}[5m]) + rate(node_network_transmit_errs_total{role="跳板机"}[5m]) > 10
for: 0m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 网络错误激增 ({{ $labels.nodename }})"
description: |
网络错误包数异常!
5 分钟错误数:{{ $value }}
阈值:>10个
可能原因:网卡故障或网络拥塞
# ==================== systemd 服务监控规则 ====================
- name: edge-bastion-service-rules
rules:
- alert: 跳板机服务器服务发生关闭
expr: systemd_service_status{role="跳板机"} == 0
for: 2m
labels:
severity: "edge-bastion"
annotations:
summary: "[边缘机器] 关键服务停止 ({{ $labels.exported_service }})"
description: |
核心服务异常停止运行!
服务名称:{{ $labels.exported_service }}
节点:{{ $labels.nodename }}
停止时长:2分钟以上
$ kubectl apply -f bastion-node-service-rules.yaml
9. PrometheusAlert 添加模板


模板如下:
{{ $alertmanagerURL := "http://192.168.233.32:32093" -}}
{{ $alerts := .alerts -}}
{{ range $alert := $alerts -}}
{{ $groupKey := printf "%s|%s" $alert.labels.alertname $alert.status -}}
{{ $urimsg := "" -}}
{{ range $key,$value := $alert.labels -}}
{{ $urimsg = print $urimsg $key "%3D%22" $value "%22%2C" -}}
{{ end -}}
{{ $grafanaURL := printf "http://192.168.233.32:32030/d/0p4iw4EHk/bian-yuan-tiao-ban-ji-zhuang-tai-xin-xi?orgId=1&refresh=30s&var-datasource=Prometheus&var-instance=%s" $alert.labels.instance }}
{{ if eq $alert.status "resolved" -}}
🟢 边缘服务器恢复通知 🟢
{{ else -}}
🚨 边缘服务器告警通知 🚨
{{ end -}}
---
🔧 **服务监控详情** {{ if eq $alert.status "resolved" }}✅{{ else }}❗{{ end }}
🖥️ **节点名称**: {{ $alert.labels.nodename }}
🌐 **实例地址**: {{ $alert.labels.instance }}
📍 **所在位置**: {{ $alert.labels.site }}
⚠️ **告警级别**: {{ $alert.labels.severity }}
🕒 **开始时间**: {{ GetCSTtime $alert.startsAt }}
{{ if eq $alert.status "resolved" }}🕒 **结束时间**: {{ GetCSTtime $alert.endsAt }}
{{ end }}
{{ if $alert.labels.device }}🔌 **网卡名称**: **{{ $alert.labels.device }}**
{{ end }}
{{ if $alert.labels.exported_service }}🛎️ **服务名称**: {{ $alert.labels.exported_service }}
{{ end }}
---
📝 **告警描述**
{{ $alert.annotations.description }}
🚀 **快速操作**
- **[点我屏蔽该告警]({{ $alertmanagerURL }}/#/silences/new?filter=%7B{{ SplitString $urimsg 0 -3 }}%7D)**
- **[点击我查看 Grafana 监控面板]({{ $grafanaURL }})**
---
🛠️ **处理建议**
{{ if or (eq $alert.labels.alertname "跳板机服务器出站流量高") (eq $alert.labels.alertname "跳板机服务器网卡入站流量高") }}
1. 流量分析工具:
- 实时流量: nload -m {{ $alert.labels.device }}
- 连接跟踪: nethogs {{ $alert.labels.device }}
{{ else if eq $alert.labels.alertname "跳板机服务器流量异常" }}
1. 网络错误诊断:
- 错误统计: ip -s link show {{ $alert.labels.device }}
- 驱动检测: ethtool {{ $alert.labels.device }}
{{ else if eq $alert.labels.alertname "跳板机服务器服务发生关闭" }}
1. 服务检查:
- 日志查看: journalctl -u {{ $alert.labels.exported_service }} -n 50
- 状态验证: systemctl status {{ $alert.labels.exported_service }}
{{ else if eq $alert.labels.alertname "跳板机服务器宕机" }}
1. 基础检查:
- 连通性测试: ping -c 4 {{ $alert.labels.instance }}
- 端口检测: nc -zv {{ $alert.labels.instance }} 9100
{{ else }}
1. 通用诊断:
- 系统负载: top -n 1 -b | head -20
- 磁盘空间: df -hT
- 检查内存: free -h
{{ end }}
{{ end }}
10. helm 包修改
routes:
- match:
severity: critical # 匹配严重级别 (critical) 的告警
receiver: critical-alerts # 严重告警发送到 critical-alerts 接收器
group_interval: 5m # 严重告警每 5 分钟可以发送一组
repeat_interval: 1h # 严重告警每 1 小时重复提醒一次
- match:
severity: warning # 匹配警告级别 (warning) 的告警
receiver: warning-alerts # 警告告警发送到 warning-alerts 接收器
repeat_interval: 4h # 警告告警每 4 小时重复提醒一次
- match:
severity: info # 匹配信息级别 (info) 的告警
receiver: info-alerts # 信息告警发送到 info-alerts 接收器
repeat_interval: 1h # 信息告警每 1 小时重复提醒一次
#### 边缘跳板机告警推送 ####
- match:
severity: edge-bastion # 匹配级别 (edge-bastion) 的告警
receiver: edge-bastion # 边缘跳板机告警发送到 edge-bastion 接收器
group_interval: 5m # 边缘跳板机告警每 5 分钟可以发送一组
repeat_interval: 1h # 边缘跳板机告警每 1 小时重复提醒一次
inhibit_rules:
# 定义告警抑制规则,当较高优先级的告警存在时抑制较低优先级的告警
- source_match:
severity: 'critical' # 来源是严重级别的告警
target_match:
severity: 'warning' # 目标是警告级别的告警
equal: ['alertname', 'namespace', 'instance'] # 如果告警名称、命名空间和实例相同,则抑制警告级别的告警
- source_match:
severity: 'warning' # 来源是警告级别的告警
target_match:
severity: 'info' # 目标是信息级别的告警
equal: ['alertname', 'namespace', 'instance'] # 如果告警名称、命名空间和实例相同,则抑制信息级别的告警
receivers:
- name: edge-bastion # 接收器,然后发送到飞书上面去
webhook_configs:
- url: http://webhook-service:8080/prometheusalert?type=fs&tpl=edge-bastion&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx-xxxxxxxxxxx-xxxxxxxx
更新 helm
$ helm -n monitoring upgrade --install kube-prometheus-stack ./ -f values.yaml
11. 查看告警消息



12. grafana 面板
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 1,
"id": 75,
"iteration": 1750749873112,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "(\n (1 - rate(node_cpu_seconds_total{role=\"跳板机\", mode=\"idle\", instance=\"$instance\"}[$__rate_interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{role=\"跳板机\", mode=\"idle\", instance=\"$instance\"})\n)\n",
"format": "time_series",
"intervalFactor": 5,
"legendFormat": "{{cpu}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU 使用率",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
},
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"hiddenSeries": false,
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_load1{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "1m 系统负载",
"refId": "A"
},
{
"expr": "node_load5{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5m 系统负载",
"refId": "B"
},
{
"expr": "node_load15{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "15m 系统负载",
"refId": "C"
},
{
"expr": "count(node_cpu_seconds_total{role=\"跳板机\", instance=\"$instance\", mode=\"idle\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "logical cores",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "系统负载",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 18,
"x": 0,
"y": 7
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "(\n node_memory_MemTotal_bytes{role=\"跳板机\", instance=\"$instance\"}\n-\n node_memory_MemFree_bytes{role=\"跳板机\", instance=\"$instance\"}\n-\n node_memory_Buffers_bytes{role=\"跳板机\", instance=\"$instance\"}\n-\n node_memory_Cached_bytes{role=\"跳板机\", instance=\"$instance\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory used",
"refId": "A"
},
{
"expr": "node_memory_Buffers_bytes{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached_bytes{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree_bytes{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory free",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "内存使用率",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"cacheTimeout": null,
"datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(50, 172, 45, 0.97)",
"value": null
},
{
"color": "rgba(237, 129, 40, 0.89)",
"value": 80
},
{
"color": "rgba(245, 54, 54, 0.9)",
"value": 90
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 7
},
"id": 5,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"text": {}
},
"pluginVersion": "8.0.5",
"targets": [
{
"expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{role=\"跳板机\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{role=\"跳板机\", instance=\"$instance\"})\n* 100\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A"
}
],
"title": "内存使用率",
"type": "gauge"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 14
},
"hiddenSeries": false,
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/ read| written/",
"yaxis": 1
},
{
"alias": "/ io time/",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_disk_read_bytes_total{role=\"跳板机\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}} read",
"refId": "A"
},
{
"expr": "rate(node_disk_written_bytes_total{role=\"跳板机\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}} written",
"refId": "B"
},
{
"expr": "rate(node_disk_io_time_seconds_total{role=\"跳板机\", instance=\"$instance\", device=~\"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}} io time",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "磁盘 I/O",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 14
},
"hiddenSeries": false,
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "used",
"color": "#E0B400"
},
{
"alias": "available",
"color": "#73BF69"
}
],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{role=\"跳板机\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{role=\"跳板机\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "used",
"refId": "A"
},
{
"expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{role=\"跳板机\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "available",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "磁盘空间使用情况",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 21
},
"hiddenSeries": false,
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_receive_bytes_total{role=\"跳板机\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "入站网络负载",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 21
},
"hiddenSeries": false,
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_transmit_bytes_total{role=\"跳板机\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "出站网络负载",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 28
},
"hiddenSeries": false,
"id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "systemd_service_status{role=\"跳板机\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{exported_service}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "系统服务状态",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": "状态 (1=运行, 0=未运行)",
"logBase": 1,
"max": 1,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": 1,
"min": 0,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "30s",
"schemaVersion": 30,
"style": "dark",
"tags": [
"node-exporter-mixin"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "Prometheus"
},
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"current": {
"selected": false,
"text": "192.168.198.31",
"value": "192.168.198.31"
},
"datasource": "$datasource",
"definition": "",
"description": "选择跳板机节点",
"error": null,
"hide": 0,
"includeAll": false,
"label": "节点IP",
"multi": false,
"name": "instance",
"options": [],
"query": {
"query": "label_values(node_exporter_build_info{role=\"跳板机\"}, instance)",
"refId": "Prometheus-instance-Variable-Query"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "utc",
"title": "边缘跳板机状态信息",
"uid": "zhentianxiang",
"version": 1
}
效果截图

License:
CC BY 4.0