mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
Monitoring single (#2190)
* dashboards: plot cpu limits for vmagent, vmalert and vm-single dashboards Signed-off-by: hagen1778 <roman@victoriametrics.com> * alerts: add `TooHighCPUUsage` alert for all VM components Signed-off-by: hagen1778 <roman@victoriametrics.com> * dashboards: bump components version requirements Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
0d3e00e512
commit
e29b2b8444
4 changed files with 890 additions and 949 deletions
File diff suppressed because it is too large
Load diff
|
@ -6,7 +6,7 @@
|
||||||
"type": "grafana",
|
"type": "grafana",
|
||||||
"id": "grafana",
|
"id": "grafana",
|
||||||
"name": "Grafana",
|
"name": "Grafana",
|
||||||
"version": "8.3.2"
|
"version": "8.3.5"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "panel",
|
"type": "panel",
|
||||||
|
@ -58,12 +58,12 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"description": "Overview for VictoriaMetrics vmagent v1.70.0 or higher",
|
"description": "Overview for VictoriaMetrics vmagent v1.73.0 or higher",
|
||||||
"editable": true,
|
"editable": true,
|
||||||
"fiscalYearStartMonth": 0,
|
"fiscalYearStartMonth": 0,
|
||||||
"graphTooltip": 1,
|
"graphTooltip": 1,
|
||||||
"id": null,
|
"id": null,
|
||||||
"iteration": 1639980687827,
|
"iteration": 1644908591152,
|
||||||
"links": [
|
"links": [
|
||||||
{
|
{
|
||||||
"icon": "doc",
|
"icon": "doc",
|
||||||
|
@ -151,7 +151,7 @@
|
||||||
"text": {},
|
"text": {},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
|
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
|
||||||
|
@ -215,7 +215,7 @@
|
||||||
"text": {},
|
"text": {},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
|
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
|
||||||
|
@ -282,7 +282,7 @@
|
||||||
"text": {},
|
"text": {},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
|
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
|
||||||
|
@ -341,7 +341,7 @@
|
||||||
"text": {},
|
"text": {},
|
||||||
"textMode": "auto"
|
"textMode": "auto"
|
||||||
},
|
},
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
|
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
|
||||||
|
@ -487,7 +487,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -583,7 +583,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -687,7 +687,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -785,7 +785,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -906,7 +906,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -999,7 +999,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -1098,7 +1098,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -1196,7 +1196,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -1295,7 +1295,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -3613,6 +3613,7 @@
|
||||||
"dashLength": 10,
|
"dashLength": 10,
|
||||||
"dashes": false,
|
"dashes": false,
|
||||||
"datasource": {
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
"uid": "$ds"
|
"uid": "$ds"
|
||||||
},
|
},
|
||||||
"description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
|
"description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
|
||||||
|
@ -3628,7 +3629,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 14
|
"y": 45
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 35,
|
"id": 35,
|
||||||
|
@ -3658,21 +3659,47 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
"seriesOverrides": [],
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"$$hashKey": "object:77",
|
||||||
|
"alias": "/Limit.*/",
|
||||||
|
"color": "#F2495C"
|
||||||
|
}
|
||||||
|
],
|
||||||
"spaceLength": 10,
|
"spaceLength": 10,
|
||||||
"stack": false,
|
"stack": false,
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "$ds"
|
||||||
|
},
|
||||||
|
"exemplar": false,
|
||||||
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(instance)",
|
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(instance)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
|
"interval": "",
|
||||||
"intervalFactor": 1,
|
"intervalFactor": 1,
|
||||||
"legendFormat": "{{instance}}",
|
"legendFormat": "{{instance}}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "$ds"
|
||||||
|
},
|
||||||
|
"exemplar": false,
|
||||||
|
"expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}",
|
||||||
|
"format": "time_series",
|
||||||
|
"hide": false,
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 1,
|
||||||
|
"legendFormat": "Limit ({{instance}})",
|
||||||
|
"refId": "B"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": [],
|
"thresholds": [],
|
||||||
|
@ -3727,7 +3754,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 12,
|
"x": 12,
|
||||||
"y": 14
|
"y": 45
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 37,
|
"id": 37,
|
||||||
|
@ -3757,7 +3784,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -3834,7 +3861,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 22
|
"y": 53
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 81,
|
"id": 81,
|
||||||
|
@ -3858,7 +3885,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -3943,7 +3970,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 12,
|
"x": 12,
|
||||||
"y": 22
|
"y": 53
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 7,
|
"id": 7,
|
||||||
|
@ -3967,7 +3994,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -4045,7 +4072,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 30
|
"y": 61
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 83,
|
"id": 83,
|
||||||
|
@ -4069,7 +4096,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -4153,7 +4180,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 12,
|
"x": 12,
|
||||||
"y": 30
|
"y": 61
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 39,
|
"id": 39,
|
||||||
|
@ -4177,7 +4204,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -4247,7 +4274,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 38
|
"y": 69
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 43,
|
"id": 43,
|
||||||
|
@ -4271,7 +4298,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -4339,7 +4366,7 @@
|
||||||
"h": 8,
|
"h": 8,
|
||||||
"w": 12,
|
"w": 12,
|
||||||
"x": 12,
|
"x": 12,
|
||||||
"y": 38
|
"y": 69
|
||||||
},
|
},
|
||||||
"hiddenSeries": false,
|
"hiddenSeries": false,
|
||||||
"id": 41,
|
"id": 41,
|
||||||
|
@ -4363,7 +4390,7 @@
|
||||||
"alertThreshold": true
|
"alertThreshold": true
|
||||||
},
|
},
|
||||||
"percentage": false,
|
"percentage": false,
|
||||||
"pluginVersion": "8.3.2",
|
"pluginVersion": "8.3.5",
|
||||||
"pointradius": 2,
|
"pointradius": 2,
|
||||||
"points": false,
|
"points": false,
|
||||||
"renderer": "flot",
|
"renderer": "flot",
|
||||||
|
@ -4418,7 +4445,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"refresh": false,
|
"refresh": false,
|
||||||
"schemaVersion": 33,
|
"schemaVersion": 34,
|
||||||
"style": "dark",
|
"style": "dark",
|
||||||
"tags": [
|
"tags": [
|
||||||
"vmagent",
|
"vmagent",
|
||||||
|
@ -4428,9 +4455,9 @@
|
||||||
"list": [
|
"list": [
|
||||||
{
|
{
|
||||||
"current": {
|
"current": {
|
||||||
"selected": true,
|
"selected": false,
|
||||||
"text": "dbaas-test-t3-medium-inst",
|
"text": "VictoriaMetrics",
|
||||||
"value": "dbaas-test-t3-medium-inst"
|
"value": "VictoriaMetrics"
|
||||||
},
|
},
|
||||||
"hide": 0,
|
"hide": 0,
|
||||||
"includeAll": false,
|
"includeAll": false,
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -44,6 +44,17 @@ groups:
|
||||||
description: "Too high memory usage may result into multiple issues such as OOMs or degraded performance.
|
description: "Too high memory usage may result into multiple issues such as OOMs or degraded performance.
|
||||||
Consider to either increase available memory or decrease the load on the process."
|
Consider to either increase available memory or decrease the load on the process."
|
||||||
|
|
||||||
|
- alert: TooHighCPUUsage
|
||||||
|
expr: rate(process_cpu_seconds_total[5m]) / process_cpu_cores_available > 0.9
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
|
||||||
|
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
|
||||||
|
Consider to either increase available CPU resources or decrease the load on the process."
|
||||||
|
|
||||||
|
|
||||||
# Alerts group for VM single assumes that Grafana dashboard
|
# Alerts group for VM single assumes that Grafana dashboard
|
||||||
# https://grafana.com/grafana/dashboards/10229 is installed.
|
# https://grafana.com/grafana/dashboards/10229 is installed.
|
||||||
# Pls update the `dashboard` annotation according to your setup.
|
# Pls update the `dashboard` annotation according to your setup.
|
||||||
|
|
Loading…
Reference in a new issue