Monitoring cluster (#2191)

* dashboards: add `CPU percentage` panel for cluster dashboards

The new panel `CPU percentage` was added instead of adding a limit
to the existing `CPU` panel because the dashboard may display a large number
of components, each with its own limits. The separate panel should provide
a clear display of CPU load.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* dashboards: sync vmagent and vmalert changes from single version

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* docker: remove unsupported param from vmagent config

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* alerts: add `TooHighCPUUsage` alert for all VM components

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-02-15 11:57:58 +02:00 committed by GitHub
parent 38c73a00db
commit 3458a3d593
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 555 additions and 539 deletions

View file

@ -6,7 +6,7 @@
"type": "grafana", "type": "grafana",
"id": "grafana", "id": "grafana",
"name": "Grafana", "name": "Grafana",
"version": "8.3.2" "version": "8.3.5"
}, },
{ {
"type": "panel", "type": "panel",
@ -52,12 +52,12 @@
} }
] ]
}, },
"description": "Overview for cluster VictoriaMetrics v1.70.0 or higher", "description": "Overview for cluster VictoriaMetrics v1.73.0 or higher",
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 0, "graphTooltip": 0,
"id": null, "id": null,
"iteration": 1640161142159, "iteration": 1644910726761,
"links": [ "links": [
{ {
"icon": "doc", "icon": "doc",
@ -111,8 +111,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -175,8 +174,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -240,8 +238,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -304,8 +301,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -368,8 +364,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -432,8 +427,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -496,8 +490,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -560,8 +553,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
} }
] ]
}, },
@ -630,8 +622,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green", "color": "green"
"value": null
}, },
{ {
"color": "red", "color": "red",
@ -669,8 +660,7 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "rgba(245, 54, 54, 0.9)", "color": "rgba(245, 54, 54, 0.9)"
"value": null
}, },
{ {
"color": "rgba(237, 129, 40, 0.89)", "color": "rgba(237, 129, 40, 0.89)",
@ -918,7 +908,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1012,7 +1002,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1106,7 +1096,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1200,7 +1190,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1294,7 +1284,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 1, "pointradius": 1,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1402,7 +1392,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1498,7 +1488,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1609,7 +1599,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1692,7 +1682,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 3 "y": 35
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 66, "id": 66,
@ -1716,7 +1706,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1785,7 +1775,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 3 "y": 35
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 138, "id": 138,
@ -1809,7 +1799,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1877,7 +1867,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 11 "y": 43
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 64, "id": 64,
@ -1901,7 +1891,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1957,9 +1947,10 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": { "datasource": {
"type": "prometheus",
"uid": "$ds" "uid": "$ds"
}, },
"description": "Shows average GC duration by instance", "description": "Shows the CPU usage in the percentage from the limit.",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [] "links": []
@ -1972,10 +1963,10 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 11 "y": 43
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 72, "id": 146,
"legend": { "legend": {
"alignAsTable": true, "alignAsTable": true,
"avg": true, "avg": true,
@ -1996,7 +1987,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -2006,18 +1997,32 @@
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"exemplar": true, "datasource": {
"expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)", "type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance) / process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 2, "intervalFactor": 1,
"legendFormat": "{{instance}} ({{job}})", "legendFormat": "{{instance}} ({{job}})",
"refId": "A" "refId": "A"
} }
], ],
"thresholds": [], "thresholds": [
{
"$$hashKey": "object:195",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.9,
"yaxis": "left"
}
],
"timeRegions": [], "timeRegions": [],
"title": "GC duration ($instance)", "title": "CPU percentage ($instance)",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 0, "sort": 0,
@ -2031,12 +2036,14 @@
}, },
"yaxes": [ "yaxes": [
{ {
"format": "s", "$$hashKey": "object:75",
"format": "percentunit",
"logBase": 1, "logBase": 1,
"min": "0", "min": "0",
"show": true "show": true
}, },
{ {
"$$hashKey": "object:76",
"format": "short", "format": "short",
"logBase": 1, "logBase": 1,
"show": true "show": true
@ -2067,7 +2074,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 19 "y": 51
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 117, "id": 117,
@ -2091,7 +2098,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -2156,6 +2163,196 @@
"align": false "align": false
} }
}, },
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows average GC duration by instance",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 51
},
"hiddenSeries": false,
"id": 72,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} ({{job}})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "GC duration ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 59
},
"hiddenSeries": false,
"id": 68,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} ({{job}})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Goroutines ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{ {
"aliasColors": {}, "aliasColors": {},
"bars": false, "bars": false,
@ -2177,7 +2374,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 19 "y": 59
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 122, "id": 122,
@ -2201,7 +2398,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -2286,186 +2483,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 27 "y": 67
},
"hiddenSeries": false,
"id": 68,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.0",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}} ({{job}})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Goroutines ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 27
},
"hiddenSeries": false,
"id": 119,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.0",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)",
"interval": "",
"legendFormat": "{{instance}} ({{job}})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "TCP connections ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 35
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 70, "id": 70,
@ -2489,7 +2507,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -2554,7 +2572,91 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 35 "y": 67
},
"hiddenSeries": false,
"id": 119,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)",
"interval": "",
"legendFormat": "{{instance}} ({{job}})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "TCP connections ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 75
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 120, "id": 120,
@ -2577,7 +2679,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.0.0", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -6308,7 +6410,7 @@
"type": "row" "type": "row"
} }
], ],
"schemaVersion": 33, "schemaVersion": 34,
"style": "dark", "style": "dark",
"tags": [], "tags": [],
"templating": { "templating": {

View file

@ -6,7 +6,7 @@
"type": "grafana", "type": "grafana",
"id": "grafana", "id": "grafana",
"name": "Grafana", "name": "Grafana",
"version": "8.3.2" "version": "8.3.5"
}, },
{ {
"type": "panel", "type": "panel",
@ -58,12 +58,12 @@
} }
] ]
}, },
"description": "Overview for VictoriaMetrics vmagent v1.70.0 or higher", "description": "Overview for VictoriaMetrics vmagent v1.73.0 or higher",
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 1, "graphTooltip": 1,
"id": null, "id": null,
"iteration": 1639980687827, "iteration": 1644908591152,
"links": [ "links": [
{ {
"icon": "doc", "icon": "doc",
@ -151,7 +151,7 @@
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"targets": [ "targets": [
{ {
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})", "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
@ -215,7 +215,7 @@
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"targets": [ "targets": [
{ {
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})", "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
@ -282,7 +282,7 @@
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"targets": [ "targets": [
{ {
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))", "expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
@ -341,7 +341,7 @@
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"targets": [ "targets": [
{ {
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})", "expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
@ -487,7 +487,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -583,7 +583,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -687,7 +687,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -785,7 +785,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -906,7 +906,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -999,7 +999,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1098,7 +1098,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1196,7 +1196,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1295,7 +1295,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -3613,6 +3613,7 @@
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": { "datasource": {
"type": "prometheus",
"uid": "$ds" "uid": "$ds"
}, },
"description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.", "description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
@ -3628,7 +3629,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 14 "y": 45
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 35, "id": 35,
@ -3658,21 +3659,47 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
"seriesOverrides": [], "seriesOverrides": [
{
"$$hashKey": "object:77",
"alias": "/Limit.*/",
"color": "#F2495C"
}
],
"spaceLength": 10, "spaceLength": 10,
"stack": false, "stack": false,
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(instance)", "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(instance)",
"format": "time_series", "format": "time_series",
"interval": "",
"intervalFactor": 1, "intervalFactor": 1,
"legendFormat": "{{instance}}", "legendFormat": "{{instance}}",
"refId": "A" "refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
"expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Limit ({{instance}})",
"refId": "B"
} }
], ],
"thresholds": [], "thresholds": [],
@ -3727,7 +3754,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 14 "y": 45
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 37, "id": 37,
@ -3757,7 +3784,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -3834,7 +3861,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 22 "y": 53
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 81, "id": 81,
@ -3858,7 +3885,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -3943,7 +3970,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 22 "y": 53
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 7, "id": 7,
@ -3967,7 +3994,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -4045,7 +4072,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 30 "y": 61
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 83, "id": 83,
@ -4069,7 +4096,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -4153,7 +4180,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 30 "y": 61
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 39, "id": 39,
@ -4177,7 +4204,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -4247,7 +4274,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 38 "y": 69
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 43, "id": 43,
@ -4271,7 +4298,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -4339,7 +4366,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 38 "y": 69
}, },
"hiddenSeries": false, "hiddenSeries": false,
"id": 41, "id": 41,
@ -4363,7 +4390,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.3.2", "pluginVersion": "8.3.5",
"pointradius": 2, "pointradius": 2,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -4418,7 +4445,7 @@
} }
], ],
"refresh": false, "refresh": false,
"schemaVersion": 33, "schemaVersion": 34,
"style": "dark", "style": "dark",
"tags": [ "tags": [
"vmagent", "vmagent",
@ -4428,9 +4455,9 @@
"list": [ "list": [
{ {
"current": { "current": {
"selected": true, "selected": false,
"text": "dbaas-test-t3-medium-inst", "text": "VictoriaMetrics",
"value": "dbaas-test-t3-medium-inst" "value": "VictoriaMetrics"
}, },
"hide": 0, "hide": 0,
"includeAll": false, "includeAll": false,

File diff suppressed because it is too large Load diff

View file

@ -43,6 +43,16 @@ groups:
description: "Too high memory usage may result into multiple issues such as OOMs or degraded performance. description: "Too high memory usage may result into multiple issues such as OOMs or degraded performance.
Consider to either increase available memory or decrease the load on the process." Consider to either increase available memory or decrease the load on the process."
- alert: TooHighCPUUsage
expr: rate(process_cpu_seconds_total[5m]) / process_cpu_cores_available > 0.9
for: 5m
labels:
severity: critical
annotations:
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
Consider to either increase available CPU resources or decrease the load on the process."
# Alerts group for VM cluster assumes that Grafana dashboard # Alerts group for VM cluster assumes that Grafana dashboard
# https://grafana.com/grafana/dashboards/11176 is installed. # https://grafana.com/grafana/dashboards/11176 is installed.
# Please, update the `dashboard` annotation according to your setup. # Please, update the `dashboard` annotation according to your setup.

View file

@ -1,6 +1,5 @@
global: global:
scrape_interval: 1s scrape_interval: 1s
evaluation_interval: 1s
scrape_configs: scrape_configs:
- job_name: 'vmagent' - job_name: 'vmagent'