diff --git a/dashboards/victoriametrics.json b/dashboards/victoriametrics.json index 80ad0446b..de71f1494 100644 --- a/dashboards/victoriametrics.json +++ b/dashboards/victoriametrics.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.3.2" + "version": "8.3.5" }, { "type": "panel", @@ -52,12 +52,12 @@ } ] }, - "description": "Overview for cluster VictoriaMetrics v1.70.0 or higher", + "description": "Overview for cluster VictoriaMetrics v1.73.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, - "iteration": 1640161142159, + "iteration": 1644910726761, "links": [ { "icon": "doc", @@ -111,8 +111,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -175,8 +174,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -240,8 +238,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -304,8 +301,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -368,8 +364,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -432,8 +427,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -496,8 +490,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -560,8 +553,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -630,8 +622,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -669,8 +660,7 @@ "mode": "absolute", "steps": [ { - "color": "rgba(245, 54, 54, 0.9)", - "value": null + "color": "rgba(245, 54, 54, 0.9)" }, { "color": "rgba(237, 129, 40, 0.89)", @@ -918,7 +908,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + 
"pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1012,7 +1002,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1106,7 +1096,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1200,7 +1190,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1294,7 +1284,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -1402,7 +1392,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1498,7 +1488,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1609,7 +1599,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1692,7 +1682,7 @@ "h": 8, "w": 12, "x": 0, - "y": 3 + "y": 35 }, "hiddenSeries": false, "id": 66, @@ -1716,7 +1706,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1785,7 +1775,7 @@ "h": 8, "w": 12, "x": 12, - "y": 3 + "y": 35 }, "hiddenSeries": false, "id": 138, @@ -1809,7 +1799,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1877,7 +1867,7 @@ "h": 8, "w": 12, "x": 0, - "y": 11 + "y": 43 }, "hiddenSeries": false, 
"id": 64, @@ -1901,7 +1891,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1957,9 +1947,10 @@ "dashLength": 10, "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds" }, - "description": "Shows average GC duration by instance", + "description": "Shows CPU usage as a percentage of the available CPU cores limit.", "fieldConfig": { "defaults": { "links": [] @@ -1972,10 +1963,10 @@ "h": 8, "w": 12, "x": 12, - "y": 11 + "y": 43 }, "hiddenSeries": false, - "id": 72, + "id": 146, "legend": { "alignAsTable": true, "avg": true, @@ -1996,7 +1987,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2006,18 +1997,32 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)", + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": false, + "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance) / process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}", "format": "time_series", "interval": "", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "{{instance}} ({{job}})", "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "$$hashKey": "object:195", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.9, + "yaxis": "left" + } + ], "timeRegions": [], - "title": "GC duration ($instance)", + "title": "CPU percentage ($instance)", "tooltip": { "shared": true, "sort": 0, @@ -2031,12 +2036,14 @@ }, "yaxes": [ { - "format": "s", + "$$hashKey": "object:75", + "format": "percentunit", "logBase": 1, "min": 
"0", "show": true }, { + "$$hashKey": "object:76", "format": "short", "logBase": 1, "show": true @@ -2067,7 +2074,7 @@ "h": 8, "w": 12, "x": 0, - "y": 19 + "y": 51 }, "hiddenSeries": false, "id": 117, @@ -2091,7 +2098,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2156,6 +2163,196 @@ "align": false } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "description": "Shows average GC duration by instance", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 51 + }, + "hiddenSeries": false, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(go_gc_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)\n/\nsum(rate(go_gc_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])) by(job, instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} ({{job}})", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "GC duration ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 59 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}} ({{job}})", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Goroutines ($instance)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, { "aliasColors": {}, "bars": false, @@ -2177,7 +2374,7 @@ "h": 8, "w": 12, "x": 12, - "y": 19 + "y": 59 }, "hiddenSeries": false, "id": 122, @@ -2201,7 +2398,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": 
"8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2286,186 +2483,7 @@ "h": 8, "w": 12, "x": 0, - "y": 27 - }, - "hiddenSeries": false, - "id": 68, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} ({{job}})", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Goroutines ($instance)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "$ds" - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 27 - }, - "hiddenSeries": false, - "id": 119, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "8.0.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)", - "interval": "", - "legendFormat": "{{instance}} ({{job}})", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "TCP connections ($instance)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "$ds" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 35 + "y": 67 }, "hiddenSeries": false, "id": 70, @@ -2489,7 +2507,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2554,7 +2572,91 @@ "h": 8, "w": 12, "x": 12, - "y": 35 + "y": 67 + }, + "hiddenSeries": false, + "id": 119, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + 
"expr": "sum(vm_tcplistener_conns{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)", + "interval": "", + "legendFormat": "{{instance}} ({{job}})", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP connections ($instance)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 }, "hiddenSeries": false, "id": 120, @@ -2577,7 +2679,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -6308,7 +6410,7 @@ "type": "row" } ], - "schemaVersion": 33, + "schemaVersion": 34, "style": "dark", "tags": [], "templating": { diff --git a/dashboards/vmagent.json b/dashboards/vmagent.json index 7cfea626d..5465a4fc9 100644 --- a/dashboards/vmagent.json +++ b/dashboards/vmagent.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.3.2" + "version": "8.3.5" }, { "type": "panel", @@ -58,12 +58,12 @@ } ] }, - "description": "Overview for VictoriaMetrics vmagent v1.70.0 or higher", + "description": "Overview for VictoriaMetrics vmagent v1.73.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, - "iteration": 1639980687827, + "iteration": 1644908591152, "links": [ { "icon": "doc", @@ -151,7 +151,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "targets": [ { "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", 
status=\"up\"})", @@ -215,7 +215,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "targets": [ { "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})", @@ -282,7 +282,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "targets": [ { "expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))", @@ -341,7 +341,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "targets": [ { "expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})", @@ -487,7 +487,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -583,7 +583,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -687,7 +687,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -785,7 +785,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -906,7 +906,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -999,7 +999,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1098,7 +1098,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1196,7 +1196,7 @@ "alertThreshold": true }, "percentage": 
false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1295,7 +1295,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -3613,6 +3613,7 @@ "dashLength": 10, "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds" }, "description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.", @@ -3628,7 +3629,7 @@ "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 45 }, "hiddenSeries": false, "id": 35, @@ -3658,21 +3659,47 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "$$hashKey": "object:77", + "alias": "/Limit.*/", + "color": "#F2495C" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": false, "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(instance)", "format": "time_series", + "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": false, + "expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit ({{instance}})", + "refId": "B" } ], "thresholds": [], @@ -3727,7 +3754,7 @@ "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 45 }, "hiddenSeries": false, "id": 37, @@ -3757,7 +3784,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ 
-3834,7 +3861,7 @@ "h": 8, "w": 12, "x": 0, - "y": 22 + "y": 53 }, "hiddenSeries": false, "id": 81, @@ -3858,7 +3885,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -3943,7 +3970,7 @@ "h": 8, "w": 12, "x": 12, - "y": 22 + "y": 53 }, "hiddenSeries": false, "id": 7, @@ -3967,7 +3994,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -4045,7 +4072,7 @@ "h": 8, "w": 12, "x": 0, - "y": 30 + "y": 61 }, "hiddenSeries": false, "id": 83, @@ -4069,7 +4096,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -4153,7 +4180,7 @@ "h": 8, "w": 12, "x": 12, - "y": 30 + "y": 61 }, "hiddenSeries": false, "id": 39, @@ -4177,7 +4204,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -4247,7 +4274,7 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 69 }, "hiddenSeries": false, "id": 43, @@ -4271,7 +4298,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -4339,7 +4366,7 @@ "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 69 }, "hiddenSeries": false, "id": 41, @@ -4363,7 +4390,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.3.2", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -4418,7 +4445,7 @@ } ], "refresh": false, - "schemaVersion": 33, + "schemaVersion": 34, "style": "dark", "tags": [ "vmagent", @@ -4428,9 +4455,9 @@ "list": [ { "current": { - "selected": true, - "text": "dbaas-test-t3-medium-inst", - "value": "dbaas-test-t3-medium-inst" + "selected": false, + 
"text": "VictoriaMetrics", + "value": "VictoriaMetrics" }, "hide": 0, "includeAll": false, diff --git a/dashboards/vmalert.json b/dashboards/vmalert.json index e8bceca51..d4cf3ec11 100644 --- a/dashboards/vmalert.json +++ b/dashboards/vmalert.json @@ -5,7 +5,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.0.3" + "version": "8.3.5" }, { "type": "panel", @@ -57,12 +57,12 @@ } ] }, - "description": "Overview for VictoriaMetrics vmalert v1.65.0 or higher", + "description": "Overview for VictoriaMetrics vmalert v1.73.0 or higher", "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, - "iteration": 1630393200659, + "iteration": 1644909221704, "links": [ { "asDropdown": false, @@ -101,10 +101,10 @@ "url": " https://github.com/VictoriaMetrics/VictoriaMetrics/releases" } ], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -117,7 +117,9 @@ "type": "row" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows if the last configuration update was successful. \"Not Ok\" means there was an unsuccessful attempt to update the configuration due to some error. 
Check the log for details.", "fieldConfig": { "defaults": { @@ -179,7 +181,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "targets": [ { "exemplar": false, @@ -189,13 +191,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Config error", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the total number of errors generated by recording/alerting rules for selected instances and groups.", "fieldConfig": { "defaults": { @@ -238,7 +240,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "targets": [ { "exemplar": false, @@ -248,13 +250,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Errors", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the total number of loaded alerting rules across selected instances and groups.", "fieldConfig": { "defaults": { @@ -293,7 +295,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "targets": [ { "exemplar": false, @@ -303,13 +305,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Alerting rules", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the total number of loaded recording rules across selected instances and groups.", "fieldConfig": { "defaults": { @@ -348,7 +350,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "targets": [ { "exemplar": false, @@ -358,14 +360,14 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Recording rules", "type": "stat" }, { "columns": [], - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fontSize": "100%", "gridPos": { "h": 7, @@ -374,7 +376,6 @@ "y": 1 }, "id": 2, - "pageSize": null, "scroll": true, "showHeader": true, "sort": { @@ -405,7 +406,6 @@ { 
"alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -422,7 +422,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -449,8 +448,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Uptime", "transform": "table", "type": "table-old" @@ -460,7 +457,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -487,7 +486,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": false, "total": false, @@ -500,7 +498,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -520,9 +518,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Uptime", "tooltip": { "shared": true, @@ -531,9 +527,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -542,20 +536,15 @@ "$$hashKey": "object:170", "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:171", - "decimals": null, "format": "short", "label": "", "logBase": 1, - "max": null, - "min": null, "show": true } ], @@ -569,7 +558,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the number of fired alerts by instance.", "fill": 1, "fillGradient": 0, @@ -600,7 +591,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -618,9 +609,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Alerts 
fired total", "tooltip": { "shared": true, @@ -629,33 +618,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -663,7 +643,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Average evaluation duration by group. Basically means how long it takes to execute all the rules per each group.", "fieldConfig": { "defaults": { @@ -700,7 +682,7 @@ "alertThreshold": false }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -718,9 +700,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Groups avg evaluation duration ($group)", "tooltip": { "shared": true, @@ -729,33 +709,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -763,7 +734,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows how many requests (executions) per second vmalert sends to the configured datasource.", "fill": 0, "fillGradient": 0, @@ -794,7 +767,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -812,9 +785,7 @@ } 
], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Rules execution rate ($instance)", "tooltip": { "shared": true, @@ -823,33 +794,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -857,7 +819,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the error rate while executing configured rules. Non-zero value means there are some issues with existing rules. Check the logs to get more details.", "fill": 1, "fillGradient": 0, @@ -888,7 +852,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -906,9 +870,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Rules execution errors ($instance)", "tooltip": { "shared": true, @@ -917,38 +879,28 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": true, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -962,7 +914,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the current active (firing) alerting rules per group.", "fill": 0, "fillGradient": 0, @@ 
-1011,9 +965,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Active ($group)", "tooltip": { "shared": true, @@ -1022,33 +974,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1056,7 +999,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the events when rule execution resulted into an error. Check the logs for more details.", "fill": 0, "fillGradient": 0, @@ -1105,9 +1050,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Errors ($group)", "tooltip": { "shared": true, @@ -1116,33 +1059,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1150,7 +1084,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the current pending alerting rules per group.\nBy pending means the rule which remains active less than configured `for` parameter.", "fill": 0, "fillGradient": 0, @@ -1199,9 +1135,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Pending ($group)", "tooltip": { "shared": true, @@ -1210,33 +1144,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": 
"time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1244,7 +1169,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows how many alerts are sent to Alertmanager per second. Only active alerts are sent.", "fill": 0, "fillGradient": 0, @@ -1293,9 +1220,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Requests rate to Alertmanager ($group)", "tooltip": { "shared": true, @@ -1304,9 +1229,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1314,25 +1237,20 @@ { "$$hashKey": "object:229", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:230", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1340,7 +1258,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the error rate for the attempts to send alerts to Alertmanager. If not zero it means there issues on attempt to send notification to Alertmanager and some alerts may be not delivered properly. 
Check the logs for more details.", "fill": 0, "fillGradient": 0, @@ -1389,9 +1309,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Errors rate to Alertmanager ($group)", "tooltip": { "shared": true, @@ -1400,9 +1318,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1410,25 +1326,20 @@ { "$$hashKey": "object:229", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:230", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -1437,7 +1348,6 @@ }, { "collapsed": true, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -1451,7 +1361,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the top 10 recording rules which generate the most of samples. Each generated sample is basically a time series which then ingested into configured remote storage. 
Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.", "fill": 0, "fillGradient": 0, @@ -1500,9 +1412,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Top 10 rules by produced samples ($group)", "tooltip": { "shared": true, @@ -1511,37 +1421,30 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.", "fieldConfig": { "defaults": { @@ -1581,8 +1484,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1625,8 +1527,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Rules with 0 produced samples ($group)", "type": "timeseries" }, @@ -1635,7 +1535,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fill": 0, "fillGradient": 0, "gridPos": { @@ -1683,9 +1585,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Errors ($group)", "tooltip": { "shared": true, @@ -1694,33 +1594,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { 
"format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -1729,7 +1620,6 @@ }, { "collapsed": true, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -1743,7 +1633,10 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, "description": "Shows the CPU usage per vmalert instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.", "fieldConfig": { "defaults": { @@ -1757,7 +1650,7 @@ "h": 8, "w": 12, "x": 0, - "y": 4 + "y": 27 }, "hiddenSeries": false, "id": 35, @@ -1787,16 +1680,26 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "$$hashKey": "object:61", + "alias": "/Limit .*/", + "color": "#F2495C" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, "exemplar": false, "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)", "format": "time_series", @@ -1804,12 +1707,24 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": false, + "expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit ({{instance}})", + "refId": "B" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU ($instance)", "tooltip": { "shared": true, @@ -1818,33 +1733,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": 
true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1852,7 +1759,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.", "fieldConfig": { "defaults": { @@ -1866,7 +1775,7 @@ "h": 8, "w": 12, "x": 12, - "y": 4 + "y": 27 }, "hiddenSeries": false, "id": 37, @@ -1896,7 +1805,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1922,9 +1831,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory usage ($instance)", "tooltip": { "shared": true, @@ -1933,33 +1840,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1967,7 +1866,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be 
prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", "fieldConfig": { "defaults": { @@ -1981,7 +1882,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 35 }, "hiddenSeries": false, "id": 39, @@ -2005,7 +1906,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2039,9 +1940,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Open FDs ($instance)", "tooltip": { "shared": true, @@ -2050,9 +1949,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2060,24 +1957,19 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 2, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2085,7 +1977,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -2098,7 +1992,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 35 }, "hiddenSeries": false, "id": 41, @@ -2122,7 +2016,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.3", + "pluginVersion": "8.3.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -2141,9 +2035,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Goroutines ($instance)", "tooltip": { "shared": true, @@ -2152,9 +2044,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2162,24 +2052,18 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, 
"logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -2188,7 +2072,7 @@ } ], "refresh": false, - "schemaVersion": 30, + "schemaVersion": 34, "style": "dark", "tags": [ "victoriametrics", @@ -2199,14 +2083,11 @@ { "current": { "selected": false, - "text": "Prometheus", - "value": "Prometheus" + "text": "VictoriaMetrics", + "value": "VictoriaMetrics" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "ds", "options": [], @@ -2218,15 +2099,13 @@ "type": "datasource" }, { - "allValue": null, "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "job", "options": [], @@ -2243,13 +2122,12 @@ { "allValue": ".*", "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vm_app_version{job=~\"$job\"}, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": true, "name": "instance", "options": [], @@ -2266,13 +2144,12 @@ { "allValue": ".*", "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": true, "name": "group", "options": [], @@ -2296,5 +2173,6 @@ "timezone": "", "title": "vmalert", "uid": "LzldHAVnz", - "version": 1 + "version": 1, + "weekStart": "" } \ No newline at end of file diff --git a/deployment/docker/alerts.yml b/deployment/docker/alerts.yml index cfbccce40..01869b9a7 100644 --- a/deployment/docker/alerts.yml +++ b/deployment/docker/alerts.yml @@ -43,6 +43,16 @@ groups: 
description: "Too high memory usage may result into multiple issues such as OOMs or degraded performance. Consider to either increase available memory or decrease the load on the process." + - alert: TooHighCPUUsage + expr: rate(process_cpu_seconds_total[5m]) / process_cpu_cores_available > 0.9 + for: 5m + labels: + severity: critical + annotations: + summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m" + description: "Too high CPU usage may be a sign of insufficient resources and make process unstable. + Consider to either increase available CPU resources or decrease the load on the process." + # Alerts group for VM cluster assumes that Grafana dashboard # https://grafana.com/grafana/dashboards/11176 is installed. # Please, update the `dashboard` annotation according to your setup. diff --git a/deployment/docker/prometheus.yml b/deployment/docker/prometheus.yml index e2e79aa51..b40a66374 100644 --- a/deployment/docker/prometheus.yml +++ b/deployment/docker/prometheus.yml @@ -1,6 +1,5 @@ global: scrape_interval: 1s - evaluation_interval: 1s scrape_configs: - job_name: 'vmagent'