From c47138e1b0137f453b0a01d5621b2169d670599b Mon Sep 17 00:00:00 2001 From: hagen1778 <roman@victoriametrics.com> Date: Thu, 3 Aug 2023 11:14:14 +0200 Subject: [PATCH] dashboards: add panels for absolute value of mem and cpu usage by vmalert See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4627 Signed-off-by: hagen1778 <roman@victoriametrics.com> --- dashboards/vmalert.json | 318 ++++++++++++++++++++++++++++++++++------ docs/CHANGELOG.md | 1 + 2 files changed, 276 insertions(+), 43 deletions(-) diff --git a/dashboards/vmalert.json b/dashboards/vmalert.json index 966f4407e9..37963202a4 100644 --- a/dashboards/vmalert.json +++ b/dashboards/vmalert.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.6" + "version": "9.2.7" }, { "type": "datasource", @@ -204,7 +204,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -264,7 +264,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -324,7 +324,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -388,7 +388,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -452,7 +452,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -546,7 +546,7 @@ }, "showHeader": true }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -1182,7 +1182,7 @@ } ] }, - "pluginVersion": "9.2.6", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -1243,6 +1243,230 @@ }, "id": 43, "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The percentage of used RSS memory\n\nIf you think that usage is abnormal or unexpected, please file 
an issue and attach memory profile if possible.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 37, + "links": [ + { + "targetBlank": true, + "title": "Profiling", + "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + } + ], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage % ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": 
"Amount of used RSS memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 57, + "links": [ + { + "targetBlank": true, + "title": "Profiling", + "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + } + ], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n) by(job)", + "interval": "", + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage ($instance)", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -1308,7 +1532,7 @@ "h": 8, 
"w": 12, "x": 0, - "y": 35 + "y": 41 }, "id": 35, "links": [ @@ -1362,7 +1586,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.", + "description": "Shows the max number of CPU cores used by a `job` and the corresponding limit.", "fieldConfig": { "defaults": { "color": { @@ -1414,7 +1638,7 @@ } ] }, - "unit": "percentunit" + "unit": "short" }, "overrides": [] }, @@ -1422,9 +1646,9 @@ "h": 8, "w": 12, "x": 12, - "y": 35 + "y": 41 }, - "id": 37, + "id": 56, "links": [ { "targetBlank": true, @@ -1447,7 +1671,7 @@ }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "9.2.6", @@ -1459,14 +1683,32 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)", + "expr": "max(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", + "format": "time_series", "interval": "", - "legendFormat": "__auto", + "intervalFactor": 1, + "legendFormat": "{{job}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "min(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}) by(job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "limit ({{job}})", + "range": true, + "refId": "B" } ], - "title": "Memory usage % ($instance)", + "title": "CPU usage ($instance)", "type": "timeseries" }, { @@ 
-1535,7 +1777,7 @@ "h": 8, "w": 12, "x": 0, - "y": 43 + "y": 49 }, "id": 39, "links": [], @@ -1641,7 +1883,7 @@ "h": 8, "w": 12, "x": 12, - "y": 43 + "y": 49 }, "id": 41, "links": [], @@ -1754,8 +1996,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1857,8 +2098,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1960,8 +2200,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2064,8 +2303,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2164,8 +2402,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2292,8 +2529,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2395,8 +2631,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2497,8 +2732,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2620,8 +2854,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2713,8 +2946,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2776,9 +3008,9 @@ "list": [ { "current": { - "selected": true, - "text": "VictoriaMetrics", - "value": "VictoriaMetrics" + "selected": false, + "text": "VictoriaMetrics - cluster", + "value": "VictoriaMetrics - cluster" }, "hide": 0, "includeAll": false, @@ -2862,7 +3094,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": "5", "value": "5" }, diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 86152d893f..c11ce37a3c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md 
@@ -33,6 +33,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): do not add `/api/v1/read` suffix to remote read storage address defined by `--remote-read-src-addr` if a `--remote-read-disable-path-append` command-line flag is set. It allows an overriding path for remote-read API via `--remote-read-src-addr`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4655). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add warning in query field of vmui for partial data responses. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4721). * FEATURE: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): add `Concurrent inserts` panel to vmagent's dasbhoard. The new panel supposed to show whether the number of concurrent inserts processed by vmagent isn't reaching the limit. +* FEATURE: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): add panels for absolute Mem and CPU usage by vmalert. See related issue [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4627). * FEATURE: [Alerting rules for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts): `ConcurrentFlushesHitTheLimit` alerting rule was moved from [single-server](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts.yml) and [cluster](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-cluster.yml) alerts to the [list of "health" alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml) as it could be related to many VictoriaMetrics components. 
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): use local scrape timestamps for the scraped metrics unless `honor_timestamps: true` option is explicitly set at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This fixes gaps for metrics collected from [cadvisor](https://github.com/google/cadvisor) or similar exporters, which export metrics with invalid timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697) and [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1654614799) for details. The issue has been introduced in [v1.68.0](#v1680).