diff --git a/dashboards/vmagent.json b/dashboards/vmagent.json index ce6071ce9..3e1fb5494 100644 --- a/dashboards/vmagent.json +++ b/dashboards/vmagent.json @@ -1,11 +1,12 @@ { "__inputs": [], + "__elements": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.2.0" + "version": "8.3.2" }, { "type": "panel", @@ -57,13 +58,12 @@ } ] }, - "description": "Overview for VictoriaMetrics vmagent v1.64.0 or higher", + "description": "Overview for VictoriaMetrics vmagent v1.70.0 or higher", "editable": true, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1634561115384, + "iteration": 1639980687827, "links": [ { "icon": "doc", @@ -95,7 +95,9 @@ "panels": [ { "collapsed": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "gridPos": { "h": 1, "w": 24, @@ -108,7 +110,9 @@ "type": "row" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows total number of all configured scrape targets in state \"up\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n", "fieldConfig": { "defaults": { @@ -147,7 +151,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "targets": [ { "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})", @@ -156,13 +160,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Scrape targets up", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows total number of all configured scrape targets in state \"down\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. 
\n", "fieldConfig": { "defaults": { @@ -211,7 +215,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "targets": [ { "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})", @@ -220,13 +224,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Scrape targets down", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows number of generated error messages in logs over last 30m. Non-zero value may be a sign of connectivity or missconfiguration errors.", "fieldConfig": { "defaults": { @@ -278,7 +282,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "targets": [ { "expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))", @@ -287,13 +291,13 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Log errors (30m)", "type": "stat" }, { - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Persistent queue size shows size of pending samples in bytes which hasn't been flushed to remote storage yet. \nIncreasing of value might be a sign of connectivity issues. 
In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.", "fieldConfig": { "defaults": { @@ -337,7 +341,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "targets": [ { "expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})", @@ -346,14 +350,14 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Persistent queue size", "type": "stat" }, { "columns": [], - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fontSize": "100%", "gridPos": { "h": 7, @@ -362,7 +366,6 @@ "y": 1 }, "id": 11, - "pageSize": null, "scroll": true, "showHeader": true, "sort": { @@ -393,7 +396,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -410,7 +412,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -436,8 +437,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Uptime", "transform": "table", "type": "table-old" @@ -447,7 +446,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -474,7 +475,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "sort": "current", "sortDesc": false, "total": false, @@ -487,7 +487,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -506,9 +506,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Uptime", "tooltip": { "shared": true, @@ -517,9 +515,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -527,19 +523,14 @@ { "decimals": 0, "format": "short", - "label": null, 
"logBase": 1, - "max": null, "min": "0", "show": true }, { - "decimals": null, "format": "short", "label": "", "logBase": 1, - "max": null, - "min": null, "show": true } ], @@ -553,7 +544,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows in/out samples rate including push and pull models. \n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target.", "fieldConfig": { "defaults": { @@ -590,7 +583,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -620,9 +613,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Samples rate ($instance)", "tooltip": { "shared": true, @@ -631,33 +622,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -665,7 +647,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rate of requests served by vmagent HTTP server.", "fieldConfig": { "defaults": { @@ -703,7 +687,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -720,9 +704,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Requests rate ($instance)", "tooltip": { "shared": true, @@ -731,33 +713,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": 
null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "none", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -765,15 +739,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", - "description": "Network usage shows the bytes rate for data accepted by vmagent and pushed via remotewrite protocol.\nDiscrepancies are possible because of different protocols used for ingesting, scraping and writing data.", + "datasource": { + "uid": "$ds" + }, + "description": "Errors rate shows rate for multiple metrics that track possible errors in vmagent, such as network or parsing errors.", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, - "fill": 6, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, @@ -782,14 +758,13 @@ "y": 16 }, "hiddenSeries": false, - "id": 7, + "id": 69, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, - "rightSide": false, "show": true, "sort": "current", "sortDesc": true, @@ -798,77 +773,87 @@ }, "lines": true, "linewidth": 1, - "nullPointMode": "null", + "links": [ + { + "targetBlank": true, + "title": "Troubleshooting", + "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + } + ], + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "out", - "transform": "negative-Y" - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8\n+ sum(rate(vm_promscrape_conn_bytes_read_total{job=~\"$job\", 
instance=~\"$instance\"}[$__interval])) * 8", + "expr": "sum(rate(vmagent_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(protocol)", "interval": "", - "legendFormat": "in", + "legendFormat": "{{protocol}} (request)", "refId": "A" }, { - "expr": "sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8", + "expr": "sum(rate(vm_protoparser_read_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", "interval": "", - "legendFormat": "out", + "legendFormat": "{{type}} (parse)", "refId": "B" + }, + { + "expr": "sum(rate(vm_ingestserver_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", + "interval": "", + "legendFormat": "{{type}} (ingest)", + "refId": "C" + }, + { + "expr": "sum(rate(vm_protoparser_unmarshal_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", + "interval": "", + "legendFormat": "{{type}} (unmarshal)", + "refId": "D" + }, + { + "expr": "sum(rate(vm_promscrape_dial_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval]))", + "interval": "", + "legendFormat": "scrape dial", + "refId": "E" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "Network usage ($instance)", + "title": "Errors rate ($instance)", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "format": "bps", - "label": null, + "format": "short", "logBase": 1, - "max": null, - "min": null, + "min": "0", "show": true }, { - "format": "bytes", - "label": null, + "format": "none", "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -876,7 +861,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + 
"datasource": { + "uid": "$ds" + }, "description": "Shows the persistent queue size of pending samples in bytes which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.", "fieldConfig": { "defaults": { @@ -919,7 +906,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -937,9 +924,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Persistent queue size ($instance) to ($url)", "tooltip": { "shared": true, @@ -948,33 +933,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -982,8 +959,10 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", - "description": "Shows rate of dropped samples from persistent queue. 
VMagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.", + "datasource": { + "uid": "$ds" + }, + "description": "Shows the rate of dropped data blocks in cases when remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses.\n\nSee https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149", "fieldConfig": { "defaults": { "links": [] @@ -999,6 +978,99 @@ "y": 24 }, "hiddenSeries": false, + "id": 79, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(vmagent_remotewrite_packets_dropped_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__interval])) by(url)", + "interval": "", + "legendFormat": "{{url}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Data blocks dropped ($instance) to ($url)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "description": "Shows rate of dropped samples from persistent 
queue. VMagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, "id": 49, "legend": { "alignAsTable": true, @@ -1026,7 +1098,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1043,9 +1115,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Persistent queue dropped rate ($instance)", "tooltip": { "shared": true, @@ -1054,33 +1124,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1088,137 +1150,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", - "description": "Errors rate shows rate for multiple metrics that track possible errors in vmagent, such as network or parsing errors.", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] + "datasource": { + "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 69, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "targetBlank": true, - 
"title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" - } - ], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(vmagent_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(protocol)", - "interval": "", - "legendFormat": "{{protocol}} (request)", - "refId": "A" - }, - { - "expr": "sum(rate(vm_protoparser_read_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", - "interval": "", - "legendFormat": "{{type}} (parse)", - "refId": "B" - }, - { - "expr": "sum(rate(vm_ingestserver_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", - "interval": "", - "legendFormat": "{{type}} (ingest)", - "refId": "C" - }, - { - "expr": "sum(rate(vm_protoparser_unmarshal_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(type)", - "interval": "", - "legendFormat": "{{type}} (unmarshal)", - "refId": "D" - }, - { - "expr": "sum(rate(vm_promscrape_dial_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval]))", - "interval": "", - "legendFormat": "scrape dial", - "refId": "E" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Errors rate ($instance)", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - 
"alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", "description": "Shows the rate of dropped samples due to relabeling. \nMetric tracks drops for `-remoteWrite.relabelConfig` configuration only.", "fieldConfig": { "defaults": { @@ -1262,7 +1196,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1286,9 +1220,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Rows dropped by relabeling ($instance) to ($url)", "tooltip": { "shared": true, @@ -1297,33 +1229,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1331,108 +1255,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", - "description": "Shows the rate of dropped data blocks in cases when remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses.\n\nSee https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] + "datasource": { + "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "hiddenSeries": false, - "id": 79, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": 
false, - "pluginVersion": "8.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(rate(vmagent_remotewrite_packets_dropped_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__interval])) by(url)", - "interval": "", - "legendFormat": "{{url}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Data blocks dropped ($instance) to ($url)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", "description": "Shows the rate of logging the messages by their level. 
Unexpected spike in rate is a good reason to check logs.", "fieldConfig": { "defaults": { @@ -1446,7 +1271,7 @@ "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 32 }, "hiddenSeries": false, "id": 86, @@ -1470,7 +1295,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1491,9 +1316,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Logging rate ($instance)", "tooltip": { "shared": true, @@ -1502,44 +1325,38 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": true, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 48 + "y": 40 }, "id": 28, "panels": [ @@ -1548,7 +1365,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -1561,7 +1380,7 @@ "h": 7, "w": 12, "x": 0, - "y": 41 + "y": 10 }, "hiddenSeries": false, "id": 48, @@ -1584,7 +1403,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1603,9 +1422,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scrape targets UP", "tooltip": { "shared": true, @@ -1614,33 +1431,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": 
true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1648,7 +1457,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -1661,7 +1472,7 @@ "h": 7, "w": 12, "x": 12, - "y": 41 + "y": 10 }, "hiddenSeries": false, "id": 76, @@ -1684,7 +1495,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1703,9 +1514,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scrape targets DOWN", "tooltip": { "shared": true, @@ -1714,33 +1523,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1748,7 +1549,10 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -1761,7 +1565,7 @@ "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 17 }, "hiddenSeries": false, "id": 20, @@ -1784,7 +1588,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1799,12 +1603,21 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${DS_DBAAS-TEST-T3-MEDIUM-INST}" + }, + "exemplar": true, "expr": "sum(rate(vm_promscrape_scrapes_total{job=~\"$job\", instance=~\"$instance\"}[$__interval]))", "interval": "", 
"legendFormat": "scrapes", "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "${DS_DBAAS-TEST-T3-MEDIUM-INST}" + }, "expr": "sum(rate(vm_promscrape_scraped_samples_sum{job=~\"$job\", instance=~\"$instance\"}[$__interval]))", "interval": "", "legendFormat": "samples", @@ -1812,9 +1625,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scrape rate ($instance)", "tooltip": { "shared": true, @@ -1823,33 +1634,26 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1857,7 +1661,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -1870,7 +1676,7 @@ "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 17 }, "hiddenSeries": false, "id": 31, @@ -1893,7 +1699,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -1928,9 +1734,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scrape fails ($instance)", "tooltip": { "shared": true, @@ -1939,33 +1743,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1973,7 +1769,9 @@ "bars": false, "dashLength": 10, "dashes": false, - 
"datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -1986,7 +1784,7 @@ "h": 8, "w": 12, "x": 0, - "y": 56 + "y": 25 }, "hiddenSeries": false, "id": 46, @@ -2009,7 +1807,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2033,9 +1831,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scrape response size ($instance)", "tooltip": { "shared": true, @@ -2044,40 +1840,29 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", @@ -2086,13 +1871,15 @@ "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "works in vm only disclaimer", "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 56 + "y": 25 }, "heatmap": {}, "hideZeroBuckets": false, @@ -2112,8 +1899,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Scrape duration ($instance)", "tooltip": { "show": true, @@ -2123,20 +1908,13 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 2, "format": "s", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yBucketBound": "auto" } ], "title": "Scraping", @@ -2144,12 +1922,14 @@ }, { "collapsed": true, - "datasource": "$ds", + "datasource": { + 
"uid": "$ds" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 49 + "y": 41 }, "id": 71, "panels": [ @@ -2158,7 +1938,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rate of write requests served by ingestserver (UDP, TCP connections) and HTTP server.", "fieldConfig": { "defaults": { @@ -2172,7 +1954,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 11 }, "hiddenSeries": false, "id": 73, @@ -2196,7 +1978,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2221,9 +2003,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Requests rate ($instance)", "tooltip": { "shared": true, @@ -2232,33 +2012,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "none", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2266,7 +2038,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rate of write errors in ingestserver (UDP, TCP connections) and HTTP server.", "fieldConfig": { "defaults": { @@ -2280,7 +2054,7 @@ "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 11 }, "hiddenSeries": false, "id": 77, @@ -2304,7 +2078,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2329,9 +2103,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Error rate ($instance)", "tooltip": { "shared": true, @@ -2340,33 +2112,25 @@ }, 
"type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "none", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2374,7 +2138,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rate of parsed rows from write or scrape requests.", "fieldConfig": { "defaults": { @@ -2388,7 +2154,7 @@ "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 19 }, "hiddenSeries": false, "id": 78, @@ -2412,7 +2178,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2430,9 +2196,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Rows rate ($instance)", "tooltip": { "shared": true, @@ -2441,33 +2205,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "none", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2475,7 +2231,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Tracks the rate of dropped invalid rows because of errors while unmarshaling write requests. 
The exact errors messages will be printed in logs.", "fieldConfig": { "defaults": { @@ -2489,7 +2247,7 @@ "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 19 }, "hiddenSeries": false, "id": 50, @@ -2512,7 +2270,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.1.2", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2530,9 +2288,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Invalid rows rate ($instance)", "tooltip": { "shared": true, @@ -2541,33 +2297,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -2576,12 +2324,14 @@ }, { "collapsed": true, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 42 }, "id": 58, "panels": [ @@ -2590,7 +2340,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the rate of requests to configured remote write endpoints by url and status code.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\n", "fieldConfig": { "defaults": { @@ -2604,7 +2356,7 @@ "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 12 }, "hiddenSeries": false, "id": 60, @@ -2627,7 +2379,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2645,9 +2397,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Requests rate ($instance) to ($url)", "tooltip": { "shared": true, 
@@ -2656,9 +2406,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2666,24 +2414,18 @@ { "decimals": 2, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2691,7 +2433,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the global rate for number of written bytes via remote write connections.", "fieldConfig": { "defaults": { @@ -2705,7 +2449,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 12 }, "hiddenSeries": false, "id": 66, @@ -2728,7 +2472,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2746,9 +2490,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Bytes write rate ($instance)", "tooltip": { "shared": true, @@ -2757,33 +2499,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2791,7 +2525,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows requests retry rate by url. 
Number of retries is unlimited but protected with delays up to 1m between attempts.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\n", "fieldConfig": { "defaults": { @@ -2805,7 +2541,7 @@ "h": 8, "w": 12, "x": 0, - "y": 59 + "y": 20 }, "hiddenSeries": false, "id": 61, @@ -2828,7 +2564,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2846,9 +2582,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Retry rate ($instance) to ($url)", "tooltip": { "shared": true, @@ -2857,33 +2591,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2891,7 +2617,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows current number of established connections to remote write endpoints.\n\n", "fieldConfig": { "defaults": { @@ -2905,7 +2633,7 @@ "h": 8, "w": 12, "x": 12, - "y": 59 + "y": 20 }, "hiddenSeries": false, "id": 65, @@ -2928,7 +2656,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -2946,9 +2674,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Connections ($instance)", "tooltip": { "shared": true, @@ -2957,40 +2683,29 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - 
"label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", @@ -2999,13 +2714,15 @@ "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the remote write request duration distribution in seconds. Value depends on block size, network quality and remote storage performance.", "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 67 + "y": 28 }, "heatmap": {}, "hideZeroBuckets": false, @@ -3026,8 +2743,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Push duration ($instance) to ($url)", "tooltip": { "show": true, @@ -3037,27 +2752,22 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 2, "format": "s", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yBucketBound": "auto" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows saturation of every connection to remote storage. If the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vmagent won't be able to keep up. 
This usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage.\n", "fieldConfig": { "defaults": { @@ -3071,7 +2781,7 @@ "h": 8, "w": 12, "x": 12, - "y": 67 + "y": 28 }, "hiddenSeries": false, "id": 84, @@ -3094,7 +2804,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -3122,9 +2832,7 @@ "yaxis": "left" } ], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Remote write connection saturation ($instance)", "tooltip": { "shared": true, @@ -3133,9 +2841,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3143,32 +2849,23 @@ { "$$hashKey": "object:662", "format": "s", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:663", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", @@ -3177,13 +2874,15 @@ "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the remote write request block size distribution in rows.", "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 75 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": false, @@ -3204,8 +2903,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Block size rows ($instance)", "tooltip": { "show": true, @@ -3215,26 +2912,16 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 2, "format": "short", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true }, - 
"yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yBucketBound": "auto" }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", @@ -3243,13 +2930,15 @@ "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the remote write request block size distribution in bytes.", "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 75 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": false, @@ -3269,8 +2958,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Block size bytes ($instance)", "tooltip": { "show": true, @@ -3280,27 +2967,21 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { - "decimals": null, "format": "bytes", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yBucketBound": "auto" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the current limit usage of unique series over an hourly period. 
Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxHourlySeries` is not set.", "fill": 1, "fillGradient": 0, @@ -3308,7 +2989,7 @@ "h": 8, "w": 12, "x": 0, - "y": 83 + "y": 44 }, "hiddenSeries": false, "id": 88, @@ -3328,7 +3009,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -3364,9 +3045,7 @@ "yaxis": "left" } ], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Hourly series limit", "tooltip": { "shared": true, @@ -3375,9 +3054,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3385,25 +3062,19 @@ { "$$hashKey": "object:216", "format": "percent", - "label": null, "logBase": 1, "max": "100", - "min": null, "show": true }, { "$$hashKey": "object:217", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3411,7 +3082,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the current limit usage of unique series over a daily period. 
Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxDailySeries` is not set.", "fill": 1, "fillGradient": 0, @@ -3419,7 +3092,7 @@ "h": 8, "w": 12, "x": 12, - "y": 83 + "y": 44 }, "hiddenSeries": false, "id": 90, @@ -3439,7 +3112,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.2.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -3475,9 +3148,7 @@ "yaxis": "left" } ], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Daily series limit", "tooltip": { "shared": true, @@ -3486,9 +3157,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3496,25 +3165,19 @@ { "$$hashKey": "object:216", "format": "percent", - "label": null, "logBase": 1, "max": "100", - "min": null, "show": true }, { "$$hashKey": "object:217", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -3523,12 +3186,424 @@ }, { "collapsed": true, - "datasource": "$ds", "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 51 + "y": 43 + }, + "id": 94, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "description": "Shows top 5 job by the number of new series registered by vmagent over the 5min range. 
These jobs generate the most of the churn rate.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "hiddenSeries": false, + "id": 92, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$ds" + }, + "exemplar": false, + "expr": "topk(5, sum(sum_over_time(scrape_series_added[5m])) by (job))", + "interval": "", + "legendFormat": "{{ job }}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Top 5 jobs by unique samples", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "description": "Shows top 5 instances by the number of new series registered by vmagent over the 5min range. 
These instances generate the most of the churn rate.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "hiddenSeries": false, + "id": 95, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$ds" + }, + "exemplar": false, + "expr": "topk(5, sum(sum_over_time(scrape_series_added[5m])) by (instance))", + "interval": "", + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Top 5 instances by unique samples", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows saturation persistent queue for writes. If the threshold of 0.9sec is reached, then persistent is saturated by more than 90% and vmagent won't be able to keep up with flushing data on disk. 
In this case, consider decreasing the load on the vmagent or improve the disk throughput.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": true, + "expr": "sum(rate(vm_persistentqueue_write_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "$$hashKey": "object:683", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.9, + "yaxis": "left" + } + ], + "timeRegions": [], + "title": "Persistent queue write saturation ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:662", + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:663", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows saturation
persistent queue for reads. If the threshold of 0.9sec is reached, then the persistent queue is saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk. In this case, consider decreasing the load on the vmagent or improve the disk throughput.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 99, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "exemplar": true, + "expr": "sum(rate(vm_persistentqueue_read_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "$$hashKey": "object:683", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.9, + "yaxis": "left" + } + ], + "timeRegions": [], + "title": "Persistent queue read saturation ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:662", + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:663", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } +
], + "title": "Troubleshooting", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "uid": "$ds" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 }, "id": 45, "panels": [ @@ -3537,7 +3612,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.", "fieldConfig": { "defaults": { @@ -3551,7 +3628,7 @@ "h": 8, "w": 12, "x": 0, - "y": 5 + "y": 14 }, "hiddenSeries": false, "id": 35, @@ -3581,7 +3658,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -3599,9 +3676,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU ($instance)", "tooltip": { "shared": true, @@ -3610,33 +3685,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3644,7 +3711,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "description": "Amount of used memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.", "fieldConfig": { "defaults": { @@ -3658,7 +3727,7 @@ "h": 8, "w": 12, "x": 12, - "y": 5 + "y": 14 }, "hiddenSeries": false, "id": 37, @@ -3688,7 +3757,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -3714,9 
+3783,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory usage ($instance)", "tooltip": { "shared": true, @@ -3725,33 +3792,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3759,225 +3818,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", - "description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] + "datasource": { + "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 13 - }, - "hiddenSeries": false, - "id": 83, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "max", - "color": "#C4162A" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "open", - "refId": "A" - }, - { - "expr": 
"min(process_max_fds{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "max", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Open FDs ($instance)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 2, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 13 - }, - "hiddenSeries": false, - "id": 39, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.0.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Goroutines ($instance)", - 
"tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", "description": "Shows the number of bytes read/write from the storage layer when vmagent has to buffer data on disk or read already buffered data.", "fieldConfig": { "defaults": { @@ -3991,7 +3834,7 @@ "h": 8, "w": 12, "x": 0, - "y": 21 + "y": 22 }, "hiddenSeries": false, "id": 81, @@ -4015,7 +3858,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -4049,9 +3892,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk writes/reads ($instance)", "tooltip": { "shared": true, @@ -4060,34 +3901,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4095,7 +3927,112 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, + "description": "Network usage shows the bytes rate for data accepted by vmagent and pushed via remotewrite protocol.\nDiscrepancies are possible because of different 
protocols used for ingesting, scraping and writing data.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 6, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "out", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8\n+ sum(rate(vm_promscrape_conn_bytes_read_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8", + "interval": "", + "legendFormat": "in", + "refId": "A" + }, + { + "expr": "sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8", + "interval": "", + "legendFormat": "out", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network usage ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "logBase": 1, + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open 
files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", "fieldConfig": { "defaults": { "links": [] @@ -4107,11 +4044,11 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 30 }, "hiddenSeries": false, - "id": 41, + "id": 83, "legend": { "alignAsTable": true, "avg": true, @@ -4132,7 +4069,115 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "max", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "open", + "refId": "A" + }, + { + "expr": "min(process_max_fds{job=~\"$job\", instance=~\"$instance\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Open FDs ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "logBase": 2, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 39, + "legend": { + "alignAsTable": true, + "avg": true, + 
"current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -4142,18 +4187,17 @@ "steppedLine": false, "targets": [ { - "expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)", + "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)", "format": "time_series", + "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "Threads ($instance)", + "title": "Goroutines ($instance)", "tooltip": { "shared": true, "sort": 0, @@ -4161,9 +4205,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4171,24 +4213,18 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4196,7 +4232,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "fieldConfig": { "defaults": { "links": [] @@ -4209,7 +4247,7 @@ "h": 8, "w": 12, "x": 0, - "y": 29 + "y": 38 }, "hiddenSeries": false, "id": 43, @@ -4233,7 +4271,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "8.0.0", + "pluginVersion": "8.3.2", "pointradius": 2, "points": false, "renderer": "flot", @@ -4251,9 +4289,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GC duration ($instance)", "tooltip": { "shared": true, @@ -4262,33 
+4298,118 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 38 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.3.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Threads ($instance)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false } } ], @@ -4297,7 +4418,7 @@ } ], "refresh": false, - "schemaVersion": 31, + 
"schemaVersion": 33, "style": "dark", "tags": [ "vmagent", @@ -4307,15 +4428,12 @@ "list": [ { "current": { - "selected": false, - "text": "VictoriaMetrics", - "value": "VictoriaMetrics" + "selected": true, + "text": "dbaas-test-t3-medium-inst", + "value": "dbaas-test-t3-medium-inst" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "ds", "options": [], @@ -4329,13 +4447,12 @@ { "allValue": "", "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vm_app_version{version=~\"^vmagent.*\"}, job)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": true, "name": "job", "options": [], @@ -4355,13 +4472,12 @@ { "allValue": ".*", "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vm_app_version{job=~\"$job\"}, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": true, "name": "instance", "options": [], @@ -4381,13 +4497,13 @@ { "allValue": ".*", "current": {}, - "datasource": "$ds", + "datasource": { + "uid": "$ds" + }, "definition": "label_values(vmagent_remotewrite_requests_total{job=~\"$job\", instance=~\"$instance\"}, url)", "description": "The remote write URLs", - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": true, "name": "url", "options": [], @@ -4423,5 +4539,6 @@ "timezone": "", "title": "vmagent", "uid": "G7Z9GzMGz", - "version": 3 -} + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/deployment/docker/alerts.yml b/deployment/docker/alerts.yml index 5c5a7f9d4..e3fc1866f 100644 --- a/deployment/docker/alerts.yml +++ b/deployment/docker/alerts.yml @@ -216,6 +216,16 @@ groups: description: "Vmagent dropped {{ $value | humanize1024 }} from persistent queue on instance {{ $labels.instance }} for the last 10m." 
+ - alert: RejectedRemoteWriteDataBlocksAreDropped + expr: sum(increase(vmagent_remotewrite_packets_dropped_total[5m])) by (job, instance) > 0 + for: 15m + labels: + severity: warning + annotations: + dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=79&var-instance={{ $labels.instance }}" + summary: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} drops data blocks rejected by the + remote-write server. Check the logs to find the reason for the rejects." + - alert: TooManyScrapeErrors expr: sum(increase(vm_promscrape_scrapes_failed_total[5m])) by (job, instance) > 0 for: 15m @@ -261,6 +271,30 @@ groups: This usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage." + - alert: PersistentQueueForWritesIsSaturated + expr: rate(vm_persistentqueue_write_duration_seconds_total[5m]) > 0.9 + for: 15m + labels: + severity: warning + annotations: + dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=98&var-instance={{ $labels.instance }}" + summary: "Persistent queue writes for instance {{ $labels.instance }} are saturated" + description: "Persistent queue writes for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }}) + are saturated by more than 90% and vmagent won't be able to keep up with flushing data to disk. + In this case, consider decreasing the load on vmagent or improving the disk throughput." 
+ + - alert: PersistentQueueForReadsIsSaturated + expr: rate(vm_persistentqueue_read_duration_seconds_total[5m]) > 0.9 + for: 15m + labels: + severity: warning + annotations: + dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=99&var-instance={{ $labels.instance }}" + summary: "Persistent queue reads for instance {{ $labels.instance }} are saturated" + description: "Persistent queue reads for vmagent \"{{ $labels.job }}\" (instance {{ $labels.instance }}) + are saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk. + In this case, consider decreasing the load on vmagent or improving the disk throughput." + - alert: SeriesLimitHourReached expr: (vmagent_hourly_series_limit_current_series / vmagent_hourly_series_limit_max_series) > 0.9 labels: