VictoriaMetrics/dashboards/vmagent.json
Dmytro Kozlov 0672061d35
vmagent: expose metric vmagent_remotewrite_queues (#2871) (#3087)
* vmagent: expose metric `vmagent_remotewrite_queues` (#2871)

The new metric `vmagent_remotewrite_queues` exports a static value of
number of configured remote write queus. This metric is useful to
calculate total saturation per each configured URL with given number
of queues. See corresponding changes to vmagent alerts and dashboard.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* Update dashboards/vmagent.json

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2022-09-08 21:04:12 +03:00

4694 lines
No EOL
122 KiB
JSON

{
"__inputs": [],
"__elements": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.4.4"
},
{
"type": "panel",
"id": "graph",
"name": "Graph (old)",
"version": ""
},
{
"type": "panel",
"id": "heatmap",
"name": "Heatmap",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table-old",
"name": "Table (old)",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Overview for VictoriaMetrics vmagent v1.79.0 or higher",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"iteration": 1657810604530,
"links": [
{
"icon": "doc",
"tags": [],
"targetBlank": true,
"title": "vmagent wiki",
"tooltip": "",
"type": "link",
"url": "https://docs.victoriametrics.com/vmagent.html"
},
{
"icon": "external link",
"tags": [],
"targetBlank": true,
"title": "Found a bug?",
"type": "link",
"url": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues"
},
{
"icon": "external link",
"tags": [],
"targetBlank": true,
"title": "New releases",
"type": "link",
"url": "https://github.com/VictoriaMetrics/VictoriaMetrics/releases"
}
],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"uid": "$ds"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 24,
"panels": [],
"title": "Overview",
"type": "row"
},
{
"datasource": {
"uid": "$ds"
},
"description": "Shows total number of all configured scrape targets in state \"up\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 1
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
"interval": "",
"legendFormat": "up",
"refId": "A"
}
],
"title": "Scrape targets up",
"type": "stat"
},
{
"datasource": {
"uid": "$ds"
},
"description": "Shows total number of all configured scrape targets in state \"down\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 4,
"y": 1
},
"id": 72,
"links": [
{
"title": "Troubleshooting",
"url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting"
}
],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
"interval": "",
"legendFormat": "up",
"refId": "A"
}
],
"title": "Scrape targets down",
"type": "stat"
},
{
"datasource": {
"uid": "$ds"
},
"description": "Shows number of generated error messages in logs over last 30m. Non-zero value may be a sign of connectivity or missconfiguration errors.",
"fieldConfig": {
"defaults": {
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 8,
"y": 1
},
"id": 16,
"links": [
{
"targetBlank": true,
"title": "Troubleshooting",
"url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting"
}
],
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Log errors (30m)",
"type": "stat"
},
{
"datasource": {
"uid": "$ds"
},
"description": "Persistent queue size shows size of pending samples in bytes which hasn't been flushed to remote storage yet. \nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 10485760
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 5,
"x": 12,
"y": 1
},
"id": 56,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Persistent queue size",
"type": "stat"
},
{
"columns": [],
"datasource": {
"uid": "$ds"
},
"fontSize": "100%",
"gridPos": {
"h": 7,
"w": 7,
"x": 17,
"y": 1
},
"id": 11,
"scroll": true,
"showHeader": true,
"sort": {
"col": 4,
"desc": false
},
"styles": [
{
"alias": "uptime",
"align": "auto",
"colorMode": "cell",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [
"1800",
"3600"
],
"type": "number",
"unit": "s"
},
{
"alias": "",
"align": "auto",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "instance",
"thresholds": [],
"type": "string",
"unit": "short"
},
{
"alias": "",
"align": "auto",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [],
"type": "hidden",
"unit": "short"
}
],
"targets": [
{
"expr": "sort((time() - vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}) or (up{job=~\"$job\", instance=~\"$instance\"}))",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Uptime",
"transform": "table",
"type": "table-old"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 4,
"w": 17,
"x": 0,
"y": 4
},
"hiddenSeries": false,
"id": 13,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": false,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": true,
"targets": [
{
"expr": "sort(sum(up{job=~\"$job\", instance=~\"$instance\"}) by (job, instance))",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Uptime",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:215",
"decimals": 0,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"$$hashKey": "object:216",
"format": "short",
"label": "",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": 2
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows in/out samples rate including push and pull models. \n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 6,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:457",
"alias": "out",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_scraped_samples_sum{job=~\"$job\", instance=~\"$instance\"}[$__interval]))\n+ sum(rate(vmagent_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__interval]))",
"hide": false,
"interval": "",
"legendFormat": "in",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vmagent_remotewrite_block_size_rows_sum{job=~\"$job\", instance=~\"$instance\"}[$__interval]))",
"interval": "",
"legendFormat": "out",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Samples rate ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of requests served by vmagent HTTP server.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"hiddenSeries": false,
"id": 15,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vmagent_http_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(path, protocol) > 0",
"interval": "",
"legendFormat": "{{ path }} ({{ protocol }})",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Requests rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "none",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Errors rate shows rate for multiple metrics that track possible errors in vmagent, such as network or parsing errors.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 69,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Troubleshooting",
"url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting"
}
],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vmagent_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(protocol) > 0",
"interval": "",
"legendFormat": "{{protocol}} (request)",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_protoparser_read_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type) > 0",
"interval": "",
"legendFormat": "{{type}} (parse)",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_ingestserver_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type) > 0",
"interval": "",
"legendFormat": "{{type}} (ingest)",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_protoparser_unmarshal_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type) > 0",
"interval": "",
"legendFormat": "{{type}} (unmarshal)",
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_dial_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) > 0",
"interval": "",
"legendFormat": "scrape dial",
"refId": "E"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Errors rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "none",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the persistent queue size of pending samples in bytes which hasn't been flushed to remote storage yet. \n\nIncreasing of value might be a sign of connectivity issues. In such cases, vmagent starts to flush pending data on disk with attempt to send it later once connection is restored.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"hiddenSeries": false,
"id": 17,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"title": "Troubleshooting",
"url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vmagent_remotewrite_pending_data_bytes{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}) by (url)",
"interval": "",
"legendFormat": "{{url}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Persistent queue size ($instance) to ($url)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the rate of dropped data blocks in cases when remote storage replies with `400 Bad Request` and `409 Conflict` HTTP responses.\n\nSee https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1149",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"hiddenSeries": false,
"id": 79,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_packets_dropped_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__interval])) by(url)",
"interval": "",
"legendFormat": "{{url}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Data blocks dropped ($instance) to ($url)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows rate of dropped samples from persistent queue. VMagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"hiddenSeries": false,
"id": 49,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Troubleshooting",
"url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(vm_persistentqueue_bytes_dropped_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by (path)",
"interval": "",
"legendFormat": "{{ path }}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Persistent queue dropped rate ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of dropped samples due to relabeling. \nMetric tracks drops for `-remoteWrite.relabelConfig` configuration only.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"hiddenSeries": false,
"id": 18,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Relabeling",
"url": "https://docs.victoriametrics.com/vmagent.html#relabeling"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_global_relabel_metrics_dropped_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "global",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vmagent_remotewrite_relabel_metrics_dropped_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(url)",
"interval": "",
"legendFormat": "{{url}}",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Rows dropped by relabeling ($instance) to ($url)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the rate of logging the messages by their level. Unexpected spike in rate is a good reason to check logs.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 32
},
"hiddenSeries": false,
"id": 86,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (level) ",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{level}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Logging rate ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"collapsed": true,
"datasource": {
"uid": "$ds"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 40
},
"id": 28,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 2
},
"hiddenSeries": false,
"id": 48,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"}) by(type) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Scrape targets UP",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 2
},
"hiddenSeries": false,
"id": 76,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"}) by(type) > 0",
"format": "time_series",
"interval": "",
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Scrape targets DOWN",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 20,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:462",
"alias": "samples",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_promscrape_scrapes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "scrapes",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_scraped_samples_sum{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "samples",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Scrape rate ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"hiddenSeries": false,
"id": 31,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_scrapes_failed_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) ",
"interval": "",
"legendFormat": "scrapes failed",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_scrapes_timed_out_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) ",
"interval": "",
"legendFormat": "timeouts",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_scrapes_gunzip_failed_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) ",
"interval": "",
"legendFormat": "gunzip fails",
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "sum(rate(vm_promscrape_dial_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) ",
"interval": "",
"legendFormat": "dial fails",
"refId": "D"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Scrape fails ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 17
},
"hiddenSeries": false,
"id": 46,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(vm_promscrape_scrape_response_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)) ",
"format": "time_series",
"interval": "",
"legendFormat": "p0.95",
"refId": "A"
},
{
"expr": "histogram_quantile(0.5, sum(rate(vm_promscrape_scrape_response_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)) ",
"interval": "",
"legendFormat": "p0.5",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Scrape response size ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "This panel uses MetricsQL and works only when VM is used as a datasource",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 17
},
"heatmap": {},
"hideZeroBuckets": false,
"highlightCards": true,
"id": 33,
"legend": {
"show": false
},
"reverseYBuckets": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"expr": "buckets_limit(10, prometheus_buckets(sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"})) by(vmrange)))",
"format": "heatmap",
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Scrape duration ($instance)",
"tooltip": {
"show": true,
"showHistogram": false
},
"type": "heatmap",
"xAxis": {
"show": true
},
"yAxis": {
"decimals": 2,
"format": "s",
"logBase": 1,
"show": true
},
"yBucketBound": "auto"
}
],
"title": "Scraping",
"type": "row"
},
{
"collapsed": true,
"datasource": {
"uid": "$ds"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 41
},
"id": 71,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of write requests served by ingestserver (UDP, TCP connections) and HTTP server.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 3
},
"hiddenSeries": false,
"id": 73,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_ingestserver_requests_total{job=~\"$job\", instance=~\"$instance\", path!~\"/favicon.ico\"}[$__rate_interval])) by(type, net) > 0",
"interval": "",
"legendFormat": "{{ type }} ({{net}})",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_http_requests_total{job=~\"$job\", instance=~\"$instance\", protocol!=\"\"}[$__interval])) by(protocol) > 0",
"interval": "",
"legendFormat": "{{ protocol }} (http)",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Requests rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "none",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of write errors in ingestserver (UDP, TCP connections) and HTTP server.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 3
},
"hiddenSeries": false,
"id": 77,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_ingestserver_request_errors_total{job=~\"$job\", instance=~\"$instance\", path!~\"/favicon.ico\"}[$__rate_interval])) by(type, net) > 0",
"interval": "",
"legendFormat": "{{ type }} ({{net}})",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_http_request_errors_total{job=~\"$job\", instance=~\"$instance\", protocol!=\"\"}[$__interval])) by(protocol) > 0",
"interval": "",
"legendFormat": "{{ protocol }} (http)",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Error rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "none",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of parsed rows from write or scrape requests.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 11
},
"hiddenSeries": false,
"id": 78,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_protoparser_rows_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type) > 0",
"interval": "",
"legendFormat": "{{ type }}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Rows rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "none",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Tracks the rate of dropped invalid rows because of errors while unmarshaling write requests. The exact errors messages will be printed in logs.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 11
},
"hiddenSeries": false,
"id": 50,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_rows_invalid_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type) > 0",
"interval": "",
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Invalid rows rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"title": "Ingestion",
"type": "row"
},
{
"collapsed": true,
"datasource": {
"uid": "$ds"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 42
},
"id": 58,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the rate of requests to configured remote write endpoints by url and status code.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\n",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 43
},
"hiddenSeries": false,
"id": 60,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_requests_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by(url, status_code) > 0",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Requests rate ($instance) to ($url)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 2,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the global rate for number of written bytes via remote write connections.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 43
},
"hiddenSeries": false,
"id": 66,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance) > 0",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Bytes write rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows requests retry rate by url. Number of retries is unlimited but protected with delays up to 1m between attempts.\n\nRemote write URLs are hidden by default but might be unveiled once `-remoteWrite.showURL` is set to true.\n\n",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 51
},
"hiddenSeries": false,
"id": 61,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_retries_count_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by(url)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Retry rate ($instance) to ($url)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows current number of established connections to remote write endpoints.\n\n",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 51
},
"hiddenSeries": false,
"id": 65,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(vmagent_remotewrite_conns{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Connections ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "tsbuckets",
"datasource": {
"uid": "$ds"
},
"description": "Shows the remote write request duration distribution in seconds. Value depends on block size, network quality and remote storage performance.",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 59
},
"heatmap": {},
"hideZeroBuckets": false,
"highlightCards": true,
"id": 30,
"legend": {
"show": false
},
"reverseYBuckets": false,
"targets": [
{
"exemplar": true,
"expr": "buckets_limit(12, prometheus_buckets(sum(rate(vmagent_remotewrite_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__interval])) by(vmrange)))",
"format": "heatmap",
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Push duration ($instance) to ($url)",
"tooltip": {
"show": true,
"showHistogram": false
},
"type": "heatmap",
"xAxis": {
"show": true
},
"yAxis": {
"decimals": 2,
"format": "s",
"logBase": 1,
"show": true
},
"yBucketBound": "auto"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows saturation of every connection to remote storage. If the threshold of 90% is reached, then the connection is saturated (busy or slow) by more than 90%, so vmagent won't be able to keep up and can start buffering data. \n\nThis usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage.\n",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 59
},
"hiddenSeries": false,
"id": 84,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_send_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance, url)\n/\nmax(vmagent_remotewrite_queues{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}) by(instance, url)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [
{
"$$hashKey": "object:683",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 90,
"yaxis": "left"
}
],
"timeRegions": [],
"title": "Remote write connection saturation ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:662",
"format": "percentunit",
"logBase": 1,
"min": "0",
"show": true
},
{
"$$hashKey": "object:663",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "tsbuckets",
"datasource": {
"uid": "$ds"
},
"description": "Shows the remote write request block size distribution in rows.",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 67
},
"heatmap": {},
"hideZeroBuckets": false,
"highlightCards": true,
"id": 63,
"legend": {
"show": false
},
"reverseYBuckets": false,
"targets": [
{
"exemplar": true,
"expr": "buckets_limit(12, prometheus_buckets(sum(rate(vmagent_remotewrite_block_size_rows_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)))",
"format": "heatmap",
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Block size rows ($instance)",
"tooltip": {
"show": true,
"showHistogram": false
},
"type": "heatmap",
"xAxis": {
"show": true
},
"yAxis": {
"decimals": 2,
"format": "short",
"logBase": 1,
"show": true
},
"yBucketBound": "auto"
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "tsbuckets",
"datasource": {
"uid": "$ds"
},
"description": "Shows the remote write request block size distribution in bytes.",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 67
},
"heatmap": {},
"hideZeroBuckets": false,
"highlightCards": true,
"id": 62,
"legend": {
"show": false
},
"reverseYBuckets": false,
"targets": [
{
"expr": "buckets_limit(12, prometheus_buckets(sum(rate(vmagent_remotewrite_block_size_bytes_bucket{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(vmrange)))",
"format": "heatmap",
"interval": "",
"intervalFactor": 10,
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Block size bytes ($instance)",
"tooltip": {
"show": true,
"showHistogram": false
},
"type": "heatmap",
"xAxis": {
"show": true
},
"yAxis": {
"format": "bytes",
"logBase": 1,
"show": true
},
"yBucketBound": "auto"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the current limit usage of unique series over an hourly period. Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxHourlySeries` is not set.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 75
},
"hiddenSeries": false,
"id": 88,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "(vmagent_hourly_series_limit_current_series{job=~\"$job\", instance=~\"$instance\"} / vmagent_hourly_series_limit_max_series{job=~\"$job\", instance=~\"$instance\"}) * 100",
"interval": "",
"legendFormat": "current limit usage",
"refId": "A"
},
{
"exemplar": true,
"expr": "vmagent_daily_series_limit_max_series{job=~\"$job\", instance=~\"$instance\"}",
"hide": true,
"interval": "",
"legendFormat": "limit",
"refId": "B"
}
],
"thresholds": [
{
"$$hashKey": "object:234",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 90,
"yaxis": "left"
}
],
"timeRegions": [],
"title": "Hourly series limit",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:216",
"format": "percent",
"logBase": 1,
"max": "100",
"show": true
},
{
"$$hashKey": "object:217",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the current limit usage of unique series over a daily period. Vmagent will start to drop series once the limit is reached.\n\nPlease note, panel will be blank if `remoteWrite.maxDailySeries` is not set.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 75
},
"hiddenSeries": false,
"id": 90,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "(vmagent_daily_series_limit_current_series{job=~\"$job\", instance=~\"$instance\"} / vmagent_daily_series_limit_max_series{job=~\"$job\", instance=~\"$instance\"}) * 100",
"interval": "",
"legendFormat": "current limit usage",
"refId": "A"
},
{
"exemplar": true,
"expr": "vmagent_daily_series_limit_max_series{job=~\"$job\", instance=~\"$instance\"}",
"hide": true,
"interval": "",
"legendFormat": "limit",
"refId": "B"
}
],
"thresholds": [
{
"$$hashKey": "object:234",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 90,
"yaxis": "left"
}
],
"timeRegions": [],
"title": "Daily series limit",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:216",
"format": "percent",
"logBase": 1,
"max": "100",
"show": true
},
{
"$$hashKey": "object:217",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"title": "Remote write",
"type": "row"
},
{
"collapsed": true,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 43
},
"id": 94,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows top 5 job by the number of new series registered by vmagent over the 5min range. These jobs generate the most of the churn rate.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 5
},
"hiddenSeries": false,
"id": 92,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"uid": "$ds"
},
"exemplar": false,
"expr": "topk(5, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0",
"interval": "",
"legendFormat": "{{ job }}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Top 5 jobs by unique samples",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows top 5 instances by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 5
},
"hiddenSeries": false,
"id": 95,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"uid": "$ds"
},
"exemplar": false,
"expr": "topk(5, sum(sum_over_time(scrape_series_added[5m])) by (instance)) > 0",
"interval": "",
"legendFormat": "{{ instance }}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Top 5 instances by unique samples",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows saturation persistent queue for writes. If the threshold of 0.9sec is reached, then persistent is saturated by more than 90% and vmagent won't be able to keep up with flushing data on disk. In this case, consider to decrease load on the vmagent or improve the disk throughput.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 13
},
"hiddenSeries": false,
"id": 98,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_persistentqueue_write_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [
{
"$$hashKey": "object:683",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.9,
"yaxis": "left"
}
],
"timeRegions": [],
"title": "Persistent queue write saturation ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:662",
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"$$hashKey": "object:663",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows saturation persistent queue for reads. If the threshold of 0.9sec is reached, then persistent is saturated by more than 90% and vmagent won't be able to keep up with reading data from the disk. In this case, consider to decrease load on the vmagent or improve the disk throughput.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 13
},
"hiddenSeries": false,
"id": 99,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": true,
"expr": "sum(rate(vm_persistentqueue_read_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"thresholds": [
{
"$$hashKey": "object:683",
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.9,
"yaxis": "left"
}
],
"timeRegions": [],
"title": "Persistent queue read saturation ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:662",
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"$$hashKey": "object:663",
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"title": "Troubleshooting",
"type": "row"
},
{
"collapsed": true,
"datasource": {
"uid": "$ds"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 44
},
"id": 45,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the CPU usage per vmagent instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 62
},
"hiddenSeries": false,
"id": 35,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:77",
"alias": "/Limit.*/",
"color": "#F2495C"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"exemplar": false,
"expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Limit ({{instance}})",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "CPU ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Amount of used memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 62
},
"hiddenSeries": false,
"id": 37,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"interval": "",
"legendFormat": "resident {{instance}}",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "anonymous {{instance}}",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Memory usage ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Shows the number of bytes read/write from the storage layer when vmagent has to buffer data on disk or read already buffered data.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 70
},
"hiddenSeries": false,
"id": 81,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "read",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "read",
"refId": "A"
},
{
"expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "write",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Disk writes/reads ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Network usage shows the bytes rate for data accepted by vmagent and pushed via remotewrite protocol.\nDiscrepancies are possible because of different protocols used for ingesting, scraping and writing data.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 6,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 70
},
"hiddenSeries": false,
"id": 7,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "out",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8\n+ sum(rate(vm_promscrape_conn_bytes_read_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8",
"interval": "",
"legendFormat": "in",
"refId": "A"
},
{
"expr": "sum(rate(vmagent_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__interval])) * 8",
"interval": "",
"legendFormat": "out",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Network usage ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "bps",
"logBase": 1,
"show": true
},
{
"format": "bytes",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 78
},
"hiddenSeries": false,
"id": 83,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:913",
"alias": "max",
"color": "#C4162A"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "open",
"refId": "A"
},
{
"expr": "min(process_max_fds{job=~\"$job\", instance=~\"$instance\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "max",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Open FDs ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"logBase": 2,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 78
},
"hiddenSeries": false,
"id": 39,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Goroutines ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 86
},
"hiddenSeries": false,
"id": 43,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "max(go_gc_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"1\"}) by(instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "GC duration ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"uid": "$ds"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 86
},
"hiddenSeries": false,
"id": 41,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(process_num_threads{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Threads ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"title": "Resource usage",
"type": "row"
}
],
"refresh": "",
"schemaVersion": 35,
"style": "dark",
"tags": [
"vmagent",
"victoriametrics"
],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "VM",
"value": "VM"
},
"hide": 0,
"includeAll": false,
"multi": false,
"name": "ds",
"options": [],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": "",
"current": {},
"datasource": {
"uid": "$ds"
},
"definition": "label_values(vm_app_version{version=~\"^vmagent.*\"}, job)",
"hide": 0,
"includeAll": false,
"multi": true,
"name": "job",
"options": [],
"query": {
"query": "label_values(vm_app_version{version=~\"^vmagent.*\"}, job)",
"refId": "VictoriaMetrics-job-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": {},
"datasource": {
"uid": "$ds"
},
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "instance",
"options": [],
"query": {
"query": "label_values(vm_app_version{job=~\"$job\"}, instance)",
"refId": "VictoriaMetrics-instance-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": {},
"datasource": {
"uid": "$ds"
},
"definition": "label_values(vmagent_remotewrite_requests_total{job=~\"$job\", instance=~\"$instance\"}, url)",
"description": "The remote write URLs",
"hide": 0,
"includeAll": true,
"multi": true,
"name": "url",
"options": [],
"query": {
"query": "label_values(vmagent_remotewrite_requests_total{job=~\"$job\", instance=~\"$instance\"}, url)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "vmagent",
"uid": "G7Z9GzMGz",
"version": 1,
"weekStart": ""
}