VictoriaMetrics/dashboards/vmalert.json
Roman Khavronenko eff940aa76
Vmalert metrics update (#1580)
* vmalert: remove `vmalert_execution_duration_seconds` metric

The summary for `vmalert_execution_duration_seconds` metric gives no additional
value comparing to `vmalert_iteration_duration_seconds` metric.

* vmalert: update config reload success metric properly

Previously, if there was unsuccessfull attempt to reload config and then
rollback to previous version - the metric remained set to 0.

* vmalert: add Grafana dashboard to overview application metrics

* docker: include vmalert target into list for scraping

* vmalert: extend notifier metrics with addr label

The change adds an `addr` label to metrics for alerts_sent and alerts_send_errors
to identify which exact address is having issues.
The according change was made to vmalert dashboard.

* vmalert: update documentation and docker environment for vmalert's dashboard

Mention Grafana's dashboard in vmalert's README in a new section #Monitoring.

Update docker-compose env to automatically add vmalert's dashboard.
Update docker-compose README with additional info about services.
2021-08-31 12:28:02 +03:00

2300 lines
No EOL
59 KiB
JSON

{
"__inputs": [],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.0.3"
},
{
"type": "panel",
"id": "graph",
"name": "Graph (old)",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table-old",
"name": "Table (old)",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Overview for VictoriaMetrics vmalert v1.65.0 or higher",
"editable": true,
"gnetId": null,
"graphTooltip": 1,
"id": null,
"iteration": 1630393200659,
"links": [
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [],
"targetBlank": true,
"title": "vmalert docs",
"tooltip": "",
"type": "link",
"url": "https://docs.victoriametrics.com/vmalert.html"
},
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [],
"targetBlank": true,
"title": "Found a bug?",
"tooltip": "",
"type": "link",
"url": " https://github.com/VictoriaMetrics/VictoriaMetrics/issues"
},
{
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [],
"targetBlank": true,
"title": "New releases",
"tooltip": "",
"type": "link",
"url": " https://github.com/VictoriaMetrics/VictoriaMetrics/releases"
}
],
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 11,
"panels": [],
"title": "General ($instance)",
"type": "row"
},
{
"datasource": "$ds",
"description": "Shows if the last configuration update was successful. \"Not Ok\" means there was an unsuccessful attempt to update the configuration due to some error. Check the log for details.",
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"0": {
"color": "green",
"index": 0,
"text": "Ok"
}
},
"type": "value"
},
{
"options": {
"from": 1,
"result": {
"color": "red",
"index": 1,
"text": "Not Ok"
},
"to": 999999
},
"type": "range"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 3,
"x": 0,
"y": 1
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"exemplar": false,
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 ) or 0",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Config error",
"type": "stat"
},
{
"datasource": "$ds",
"description": "Shows the total number of errors generated by recording/alerting rules for selected instances and groups.",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 5,
"x": 3,
"y": 1
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"exemplar": false,
"expr": "(sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0)) + \n(sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0))",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Errors",
"type": "stat"
},
{
"datasource": "$ds",
"description": "Shows the total number of loaded alerting rules across selected instances and groups.",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 5,
"x": 8,
"y": 1
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"exemplar": false,
"expr": "count(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Alerting rules",
"type": "stat"
},
{
"datasource": "$ds",
"description": "Shows the total number of loaded recording rules across selected instances and groups.",
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 5,
"x": 13,
"y": 1
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.3",
"targets": [
{
"exemplar": false,
"expr": "count(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Recording rules",
"type": "stat"
},
{
"columns": [],
"datasource": "$ds",
"fontSize": "100%",
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 1
},
"id": 2,
"pageSize": null,
"scroll": true,
"showHeader": true,
"sort": {
"col": 3,
"desc": false
},
"styles": [
{
"alias": "uptime",
"align": "auto",
"colorMode": "cell",
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [
"1800",
"3600"
],
"type": "number",
"unit": "s"
},
{
"alias": "",
"align": "auto",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "instance",
"thresholds": [],
"type": "string",
"unit": "short"
},
{
"alias": "",
"align": "auto",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [],
"type": "hidden",
"unit": "short"
}
],
"targets": [
{
"exemplar": false,
"expr": "sort((time() - vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}) or (up{job=~\"$job\", instance=~\"$instance\"}))",
"format": "table",
"hide": false,
"instant": true,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Uptime",
"transform": "table",
"type": "table-old"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 4,
"w": 18,
"x": 0,
"y": 4
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"sort": "current",
"sortDesc": false,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": true,
"targets": [
{
"exemplar": false,
"expr": "sort(sum(up{job=~\"$job\", instance=~\"$instance\"}) by (job, instance))",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Uptime",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:170",
"decimals": 0,
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:171",
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": 2
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the number of fired alerts by instance.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 15,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(increase(vmalert_alerts_fired_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Alerts fired total",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Average evaluation duration by group. Basically means how long it takes to execute all the rules per each group.",
"fieldConfig": {
"defaults": {
"unit": "s"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"hiddenSeries": false,
"id": 23,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": false
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group) / \nsum(rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group)",
"interval": "",
"legendFormat": "{{group}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Groups avg evaluation duration ($group)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows how many requests (executions) per second vmalert sends to the configured datasource.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 24,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(rate(vmalert_execution_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Rules execution rate ($instance)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the error rate while executing configured rules. Non-zero value means there are some issues with existing rules. Check the logs to get more details.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"hiddenSeries": false,
"id": 25,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(increase(vmalert_execution_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Rules execution errors ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 17,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the current active (firing) alerting rules per group.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 25
},
"hiddenSeries": false,
"id": 14,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
"interval": "",
"legendFormat": "{{alertname}} ({{group}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Active ($group)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the events when rule execution resulted into an error. Check the logs for more details.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 25
},
"hiddenSeries": false,
"id": 13,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
"interval": "",
"legendFormat": "{{alertname}} ({{group}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Errors ($group)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the current pending alerting rules per group.\nBy pending means the rule which remains active less than configured `for` parameter.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"hiddenSeries": false,
"id": 20,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(vmalert_alerts_pending{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
"interval": "",
"legendFormat": "{{alertname}} ({{group}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Pending ($group)",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows how many alerts are sent to Alertmanager per second. Only active alerts are sent.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"hiddenSeries": false,
"id": 26,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(rate(vmalert_alerts_sent_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance, addr)",
"interval": "",
"legendFormat": "{{instance}} => {{addr}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests rate to Alertmanager ($group)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:229",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:230",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the error rate for the attempts to send alerts to Alertmanager. If not zero it means there issues on attempt to send notification to Alertmanager and some alerts may be not delivered properly. Check the logs for more details.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 41
},
"hiddenSeries": false,
"id": 32,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(rate(vmalert_alerts_send_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(instance, addr)",
"interval": "",
"legendFormat": "{{instance}} => {{addr}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Errors rate to Alertmanager ($group)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:229",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:230",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"title": "Alerting rules ($instance)",
"type": "row"
},
{
"collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 25
},
"id": 28,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the top 10 recording rules which generate the most of samples. Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 26
},
"hiddenSeries": false,
"id": 31,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "topk(10, sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0)",
"interval": "",
"legendFormat": "{{recording}} ({{group}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Top 10 rules by produced samples ($group)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "$ds",
"description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 26
},
"id": 33,
"options": {
"legend": {
"calcs": [
"max",
"lastNotNull",
"mean"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.0.3",
"targets": [
{
"exemplar": false,
"expr": "sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) < 1",
"interval": "",
"legendFormat": "{{recording}} ({{group}})",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Rules with 0 produced samples ($group)",
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 34
},
"hiddenSeries": false,
"id": 30,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0",
"interval": "",
"legendFormat": "{{recording}} ({{group}})",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Errors ($group)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"title": "Recording rules ($instance)",
"type": "row"
},
{
"collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 43,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Shows the CPU usage per vmalert instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"hiddenSeries": false,
"id": 35,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"hiddenSeries": false,
"id": 37,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
}
],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"interval": "",
"legendFormat": "resident {{instance}}",
"refId": "A"
},
{
"exemplar": false,
"expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"hide": false,
"interval": "",
"legendFormat": "anonymous {{instance}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory usage ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"description": "Panel shows the number of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 12
},
"hiddenSeries": false,
"id": 39,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:1161",
"alias": "max",
"color": "#C4162A"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": false,
"expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "open {{instance}}",
"refId": "A"
},
{
"expr": "min(process_max_fds{job=~\"$job\", instance=~\"$instance\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "max",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Open FDs ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": null,
"logBase": 2,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$ds",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 12
},
"hiddenSeries": false,
"id": 41,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Goroutines ($instance)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": 0,
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"title": "Resource usage",
"type": "row"
}
],
"refresh": false,
"schemaVersion": 30,
"style": "dark",
"tags": [
"victoriametrics",
"vmalert"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "Prometheus"
},
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "ds",
"options": [],
"query": "prometheus",
"queryValue": "",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"type": "datasource"
},
{
"allValue": null,
"current": {},
"datasource": "$ds",
"definition": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)",
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "job",
"options": [],
"query": {
"query": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".*",
"current": {},
"datasource": "$ds",
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "instance",
"options": [],
"query": {
"query": "label_values(vm_app_version{job=~\"$job\"}, instance)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
},
{
"allValue": ".*",
"current": {},
"datasource": "$ds",
"definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "group",
"options": [],
"query": {
"query": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "vmalert",
"uid": "LzldHAVnz",
"version": 1
}