mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
9ad578214e
Co-authored-by: Timour I. Bakeev <tbakeev@ripe.net>
2334 lines
59 KiB
JSON
2334 lines
59 KiB
JSON
{
|
|
"__inputs": [],
|
|
"__requires": [
|
|
{
|
|
"type": "grafana",
|
|
"id": "grafana",
|
|
"name": "Grafana",
|
|
"version": "9.0.3"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "graph",
|
|
"name": "Graph (old)",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "datasource",
|
|
"id": "prometheus",
|
|
"name": "Prometheus",
|
|
"version": "1.0.0"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "stat",
|
|
"name": "Stat",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "table",
|
|
"name": "Table",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "timeseries",
|
|
"name": "Time series",
|
|
"version": ""
|
|
}
|
|
],
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"target": {
|
|
"limit": 100,
|
|
"matchAny": false,
|
|
"tags": [],
|
|
"type": "dashboard"
|
|
},
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "Overview for VictoriaMetrics vmalert v1.73.0 or higher",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"iteration": 1663341746917,
|
|
"links": [
|
|
{
|
|
"asDropdown": false,
|
|
"icon": "external link",
|
|
"includeVars": false,
|
|
"keepTime": false,
|
|
"tags": [],
|
|
"targetBlank": true,
|
|
"title": "vmalert docs",
|
|
"tooltip": "",
|
|
"type": "link",
|
|
"url": "https://docs.victoriametrics.com/vmalert.html"
|
|
},
|
|
{
|
|
"asDropdown": false,
|
|
"icon": "external link",
|
|
"includeVars": false,
|
|
"keepTime": false,
|
|
"tags": [],
|
|
"targetBlank": true,
|
|
"title": "Found a bug?",
|
|
"tooltip": "",
|
|
"type": "link",
|
|
"url": "https://github.com/VictoriaMetrics/VictoriaMetrics/issues"
|
|
},
|
|
{
|
|
"asDropdown": false,
|
|
"icon": "external link",
|
|
"includeVars": false,
|
|
"keepTime": false,
|
|
"tags": [],
|
|
"targetBlank": true,
|
|
"title": "New releases",
|
|
"tooltip": "",
|
|
"type": "link",
|
|
"url": "https://github.com/VictoriaMetrics/VictoriaMetrics/releases"
|
|
}
|
|
],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"collapsed": false,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 11,
|
|
"panels": [],
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "General ($instance)",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows if the last configuration update was successful. \"Not Ok\" means there was an unsuccessful attempt to update the configuration due to some error. Check the log for details.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {
|
|
"color": "green",
|
|
"index": 0,
|
|
"text": "Ok"
|
|
}
|
|
},
|
|
"type": "value"
|
|
},
|
|
{
|
|
"options": {
|
|
"from": 1,
|
|
"result": {
|
|
"color": "red",
|
|
"index": 1,
|
|
"text": "Not Ok"
|
|
},
|
|
"to": 999999
|
|
},
|
|
"type": "range"
|
|
}
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "9.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 ) or 0",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Config error",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the total number of errors generated by recording/alerting rules for selected instances and groups.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 4,
|
|
"x": 3,
|
|
"y": 1
|
|
},
|
|
"id": 8,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "9.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "(sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0)) + \n(sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0))",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Errors",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the total number of loaded alerting rules across selected instances and groups.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 4,
|
|
"x": 7,
|
|
"y": 1
|
|
},
|
|
"id": 9,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "9.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "count(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Alerting rules",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the total number of loaded recording rules across selected instances and groups.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 4,
|
|
"x": 11,
|
|
"y": 1
|
|
},
|
|
"id": 7,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "auto"
|
|
},
|
|
"pluginVersion": "9.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "count(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Recording rules",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"custom": {
|
|
"align": "auto",
|
|
"displayMode": "auto",
|
|
"inspect": false,
|
|
"minWidth": 50
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Time"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.hidden",
|
|
"value": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Value"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "displayName",
|
|
"value": "Count"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 9,
|
|
"x": 0,
|
|
"y": 4
|
|
},
|
|
"id": 45,
|
|
"options": {
|
|
"footer": {
|
|
"fields": "",
|
|
"reducer": [
|
|
"sum"
|
|
],
|
|
"show": false
|
|
},
|
|
"showHeader": true
|
|
},
|
|
"pluginVersion": "9.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)",
|
|
"format": "table",
|
|
"instant": true,
|
|
"range": false,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"type": "table"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 15,
|
|
"x": 9,
|
|
"y": 4
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 4,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": false,
|
|
"current": true,
|
|
"hideEmpty": false,
|
|
"hideZero": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": false,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null as zero",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": true,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sort(sum(up{job=~\"$job\", instance=~\"$instance\"}) by (job, instance))",
|
|
"format": "time_series",
|
|
"instant": false,
|
|
"interval": "",
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Uptime",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 1,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:170",
|
|
"decimals": 0,
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:171",
|
|
"format": "short",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": 2
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the number of fired alerts by instance.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 15,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(increase(vmalert_alerts_fired_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)",
|
|
"interval": "",
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Alerts fired total",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:62",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:63",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Average evaluation duration by group, i.e. how long it takes to execute all the rules in each group.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 23,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": false
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group) / \nsum(rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group)",
|
|
"interval": "",
|
|
"legendFormat": "{{group}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Groups avg evaluation duration ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "s",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows how many requests (executions) per second vmalert sends to the configured datasource.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 24,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(rate(vmalert_execution_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
|
|
"interval": "",
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Rules execution rate ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:182",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:183",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the error rate while executing configured rules. Non-zero value means there are some issues with existing rules. Check the logs to get more details.",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 25,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(increase(vmalert_execution_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Rules execution errors ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:244",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:245",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"collapsed": true,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"id": 17,
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the current active (firing) alerting rules per group.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 25
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 14,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{alertname}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Active ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the events when rule execution resulted in an error. Check the logs for more details.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 25
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 13,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{alertname}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Errors ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the current pending alerting rules per group.\nPending means the rule has been active for less than the configured `for` parameter.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 33
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 20,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(vmalert_alerts_pending{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{alertname}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Pending ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows how many alerts are sent to Alertmanager per second. Only active alerts are sent.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 33
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 26,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(rate(vmalert_alerts_sent_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance, addr) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{instance}} => {{addr}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Requests rate to Alertmanager ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:229",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:230",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the error rate for the attempts to send alerts to Alertmanager. If not zero, it means there are issues when attempting to send notifications to Alertmanager and some alerts may not be delivered properly. Check the logs for more details.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 41
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 32,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(rate(vmalert_alerts_send_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(instance, addr) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{instance}} => {{addr}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Errors rate to Alertmanager ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:229",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:230",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Alerting rules ($instance)",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"collapsed": true,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 25
|
|
},
|
|
"id": 28,
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the top 10 recording rules which generate the most samples. Each generated sample is basically a time series which is then ingested into the configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.",
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 50
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 31,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "topk(10, sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0)",
|
|
"interval": "",
|
|
"legendFormat": "{{recording}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Top 10 rules by produced samples ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check that the rule's expression is correct and works as expected.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "never",
|
|
"spanNulls": true,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green"
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 50
|
|
},
|
|
"id": 33,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"max",
|
|
"lastNotNull",
|
|
"mean"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "8.0.3",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) < 1",
|
|
"interval": "",
|
|
"legendFormat": "{{recording}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Rules with 0 produced samples ($group)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 58
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 30,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0",
|
|
"interval": "",
|
|
"legendFormat": "{{recording}} ({{group}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Errors ($group)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Recording rules ($instance)",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"collapsed": true,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 26
|
|
},
|
|
"id": 43,
|
|
"panels": [
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Shows the CPU usage percentage per vmalert instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 67
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 35,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [
|
|
{
|
|
"targetBlank": true,
|
|
"title": "Profiling",
|
|
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
|
|
}
|
|
],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance) / min(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "CPU ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:473",
|
|
"format": "percentunit",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:474",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 67
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 37,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [
|
|
{
|
|
"targetBlank": true,
|
|
"title": "Profiling",
|
|
"url": "https://docs.victoriametrics.com/vmagent.html#profiling"
|
|
}
|
|
],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
|
|
"interval": "",
|
|
"legendFormat": "resident {{instance}}",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"exemplar": false,
|
|
"expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "anonymous {{instance}}",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Memory usage ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"description": "Panel shows the percentage of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 75
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 39,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"}) by (instance) \n/\nmin(process_max_fds{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"intervalFactor": 2,
|
|
"legendFormat": "open {{instance}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Open FDs ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:540",
|
|
"decimals": 3,
|
|
"format": "percentunit",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:541",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"links": []
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 0,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 75
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 41,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": true,
|
|
"min": false,
|
|
"show": true,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "9.0.3",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"intervalFactor": 2,
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Goroutines ($instance)",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"decimals": 0,
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Resource usage",
|
|
"type": "row"
|
|
}
|
|
],
|
|
"refresh": false,
|
|
"schemaVersion": 36,
|
|
"style": "dark",
|
|
"tags": [
|
|
"victoriametrics",
|
|
"vmalert"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {
|
|
"selected": false,
|
|
"text": "VictoriaMetrics",
|
|
"value": "VictoriaMetrics"
|
|
},
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"multi": false,
|
|
"name": "ds",
|
|
"options": [],
|
|
"query": "prometheus",
|
|
"queryValue": "",
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"type": "datasource"
|
|
},
|
|
{
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"definition": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"multi": false,
|
|
"name": "job",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)",
|
|
"refId": "StandardVariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 0,
|
|
"type": "query"
|
|
},
|
|
{
|
|
"allValue": ".*",
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"definition": "label_values(vm_app_version{job=~\"$job\"}, instance)",
|
|
"hide": 0,
|
|
"includeAll": true,
|
|
"multi": true,
|
|
"name": "instance",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(vm_app_version{job=~\"$job\"}, instance)",
|
|
"refId": "StandardVariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 0,
|
|
"type": "query"
|
|
},
|
|
{
|
|
"allValue": ".*",
|
|
"current": {},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)",
|
|
"hide": 0,
|
|
"includeAll": true,
|
|
"multi": true,
|
|
"name": "group",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)",
|
|
"refId": "StandardVariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 0,
|
|
"type": "query"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "$ds"
|
|
},
|
|
"filters": [],
|
|
"hide": 0,
|
|
"name": "adhoc",
|
|
"skipUrlSync": false,
|
|
"type": "adhoc"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-3h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "VictoriaMetrics - vmalert",
|
|
"uid": "LzldHAVnz",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|