mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/promscrape: add metric vm_promscrape_scrapes_skipped_total
(#5074)
* lib/promscrape: add metric `vm_promscrape_scrapes_skipped_total` add metric `vm_promscrape_scrapes_skipped_total`to show whether vmagent skips the scrapes. This could happen if vmagent is overloaded or target is responding too slow for configured `scrape_interval`. The follow-up commit should add a corresponding alerting rule and panel to vmagent dashboard. Signed-off-by: hagen1778 <roman@victoriametrics.com> * deployment/docker: add `TooManyScrapeSkips` alerting rule for vmagent Signed-off-by: hagen1778 <roman@victoriametrics.com> * dashboards: add panels `Scrape duration 0.99 quantile` and `Skipped scrapes` to vmagent dashboard Signed-off-by: hagen1778 <roman@victoriametrics.com> --------- Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
7b33a27874
commit
74301cdbf5
5 changed files with 510 additions and 67 deletions
|
@ -1271,8 +1271,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -1374,8 +1373,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -2390,7 +2388,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 36
|
||||
"y": 4
|
||||
},
|
||||
"id": 92,
|
||||
"options": {
|
||||
|
@ -2493,7 +2491,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 36
|
||||
"y": 4
|
||||
},
|
||||
"id": 95,
|
||||
"options": {
|
||||
|
@ -2599,7 +2597,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 44
|
||||
"y": 12
|
||||
},
|
||||
"id": 98,
|
||||
"options": {
|
||||
|
@ -2705,7 +2703,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 44
|
||||
"y": 12
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
|
@ -2810,7 +2808,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 52
|
||||
"y": 20
|
||||
},
|
||||
"id": 79,
|
||||
"links": [],
|
||||
|
@ -2916,7 +2914,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 52
|
||||
"y": 20
|
||||
},
|
||||
"id": 18,
|
||||
"links": [
|
||||
|
@ -3027,7 +3025,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 28
|
||||
},
|
||||
"id": 127,
|
||||
"links": [],
|
||||
|
@ -3131,7 +3129,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 28
|
||||
},
|
||||
"id": 50,
|
||||
"options": {
|
||||
|
@ -3234,7 +3232,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 68
|
||||
"y": 36
|
||||
},
|
||||
"id": 130,
|
||||
"links": [],
|
||||
|
@ -3286,6 +3284,110 @@
|
|||
"title": "Concurrent inserts ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the number of skipped scrapes.\n\nScrapes could be skipped due to\n1. vmagent overload;\n2. too small scrape interval for configured job or target.\n\nVerify that vmagent has enough CPU/Mem resources, network bandwidth (try increasing `-remoteWrite.queues`).\n\nCheck `Scrape duration` pane to measure the scrape duration. If vmagent has enough resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval or debugging why scraped target is responding too slowly.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 36
|
||||
},
|
||||
"id": 132,
|
||||
"links": [],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(increase(vm_promscrape_scrapes_skipped_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, instance)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Skipped scrapes ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
|
@ -3347,7 +3449,7 @@
|
|||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 76
|
||||
"y": 44
|
||||
},
|
||||
"id": 129,
|
||||
"options": {
|
||||
|
@ -3484,7 +3586,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3500,7 +3603,7 @@
|
|||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 45
|
||||
"y": 52
|
||||
},
|
||||
"id": 48,
|
||||
"options": {
|
||||
|
@ -3588,7 +3691,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3604,7 +3708,7 @@
|
|||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 45
|
||||
"y": 52
|
||||
},
|
||||
"id": 76,
|
||||
"options": {
|
||||
|
@ -3691,7 +3795,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3707,7 +3812,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 52
|
||||
"y": 59
|
||||
},
|
||||
"id": 20,
|
||||
"options": {
|
||||
|
@ -3793,7 +3898,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3809,7 +3915,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 52
|
||||
"y": 59
|
||||
},
|
||||
"id": 126,
|
||||
"options": {
|
||||
|
@ -3894,7 +4000,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3910,7 +4017,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 67
|
||||
},
|
||||
"id": 46,
|
||||
"options": {
|
||||
|
@ -3948,6 +4055,109 @@
|
|||
"title": "Scrape response size 0.99 quantile ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 67
|
||||
},
|
||||
"id": 133,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, vmrange))) by(job)",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Scrape duration 0.99 quantile ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
|
@ -3995,7 +4205,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4011,7 +4222,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 75
|
||||
},
|
||||
"id": 31,
|
||||
"options": {
|
||||
|
@ -4172,8 +4383,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4289,8 +4499,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4393,8 +4602,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
|
|
@ -1270,8 +1270,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -1373,8 +1372,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -2389,7 +2387,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 36
|
||||
"y": 4
|
||||
},
|
||||
"id": 92,
|
||||
"options": {
|
||||
|
@ -2492,7 +2490,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 36
|
||||
"y": 4
|
||||
},
|
||||
"id": 95,
|
||||
"options": {
|
||||
|
@ -2598,7 +2596,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 44
|
||||
"y": 12
|
||||
},
|
||||
"id": 98,
|
||||
"options": {
|
||||
|
@ -2704,7 +2702,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 44
|
||||
"y": 12
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
|
@ -2809,7 +2807,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 52
|
||||
"y": 20
|
||||
},
|
||||
"id": 79,
|
||||
"links": [],
|
||||
|
@ -2915,7 +2913,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 52
|
||||
"y": 20
|
||||
},
|
||||
"id": 18,
|
||||
"links": [
|
||||
|
@ -3026,7 +3024,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 28
|
||||
},
|
||||
"id": 127,
|
||||
"links": [],
|
||||
|
@ -3130,7 +3128,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 28
|
||||
},
|
||||
"id": 50,
|
||||
"options": {
|
||||
|
@ -3233,7 +3231,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 68
|
||||
"y": 36
|
||||
},
|
||||
"id": 130,
|
||||
"links": [],
|
||||
|
@ -3285,6 +3283,110 @@
|
|||
"title": "Concurrent inserts ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the number of skipped scrapes.\n\nScrapes could be skipped due to\n1. vmagent overload;\n2. too small scrape interval for configured job or target.\n\nVerify that vmagent has enough CPU/Mem resources, network bandwidth (try increasing `-remoteWrite.queues`).\n\nCheck `Scrape duration` pane to measure the scrape duration. If vmagent has enough resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval or debugging why scraped target is responding too slowly.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 36
|
||||
},
|
||||
"id": 132,
|
||||
"links": [],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(increase(vm_promscrape_scrapes_skipped_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, instance)",
|
||||
"interval": "",
|
||||
"legendFormat": "{{instance}} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Skipped scrapes ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
|
@ -3346,7 +3448,7 @@
|
|||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 76
|
||||
"y": 44
|
||||
},
|
||||
"id": 129,
|
||||
"options": {
|
||||
|
@ -3483,7 +3585,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3499,7 +3602,7 @@
|
|||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 45
|
||||
"y": 52
|
||||
},
|
||||
"id": 48,
|
||||
"options": {
|
||||
|
@ -3587,7 +3690,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3603,7 +3707,7 @@
|
|||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 45
|
||||
"y": 52
|
||||
},
|
||||
"id": 76,
|
||||
"options": {
|
||||
|
@ -3690,7 +3794,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3706,7 +3811,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 52
|
||||
"y": 59
|
||||
},
|
||||
"id": 20,
|
||||
"options": {
|
||||
|
@ -3792,7 +3897,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3808,7 +3914,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 52
|
||||
"y": 59
|
||||
},
|
||||
"id": 126,
|
||||
"options": {
|
||||
|
@ -3893,7 +3999,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -3909,7 +4016,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 67
|
||||
},
|
||||
"id": 46,
|
||||
"options": {
|
||||
|
@ -3947,6 +4054,109 @@
|
|||
"title": "Scrape response size 0.99 quantile ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 67
|
||||
},
|
||||
"id": 133,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, vmrange))) by(job)",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Scrape duration 0.99 quantile ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
|
@ -3994,7 +4204,8 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4010,7 +4221,7 @@
|
|||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 75
|
||||
},
|
||||
"id": 31,
|
||||
"options": {
|
||||
|
@ -4171,8 +4382,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4288,8 +4498,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
@ -4392,8 +4601,7 @@
|
|||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
|
|
|
@ -132,3 +132,19 @@ groups:
|
|||
summary: "Configuration reload failed for vmagent instance {{ $labels.instance }}"
|
||||
description: "Configuration hot-reload failed for vmagent on instance {{ $labels.instance }}.
|
||||
Check vmagent's logs for detailed error message."
|
||||
|
||||
- alert: TooManyScrapeSkips
|
||||
expr: |
|
||||
rate(vm_promscrape_scrapes_skipped_total[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=132&var-instance={{ $labels.instance }}"
|
||||
summary: "vmagent on instance {{ $labels.instance }} is skipping scrapes"
|
||||
description: "Scrape intervals are skipped for vmagent on instance {{ $labels.instance }}
|
||||
due to either overload or too small scrape interval. Verify that vmagent has enough CPU/Mem resources
|
||||
and network bandwidth (try increasing -remoteWrite.queues).
|
||||
Check vm_promscrape_scrape_duration_seconds metric to measure the scrape duration. If vmagent has enough
|
||||
resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval
|
||||
or debuggin why scraped target is responding too slow;y."
|
|
@ -35,6 +35,7 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not log `unexpected EOF` when reading incoming metrics, since this error is expected and is handled during metrics' parsing. This reduces the amounts of noisy logs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4817).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): retry failed write request on the closed connection immediately, without waiting for backoff. This should improve data delivery speed and reduce amount of error logs emitted by vmagent when using idle connections. See related [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduces load on Kubernetes control plane during initial service discovery. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4855) for details.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add metric `vm_promscrape_scrapes_skipped_total` to show whether vmagent skips the scrapes. This could happen if vmagent is overloaded or target is responding too slow for configured `scrape_interval`.
|
||||
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the maximum recovery time at `vmselect` and `vminsert` when some of `vmstorage` nodes become unavailable because of networking issues from 60 seconds to 3 seconds by default. The recovery time can be tuned at `vmselect` and `vminsert` nodes with `-vmstorageUserTimeout` command-line flag if needed. Thanks to @wjordan for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4423).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add Prometheus data support to the "Explore cardinality" page. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4320) for details.
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): make the warning message more noticeable for text fields. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4848).
|
||||
|
@ -43,6 +44,7 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): organize `min`, `max`, `median` values on the chart legend and tooltips for better visibility.
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add explanation about [cardinality explorer](https://docs.victoriametrics.com/#cardinality-explorer) statistic inaccuracy in VictoriaMetrics cluster. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3070).
|
||||
* FEATURE: dashboards: provide copies of Grafana dashboards alternated with VictoriaMetrics datasource at [dashboards/vm](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/dashboards/vm).
|
||||
* FEATURE: dashboards: add panels `Scrape duration 0.99 quantile` and `Skipped scrapes` to vmagent dashboard.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): added ability to set, override and clear request and response headers on a per-user and per-path basis. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4825) and [these docs](https://docs.victoriametrics.com/vmauth.html#auth-config) for details.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to retry requests to the [remaining backends](https://docs.victoriametrics.com/vmauth.html#load-balancing) if they return response status codes specified in the `retry_status_codes` list. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): expose metrics `vmauth_config_last_reload_*` for tracking the state of config reloads, similarly to vmagent/vmalert components.
|
||||
|
|
|
@ -326,6 +326,7 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{})
|
|||
sw.scrapeAndLogError(timestamp, timestamp)
|
||||
}
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
timestamp += scrapeInterval.Milliseconds()
|
||||
select {
|
||||
|
@ -350,10 +351,17 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{})
|
|||
return
|
||||
case tt := <-ticker.C:
|
||||
t := tt.UnixNano() / 1e6
|
||||
if d := math.Abs(float64(t - timestamp)); d > 0 && d/float64(scrapeInterval.Milliseconds()) > 0.1 {
|
||||
if d := math.Abs(float64(t - timestamp)); d > 0 {
|
||||
intervalDelay := d / float64(scrapeInterval.Milliseconds())
|
||||
if intervalDelay > 0.1 {
|
||||
// Too big jitter. Adjust timestamp
|
||||
timestamp = t
|
||||
}
|
||||
if intervalDelay >= 1 {
|
||||
skipped := math.Floor(intervalDelay)
|
||||
scrapesSkipped.Add(int(skipped))
|
||||
}
|
||||
}
|
||||
sw.scrapeAndLogError(timestamp, t)
|
||||
}
|
||||
}
|
||||
|
@ -395,6 +403,7 @@ var (
|
|||
scrapeDuration = metrics.NewHistogram("vm_promscrape_scrape_duration_seconds")
|
||||
scrapeResponseSize = metrics.NewHistogram("vm_promscrape_scrape_response_size_bytes")
|
||||
scrapedSamples = metrics.NewHistogram("vm_promscrape_scraped_samples")
|
||||
scrapesSkipped = metrics.NewCounter("vm_promscrape_scrapes_skipped_total")
|
||||
scrapesSkippedBySampleLimit = metrics.NewCounter("vm_promscrape_scrapes_skipped_by_sample_limit_total")
|
||||
scrapesFailed = metrics.NewCounter("vm_promscrape_scrapes_failed_total")
|
||||
pushDataDuration = metrics.NewHistogram("vm_promscrape_push_data_duration_seconds")
|
||||
|
|
Loading…
Reference in a new issue