From 74301cdbf5d1360840ceeb60e94f1e96374834f7 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Mon, 2 Oct 2023 17:12:12 +0200 Subject: [PATCH] lib/promscrape: add metric `vm_promscrape_scrapes_skipped_total` (#5074) * lib/promscrape: add metric `vm_promscrape_scrapes_skipped_total` add metric `vm_promscrape_scrapes_skipped_total`to show whether vmagent skips the scrapes. This could happen if vmagent is overloaded or target is responding too slow for configured `scrape_interval`. The follow-up commit should add a corresponding alerting rule and panel to vmagent dashboard. Signed-off-by: hagen1778 * deployment/docker: add `TooManyScrapeSkips` alerting rule for vmagent Signed-off-by: hagen1778 * dashboards: add panels `Scrape duration 0.99 quantile` and `Skipped scrapes` to vmagent dashboard Signed-off-by: hagen1778 --------- Signed-off-by: hagen1778 --- dashboards/vm/vmagent.json | 272 +++++++++++++++++++++++---- dashboards/vmagent.json | 272 +++++++++++++++++++++++---- deployment/docker/alerts-vmagent.yml | 16 ++ docs/CHANGELOG.md | 2 + lib/promscrape/scrapework.go | 15 +- 5 files changed, 510 insertions(+), 67 deletions(-) diff --git a/dashboards/vm/vmagent.json b/dashboards/vm/vmagent.json index 3928c3248..a15e64299 100644 --- a/dashboards/vm/vmagent.json +++ b/dashboards/vm/vmagent.json @@ -1271,8 +1271,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1374,8 +1373,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2390,7 +2388,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 4 }, "id": 92, "options": { @@ -2493,7 +2491,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 4 }, "id": 95, "options": { @@ -2599,7 +2597,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 12 }, "id": 98, "options": { @@ -2705,7 +2703,7 @@ "h": 8, "w": 12, "x": 12, - "y": 44 + "y": 12 }, "id": 99, "options": { @@ -2810,7 +2808,7 @@ "h": 8, "w": 12, "x": 0, - 
"y": 52 + "y": 20 }, "id": 79, "links": [], @@ -2916,7 +2914,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 20 }, "id": 18, "links": [ @@ -3027,7 +3025,7 @@ "h": 8, "w": 12, "x": 0, - "y": 60 + "y": 28 }, "id": 127, "links": [], @@ -3131,7 +3129,7 @@ "h": 8, "w": 12, "x": 12, - "y": 60 + "y": 28 }, "id": 50, "options": { @@ -3234,7 +3232,7 @@ "h": 8, "w": 12, "x": 0, - "y": 68 + "y": 36 }, "id": 130, "links": [], @@ -3286,6 +3284,110 @@ "title": "Concurrent inserts ($instance)", "type": "timeseries" }, + { + "datasource": { + "type": "victoriametrics-datasource", + "uid": "$ds" + }, + "description": "Shows the number of skipped scrapes.\n\nScrapes could be skipped due to\n1. vmagent overload;\n2. too small scrape interval for configured job or target.\n\nVerify that vmagent has enough CPU/Mem resources, network bandwidth (try increasing `-remoteWrite.queues`).\n\nCheck `Scrape duration` pane to measure the scrape duration. If vmagent has enough resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval or debugging why scraped target is responding too slowly.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 132, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "victoriametrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(vm_promscrape_scrapes_skipped_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, instance)", + "interval": "", + "legendFormat": "{{instance}} ({{job}})", + "range": true, + "refId": "A" + } + ], + "title": "Skipped scrapes ($instance)", + "type": "timeseries" + }, { "datasource": { "type": "victoriametrics-datasource", @@ -3347,7 +3449,7 @@ "h": 7, "w": 24, "x": 0, - "y": 76 + "y": 44 }, "id": 129, "options": { @@ -3484,7 +3586,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3500,7 +3603,7 @@ "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 52 }, "id": 48, "options": { @@ -3588,7 +3691,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3604,7 +3708,7 @@ "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 52 }, "id": 76, "options": { @@ -3691,7 +3795,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3707,7 +3812,7 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 59 }, "id": 20, "options": { @@ -3793,7 +3898,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3809,7 +3915,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 59 }, "id": 126, "options": { @@ -3894,7 +4000,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3910,7 +4017,7 @@ "h": 8, 
"w": 12, "x": 0, - "y": 60 + "y": 67 }, "id": 46, "options": { @@ -3948,6 +4055,109 @@ "title": "Scrape response size 0.99 quantile ($instance)", "type": "timeseries" }, + { + "datasource": { + "type": "victoriametrics-datasource", + "uid": "$ds" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 133, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "victoriametrics-datasource", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, vmrange))) by(job)", + "format": "time_series", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Scrape duration 0.99 quantile ($instance)", + "type": "timeseries" + }, { "datasource": { "type": 
"victoriametrics-datasource", @@ -3995,7 +4205,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4011,7 +4222,7 @@ "h": 8, "w": 12, "x": 12, - "y": 60 + "y": 75 }, "id": 31, "options": { @@ -4172,8 +4383,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4289,8 +4499,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4393,8 +4602,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", diff --git a/dashboards/vmagent.json b/dashboards/vmagent.json index de0502665..d84d4ff62 100644 --- a/dashboards/vmagent.json +++ b/dashboards/vmagent.json @@ -1270,8 +1270,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1373,8 +1372,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2389,7 +2387,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 4 }, "id": 92, "options": { @@ -2492,7 +2490,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 4 }, "id": 95, "options": { @@ -2598,7 +2596,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 12 }, "id": 98, "options": { @@ -2704,7 +2702,7 @@ "h": 8, "w": 12, "x": 12, - "y": 44 + "y": 12 }, "id": 99, "options": { @@ -2809,7 +2807,7 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 20 }, "id": 79, "links": [], @@ -2915,7 +2913,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 20 }, "id": 18, "links": [ @@ -3026,7 +3024,7 @@ "h": 8, "w": 12, "x": 0, - "y": 60 + "y": 28 }, "id": 127, "links": [], @@ -3130,7 +3128,7 @@ "h": 8, "w": 12, "x": 12, - "y": 60 + "y": 28 }, "id": 50, "options": { @@ -3233,7 +3231,7 @@ "h": 8, "w": 12, "x": 0, - "y": 68 + "y": 36 }, "id": 130, "links": [], @@ -3285,6 +3283,110 @@ "title": "Concurrent inserts ($instance)", "type": "timeseries" }, + 
{ + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the number of skipped scrapes.\n\nScrapes could be skipped due to\n1. vmagent overload;\n2. too small scrape interval for configured job or target.\n\nVerify that vmagent has enough CPU/Mem resources, network bandwidth (try increasing `-remoteWrite.queues`).\n\nCheck `Scrape duration` pane to measure the scrape duration. If vmagent has enough resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval or debugging why scraped target is responding too slowly.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 132, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(increase(vm_promscrape_scrapes_skipped_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, instance)", + "interval": "", + "legendFormat": "{{instance}} ({{job}})", + "range": true, + "refId": "A" + } + ], + "title": "Skipped scrapes ($instance)", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -3346,7 +3448,7 @@ "h": 7, "w": 24, "x": 0, - "y": 76 + "y": 44 }, "id": 129, "options": { @@ -3483,7 +3585,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3499,7 +3602,7 @@ "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 52 }, "id": 48, "options": { @@ -3587,7 +3690,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3603,7 +3707,7 @@ "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 52 }, "id": 76, "options": { @@ -3690,7 +3794,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3706,7 +3811,7 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 59 }, "id": 20, "options": { @@ -3792,7 +3897,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3808,7 +3914,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 59 }, "id": 126, "options": { @@ -3893,7 +3999,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3909,7 +4016,7 @@ "h": 8, "w": 12, "x": 0, - "y": 60 + "y": 67 }, "id": 46, "options": { @@ -3947,6 +4054,109 @@ "title": "Scrape response size 0.99 quantile ($instance)", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 133, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, vmrange))) by(job)", + "format": "time_series", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Scrape duration 0.99 quantile ($instance)", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -3994,7 +4204,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4010,7 +4221,7 @@ "h": 8, "w": 12, "x": 12, - "y": 60 + "y": 75 }, "id": 31, "options": { @@ -4171,8 +4382,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4288,8 +4498,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4392,8 +4601,7 @@ "mode": "absolute", 
"steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", diff --git a/deployment/docker/alerts-vmagent.yml b/deployment/docker/alerts-vmagent.yml index b4f4ee9e0..220121107 100644 --- a/deployment/docker/alerts-vmagent.yml +++ b/deployment/docker/alerts-vmagent.yml @@ -132,3 +132,19 @@ groups: summary: "Configuration reload failed for vmagent instance {{ $labels.instance }}" description: "Configuration hot-reload failed for vmagent on instance {{ $labels.instance }}. Check vmagent's logs for detailed error message." + + - alert: TooManyScrapeSkips + expr: | + rate(vm_promscrape_scrapes_skipped_total[5m]) > 0 + for: 15m + labels: + severity: warning + annotations: + dashboard: "http://localhost:3000/d/G7Z9GzMGz?viewPanel=132&var-instance={{ $labels.instance }}" + summary: "vmagent on instance {{ $labels.instance }} is skipping scrapes" + description: "Scrape intervals are skipped for vmagent on instance {{ $labels.instance }} + due to either overload or too small scrape interval. Verify that vmagent has enough CPU/Mem resources + and network bandwidth (try increasing -remoteWrite.queues). + Check vm_promscrape_scrape_duration_seconds metric to measure the scrape duration. If vmagent has enough + resources and scrape duration is very close or exceeds scrape interval - try increasing the scrape interval + or debugging why scraped target is responding too slowly." \ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c6debc584..50490d42e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -35,6 +35,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not log `unexpected EOF` when reading incoming metrics, since this error is expected and is handled during metrics' parsing. This reduces the amounts of noisy logs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4817). 
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): retry failed write request on the closed connection immediately, without waiting for backoff. This should improve data delivery speed and reduce amount of error logs emitted by vmagent when using idle connections. See related [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduces load on Kubernetes control plane during initial service discovery. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4855) for details. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add metric `vm_promscrape_scrapes_skipped_total` to show whether vmagent skips the scrapes. This could happen if vmagent is overloaded or target is responding too slow for configured `scrape_interval`. * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the maximum recovery time at `vmselect` and `vminsert` when some of `vmstorage` nodes become unavailable because of networking issues from 60 seconds to 3 seconds by default. The recovery time can be tuned at `vmselect` and `vminsert` nodes with `-vmstorageUserTimeout` command-line flag if needed. Thanks to @wjordan for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4423). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add Prometheus data support to the "Explore cardinality" page. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4320) for details. * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): make the warning message more noticeable for text fields. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4848). 
@@ -43,6 +44,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): organize `min`, `max`, `median` values on the chart legend and tooltips for better visibility. * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add explanation about [cardinality explorer](https://docs.victoriametrics.com/#cardinality-explorer) statistic inaccuracy in VictoriaMetrics cluster. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3070). * FEATURE: dashboards: provide copies of Grafana dashboards alternated with VictoriaMetrics datasource at [dashboards/vm](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/dashboards/vm). +* FEATURE: dashboards: add panels `Scrape duration 0.99 quantile` and `Skipped scrapes` to vmagent dashboard. * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): added ability to set, override and clear request and response headers on a per-user and per-path basis. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4825) and [these docs](https://docs.victoriametrics.com/vmauth.html#auth-config) for details. * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to retry requests to the [remaining backends](https://docs.victoriametrics.com/vmauth.html#load-balancing) if they return response status codes specified in the `retry_status_codes` list. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893). * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): expose metrics `vmauth_config_last_reload_*` for tracking the state of config reloads, similarly to vmagent/vmalert components. 
diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 3a23c8655..737e5a3dc 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -326,6 +326,7 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{}) sw.scrapeAndLogError(timestamp, timestamp) } defer ticker.Stop() + for { timestamp += scrapeInterval.Milliseconds() select { @@ -350,9 +351,16 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{}) return case tt := <-ticker.C: t := tt.UnixNano() / 1e6 - if d := math.Abs(float64(t - timestamp)); d > 0 && d/float64(scrapeInterval.Milliseconds()) > 0.1 { - // Too big jitter. Adjust timestamp - timestamp = t + if d := math.Abs(float64(t - timestamp)); d > 0 { + intervalDelay := d / float64(scrapeInterval.Milliseconds()) + if intervalDelay > 0.1 { + // Too big jitter. Adjust timestamp + timestamp = t + } + if intervalDelay >= 1 { + skipped := math.Floor(intervalDelay) + scrapesSkipped.Add(int(skipped)) + } } sw.scrapeAndLogError(timestamp, t) } @@ -395,6 +403,7 @@ var ( scrapeDuration = metrics.NewHistogram("vm_promscrape_scrape_duration_seconds") scrapeResponseSize = metrics.NewHistogram("vm_promscrape_scrape_response_size_bytes") scrapedSamples = metrics.NewHistogram("vm_promscrape_scraped_samples") + scrapesSkipped = metrics.NewCounter("vm_promscrape_scrapes_skipped_total") scrapesSkippedBySampleLimit = metrics.NewCounter("vm_promscrape_scrapes_skipped_by_sample_limit_total") scrapesFailed = metrics.NewCounter("vm_promscrape_scrapes_failed_total") pushDataDuration = metrics.NewHistogram("vm_promscrape_push_data_duration_seconds")