From ffec5131ae4ac6db43c908820787d1b39219eced Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 27 Jan 2021 00:37:09 +0200 Subject: [PATCH] lib/promscrape: export `vm_promscrape_scrapes_failed_per_url_total` and `vm_promscrape_scrapes_skipped_by_sample_limit_per_url_total` metrics These metrics could be useful for determining imporperly working scrape targets. Note that these metrics are exported only for failing scrape targets. They aren't exposed for normally working targets. --- docs/CHANGELOG.md | 1 + lib/promscrape/scrapework.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c4b45ba4e..fc9f1f7bd 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -6,6 +6,7 @@ * FEATURE: added `-search.maxStepForPointsAdjustment` command-line flag, which can be used for disabling adjustment for points returned `/api/v1/query_range` handler if such points have timestamps closer than `-search.latencyOffset` to the current time. Such points may contain incomplete data, so they are substituted by the previous values for `step` query args smaller than one minute by default. * FEATURE: vmalert: added `-datasource.queryStep` command-line flag for passing `step` query arg to `/api/v1/query` endpoint. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1025 * FEATURE: vmagent: added `-remoteWrite.rateLimit` command-line flag for limiting data transfer rate to `-remoteWrite.url`. This may be useful when big amounts of buffered data is sent after temporarily unavailability of the remote storage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1035 +* FEATURE: vmagent: export `vm_promscrape_scrapes_failed_per_url_total` and `vm_promscrape_scrapes_skipped_by_sample_limit_per_url_total` counters, which may help identifying improperly working scrape targets. * BUGFIX: vmagent: reduce the HTTP reconnection rate to scrape targets. Previously vmagent could errorneusly close HTTP keep-alive connections more often than needed. * BUGFIX: vmagent: retry scrape and service discovery requests when the remote server closes HTTP keep-alive connection. Previously `disable_keepalive: true` option could be used under `scrape_configs` section when working with such servers. diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 585529e01..2c37469ed 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -269,6 +269,7 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error if err != nil { up = 0 scrapesFailed.Inc() + metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_scrapes_failed_per_url_total{url=%q}`, sw.Config.ScrapeURL)).Inc() } else { bodyString := bytesutil.ToUnsafeString(body.B) wc.rows.UnmarshalWithErrLogger(bodyString, sw.logError) @@ -280,6 +281,7 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error srcRows = srcRows[:0] up = 0 scrapesSkippedBySampleLimit.Inc() + metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_scrapes_skipped_by_sample_limit_per_url_total{url=%q}`, sw.Config.ScrapeURL)).Inc() } samplesPostRelabeling := 0 for i := range srcRows {