diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c1cc6ca65..d1c9ffa50 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ sort: 15 * BUGFIX: vmagent: do not retry scraping targets, which don't support HTTP. This should reduce CPU load and network usage at `vmagent` and at scrape target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1289). * BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Prevoiusly `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240). * BUGFIX: vmagent: properly spread scrape targets among `vmagent` replicas if `-promscrape.cluster.replicationFactor` exceeds 1. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1292). +* BUGFIX: vmagent: limit `scrape_timeout` to `scrape_interval`. This guarantees that only a single sample is lost during the configured `scrape_interval` when the scrape target responds slowly. See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907) for details. * BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention. * BUGFIX: stop the process on panic errors, since such errors may leave the process in inconsistent state. Previously panics could be recovered, which could result in unexpected hard-to-debug further behavior of running process. * BUGFIX: vminsert, vmagent: make sure data ingestion connections are closed before completing graceful shutdown. Previously the connection may remain open, which could result in trailing samples loss. 
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index a100288d9..7ad155915 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -541,6 +541,12 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf scrapeTimeout = defaultScrapeTimeout } } + if scrapeTimeout > scrapeInterval { + // Limit the `scrape_timeout` with `scrape_interval` like Prometheus does. + // This guarantees that the scraper can miss only a single scrape if the target sometimes responds slowly. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907 + scrapeTimeout = scrapeInterval + } honorLabels := sc.HonorLabels honorTimestamps := sc.HonorTimestamps denyRedirects := false diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 8f3cf6a97..b63d89bb6 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -898,7 +898,7 @@ scrape_configs: { ScrapeURL: "http://1.2.3.4:80/metrics", ScrapeInterval: 8 * time.Second, - ScrapeTimeout: 34 * time.Second, + ScrapeTimeout: 8 * time.Second, Labels: []prompbmarshal.Label{ { Name: "__address__", @@ -932,7 +932,7 @@ scrape_configs: { ScrapeURL: "http://foobar:80/metrics", ScrapeInterval: 8 * time.Second, - ScrapeTimeout: 34 * time.Second, + ScrapeTimeout: 8 * time.Second, Labels: []prompbmarshal.Label{ { Name: "__address__",