lib/promscrape: limit scrape_timeout by scrape_interval like Prometheus does

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281
This commit is contained in:
Aliaksandr Valialkin 2021-05-13 16:09:45 +03:00
parent 76e03b46df
commit e3f61d540b
3 changed files with 9 additions and 2 deletions

View file

@ -15,6 +15,7 @@ sort: 15
* BUGFIX: vmagent: do not retry scraping targets, which don't support HTTP. This should reduce CPU load and network usage at `vmagent` and at scrape target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1289).
* BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Prevoiusly `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240).
* BUGFIX: vmagent: properly spread scrape targets among `vmagent` replicas if `-promscrape.cluster.replicationFactor` exceeds 1. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1292).
* BUGFIX: vmagent: limit `scrape_timeout` by `scrape_interval`. This guarantees that only a single sample is lost during the configured `scrape_interval` when scrape target responds slowly. See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907) for details.
* BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention.
* BUGFIX: stop the process on panic errors, since such errors may leave the process in inconsistent state. Previously panics could be recovered, which could result in unexpected hard-to-debug further behavior of running process.
* BUGFIX: vminsert, vmagent: make sure data ingestion connections are closed before completing graceful shutdown. Previously the connection may remain open, which could result in trailing samples loss.

View file

@ -541,6 +541,12 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
scrapeTimeout = defaultScrapeTimeout
}
}
if scrapeTimeout > scrapeInterval {
// Limit the `scrape_timeout` with `scrape_interval` like Prometheus does.
// This guarantees that the scraper can miss only a single scrape if the target sometimes responds slowly.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907
scrapeTimeout = scrapeInterval
}
honorLabels := sc.HonorLabels
honorTimestamps := sc.HonorTimestamps
denyRedirects := false

View file

@ -898,7 +898,7 @@ scrape_configs:
{
ScrapeURL: "http://1.2.3.4:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 34 * time.Second,
ScrapeTimeout: 8 * time.Second,
Labels: []prompbmarshal.Label{
{
Name: "__address__",
@ -932,7 +932,7 @@ scrape_configs:
{
ScrapeURL: "http://foobar:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 34 * time.Second,
ScrapeTimeout: 8 * time.Second,
Labels: []prompbmarshal.Label{
{
Name: "__address__",