lib/promscrape: exponentially increase retry interval on unsuccessful requests to scrape targets or to service discovery services

This should reduce CPU load at vmagent and at the remote side when the remote side doesn't accept HTTP requests.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1289
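
For illustration only (not part of the commit): a minimal standalone sketch of the backoff rule the diffs below introduce. The nextSleep helper is a hypothetical name used just for this sketch; the actual change inlines the same logic directly in doRequestWithPossibleRetry. The retry interval starts at one second, doubles after every failed attempt, and is capped by the time left until the overall deadline, so retries can never outlive the request deadline.

package main

import (
	"fmt"
	"time"
)

// nextSleep is a hypothetical helper used only for this sketch: it doubles the
// current retry interval and caps it at the time remaining until the deadline.
// The second return value is false when the deadline no longer allows another
// retry, i.e. when the caller should give up and return the last error.
func nextSleep(sleepTime time.Duration, deadline time.Time) (time.Duration, bool) {
	maxSleepTime := time.Until(deadline)
	if sleepTime > maxSleepTime {
		return 0, false
	}
	sleepTime += sleepTime
	if sleepTime > maxSleepTime {
		sleepTime = maxSleepTime
	}
	return sleepTime, true
}

func main() {
	deadline := time.Now().Add(30 * time.Second)
	sleepTime := time.Second
	for {
		next, ok := nextSleep(sleepTime, deadline)
		if !ok {
			fmt.Println("deadline reached, giving up")
			return
		}
		sleepTime = next
		// The real code sleeps here before retrying; this demo only prints
		// the schedule: 2s, 4s, 8s, 16s, then a value capped near the deadline.
		fmt.Println("would sleep for", sleepTime)
	}
}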
Aliaksandr Valialkin 2021-05-13 10:38:43 +03:00
parent c0ec541559
commit f6a641de62
2 changed files with 16 additions and 2 deletions


@@ -299,6 +299,7 @@ var (
 )
 
 func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
+	sleepTime := time.Second
 	for {
 		// Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline
 		// across multiple retries.
@@ -310,9 +311,15 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request,
 			return err
 		}
 		// Retry request if the server closes the keep-alive connection unless deadline exceeds.
-		if time.Since(deadline) >= 0 {
+		maxSleepTime := time.Until(deadline)
+		if sleepTime > maxSleepTime {
 			return fmt.Errorf("the server closes all the connection attempts: %w", err)
 		}
+		sleepTime += sleepTime
+		if sleepTime > maxSleepTime {
+			sleepTime = maxSleepTime
+		}
+		time.Sleep(sleepTime)
 		scrapeRetries.Inc()
 	}
 }


@@ -222,6 +222,7 @@ func (c *Client) getAPIResponseWithParamsAndClient(client *fasthttp.HostClient,
 }
 
 func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
+	sleepTime := time.Second
 	discoveryRequests.Inc()
 	for {
 		// Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline
@@ -234,9 +235,15 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request,
 			return err
 		}
 		// Retry request if the server closes the keep-alive connection unless deadline exceeds.
-		if time.Since(deadline) >= 0 {
+		maxSleepTime := time.Until(deadline)
+		if sleepTime > maxSleepTime {
 			return fmt.Errorf("the server closes all the connection attempts: %w", err)
 		}
+		sleepTime += sleepTime
+		if sleepTime > maxSleepTime {
+			sleepTime = maxSleepTime
+		}
+		time.Sleep(sleepTime)
 		discoveryRetries.Inc()
 	}
 }
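
A hypothetical call site, not taken from the repository, just to illustrate the deadline semantics shared by both patched functions: the deadline passed to doRequestWithPossibleRetry bounds the initial request and all retries together, so the exponential backoff can never push a scrape or discovery request past its timeout. The 30-second timeout and surrounding variables are assumptions for this sketch.

	// Hypothetical usage, assuming a per-request timeout of 30 seconds:
	deadline := time.Now().Add(30 * time.Second)
	if err := doRequestWithPossibleRetry(hc, req, resp, deadline); err != nil {
		// Either a non-retriable error occurred, or waiting for another
		// retry would have exceeded the deadline.
		return err
	}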