From 8b133e40d59d169ba8d1362208036c38c4f339ff Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Fri, 3 Jul 2020 20:39:50 +0300
Subject: [PATCH] lib/promscrape: prevent excessively large deadline misses on
 scrape retries

The maximum deadline miss duration is reduced to 2x scrape_interval in the worst case.
By default it is limited to the scrape_interval configured for the given scrape target.
---
 lib/promscrape/client.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/promscrape/client.go b/lib/promscrape/client.go
index 34492b5a8..fd49d4433 100644
--- a/lib/promscrape/client.go
+++ b/lib/promscrape/client.go
@@ -7,6 +7,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/fasthttp"
 	"github.com/VictoriaMetrics/metrics"
 )
@@ -89,7 +90,7 @@ func (c *client) ReadData(dst []byte) ([]byte, error) {
 		req.Header.Set("Authorization", c.authHeader)
 	}
 	resp := fasthttp.AcquireResponse()
-	err := doRequestWithPossibleRetry(c.hc, req, resp)
+	err := doRequestWithPossibleRetry(c.hc, req, resp, c.hc.ReadTimeout)
 	statusCode := resp.StatusCode()
 	if err == nil && (statusCode == fasthttp.StatusMovedPermanently || statusCode == fasthttp.StatusFound) {
 		// Allow a single redirect.
@@ -144,7 +145,9 @@ var (
 	scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
 )
 
-func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response) error {
+func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, timeout time.Duration) error {
+	// Round the deadline down (whole seconds of the timeout, minus one) in order to protect against excessively large deadline misses on retry.
+	deadline := fasttime.UnixTimestamp() + uint64(timeout.Seconds()) - 1
 	attempts := 0
 again:
 	// There is no need in calling DoTimeout, since the timeout must be already set in hc.ReadTimeout.
@@ -155,7 +158,10 @@ again:
 	if err != fasthttp.ErrConnectionClosed {
 		return err
 	}
-	// Retry request if the server closed the keep-alive connection during the first attempt.
+	// Retry the request if the server closed the keep-alive connection, unless the deadline has been exceeded.
+	if fasttime.UnixTimestamp() > deadline {
+		return fasthttp.ErrTimeout
+	}
 	attempts++
 	if attempts > 3 {
 		return fmt.Errorf("the server closed 3 subsequent connections: %w", err)