lib/httpserver: do not close client connections every 2 minutes by default

Closing client connections every 2 minutes doesn't help load balancing - this just leads to "jumpy" connections between multiple backend servers, i.e. the load isn't spread evenly among backend servers, and instead jumps between the servers every 2 minutes. It is still possible to periodically close client connections by specifying a non-zero -http.connTimeout command-line flag. This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037 This is a follow-up for d387da142e
2025-01-30 15:22:07 +00:00 · 2024-02-08 21:01:20 +02:00 · 2024-02-08 21:01:20 +02:00 · d8c1db7953
commit d8c1db7953
parent ee745ab900
2 changed files with 10 additions and 4 deletions
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -32,6 +32,7 @@ The sandbox cluster installation is running under the constant load generated by

 * FEATURE: all VictoriaMetrics components: add support for TLS client certificate verification at `-httpListenAddr` (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)). See [these docs](https://docs.victoriametrics.com/#mtls-protection). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5458).
 * FEATURE: all VictoriaMetrics components: add support for empty command flag values in short array notation. For example, `-remoteWrite.sendTimeout=',20s,'` specifies three `-remoteWrite.sendTimeout` values - the first and the last ones are default values (`30s` in this case), while the second one is `20s`.
+* FEATURE: all VictoriaMetrics components: do not close client connections at `-httpListenAddr` every 2 minutes. This behavior didn't help spread the load among multiple backend servers behind a load-balancing TCP proxy. Instead, it could lead to hard-to-debug issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037). If you still need to periodically close client connections for some reason, then pass the desired timeout to the `-http.connTimeout` command-line flag.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html) and [single-node VictoriaMetrics](https://docs.victoriametrics.com): add support for data ingestion via [DataDog lambda extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/) aka `/api/beta/sketches` endpoint. See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3091). Thanks to @AndrewChubatiuk for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5584).
 * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add `-disableReroutingOnUnavailable` command-line flag to `vminsert`, which can be used for reducing resource usage spikes at `vmstorage` nodes during rolling restart. See [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#improving-re-routing-performance-during-restart). Thanks to @Muxa1L for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5713).
 * FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).
--- a/lib/httpserver/httpserver.go
+++ b/lib/httpserver/httpserver.go
@ -52,7 +52,7 @@ var (
 	maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
 	shutdownDelay               = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
 	idleConnTimeout             = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
-	connTimeout                 = flag.Duration("http.connTimeout", 2*time.Minute, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
+	connTimeout                 = flag.Duration("http.connTimeout", 0, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)

 	headerHSTS         = flag.String("http.header.hsts", "", "Value for 'Strict-Transport-Security' header, recommended: `max-age=31536000; includeSubDomains`")
 	headerFrameOptions = flag.String("http.header.frameOptions", "", "Value for 'X-Frame-Options' header")
@ -131,8 +131,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
 		// since these timeouts must be controlled by request handlers.

 		ErrorLog: logger.StdErrorLogger(),
-
-		ConnContext: func(ctx context.Context, c net.Conn) context.Context {
+	}
+	if *connTimeout > 0 {
+		s.s.ConnContext = func(ctx context.Context, c net.Conn) context.Context {
 			timeoutSec := connTimeout.Seconds()
 			// Add a jitter for connection timeout in order to prevent Thundering herd problem
 			// when all the connections are established at the same time.
@ -140,8 +141,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
 			jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
 			deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
 			return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
-		},
+		}
 	}
+
 	serversLock.Lock()
 	servers[addr] = &s
 	serversLock.Unlock()
@ -155,6 +157,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
 }

 func whetherToCloseConn(r *http.Request) bool {
+	if *connTimeout <= 0 {
+		return false
+	}
 	ctx := r.Context()
 	v := ctx.Value(connDeadlineTimeKey)
 	deadline, ok := v.(*uint64)