lib/httpserver: do not close client connections every 2 minutes by default

Closing client connections every 2 minutes doesn't help load balancing -
this just leads to "jumpy" connections between multiple backend servers,
e.g. the load isn't spread evenly among backend servers, and instead jumps
between the servers every 2 minutes.

It is still possible periodically closing client connections by specifying non-zero -http.connTimeout command-line flag.

This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037

This is a follow-up for d387da142e
This commit is contained in:
Aliaksandr Valialkin 2024-02-08 21:01:20 +02:00
parent ee745ab900
commit d8c1db7953
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
2 changed files with 10 additions and 4 deletions

View file

@ -32,6 +32,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: all VictoriaMetrics components: add support for TLS client certificate verification at `-httpListenAddr` (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)). See [these docs](https://docs.victoriametrics.com/#mtls-protection). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5458).
* FEATURE: all VictoriaMetrics components: add support for empty command flag values in short array notation. For example, `-remoteWrite.sendTimeout=',20s,'` specifies three `-remoteWrite.sendTimeout` values - the first and the last ones are default values (`30s` in this case), while the second one is `20s`.
* FEATURE: all VictoriaMetrics components: do not client connections at `-httpListenAddr` every 2 minutes. This behavior didn't help spreading load among multiple backend servers behind load-balancing TCP proxy. Instead, it could lead to hard-to-debug issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037). If you still need periodically closing client connections because of some reason, then pass the desired timeout to `-http.connTimeout` command-line flag.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html) and [single-node VictoriaMetrics](https://docs.victoriametrics.com): add support for data ingestion via [DataDog lambda extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/) aka `/api/beta/sketches` endpoint. See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3091). Thanks to @AndrewChubatiuk for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5584).
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add `-disableReroutingOnUnavailable` command-line flag to `vminsert`, which can be used for reducing resource usage spikes at `vmstorage` nodes during rolling restart. See [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#improving-re-routing-performance-during-restart). Thanks to @Muxa1L for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5713).
* FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).

View file

@ -52,7 +52,7 @@ var (
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
connTimeout = flag.Duration("http.connTimeout", 0, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
headerHSTS = flag.String("http.header.hsts", "", "Value for 'Strict-Transport-Security' header, recommended: `max-age=31536000; includeSubDomains`")
headerFrameOptions = flag.String("http.header.frameOptions", "", "Value for 'X-Frame-Options' header")
@ -131,8 +131,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
// since these timeouts must be controlled by request handlers.
ErrorLog: logger.StdErrorLogger(),
ConnContext: func(ctx context.Context, c net.Conn) context.Context {
}
if *connTimeout > 0 {
s.s.ConnContext = func(ctx context.Context, c net.Conn) context.Context {
timeoutSec := connTimeout.Seconds()
// Add a jitter for connection timeout in order to prevent Thundering herd problem
// when all the connections are established at the same time.
@ -140,8 +141,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
},
}
}
serversLock.Lock()
servers[addr] = &s
serversLock.Unlock()
@ -155,6 +157,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
}
func whetherToCloseConn(r *http.Request) bool {
if *connTimeout <= 0 {
return false
}
ctx := r.Context()
v := ctx.Value(connDeadlineTimeKey)
deadline, ok := v.(*uint64)