mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/httpserver: do not close client connections every 2 minutes by default
Closing client connections every 2 minutes doesn't help load balancing -
this just leads to "jumpy" connections between multiple backend servers,
e.g. the load isn't spread evenly among backend servers, and instead jumps
between the servers every 2 minutes.
It is still possible periodically closing client connections by specifying non-zero -http.connTimeout command-line flag.
This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037
This is a follow-up for d387da142e
This commit is contained in:
parent
ee745ab900
commit
d8c1db7953
2 changed files with 10 additions and 4 deletions
|
@ -32,6 +32,7 @@ The sandbox cluster installation is running under the constant load generated by
|
||||||
|
|
||||||
* FEATURE: all VictoriaMetrics components: add support for TLS client certificate verification at `-httpListenAddr` (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)). See [these docs](https://docs.victoriametrics.com/#mtls-protection). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5458).
|
* FEATURE: all VictoriaMetrics components: add support for TLS client certificate verification at `-httpListenAddr` (aka [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication)). See [these docs](https://docs.victoriametrics.com/#mtls-protection). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5458).
|
||||||
* FEATURE: all VictoriaMetrics components: add support for empty command flag values in short array notation. For example, `-remoteWrite.sendTimeout=',20s,'` specifies three `-remoteWrite.sendTimeout` values - the first and the last ones are default values (`30s` in this case), while the second one is `20s`.
|
* FEATURE: all VictoriaMetrics components: add support for empty command flag values in short array notation. For example, `-remoteWrite.sendTimeout=',20s,'` specifies three `-remoteWrite.sendTimeout` values - the first and the last ones are default values (`30s` in this case), while the second one is `20s`.
|
||||||
|
* FEATURE: all VictoriaMetrics components: do not client connections at `-httpListenAddr` every 2 minutes. This behavior didn't help spreading load among multiple backend servers behind load-balancing TCP proxy. Instead, it could lead to hard-to-debug issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1304#issuecomment-1636997037). If you still need periodically closing client connections because of some reason, then pass the desired timeout to `-http.connTimeout` command-line flag.
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html) and [single-node VictoriaMetrics](https://docs.victoriametrics.com): add support for data ingestion via [DataDog lambda extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/) aka `/api/beta/sketches` endpoint. See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3091). Thanks to @AndrewChubatiuk for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5584).
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html) and [single-node VictoriaMetrics](https://docs.victoriametrics.com): add support for data ingestion via [DataDog lambda extension](https://docs.datadoghq.com/serverless/libraries_integrations/extension/) aka `/api/beta/sketches` endpoint. See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3091). Thanks to @AndrewChubatiuk for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5584).
|
||||||
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add `-disableReroutingOnUnavailable` command-line flag to `vminsert`, which can be used for reducing resource usage spikes at `vmstorage` nodes during rolling restart. See [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#improving-re-routing-performance-during-restart). Thanks to @Muxa1L for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5713).
|
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add `-disableReroutingOnUnavailable` command-line flag to `vminsert`, which can be used for reducing resource usage spikes at `vmstorage` nodes during rolling restart. See [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#improving-re-routing-performance-during-restart). Thanks to @Muxa1L for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5713).
|
||||||
* FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).
|
* FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).
|
||||||
|
|
|
@ -52,7 +52,7 @@ var (
|
||||||
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
|
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
|
||||||
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
|
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
|
||||||
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
|
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
|
||||||
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
|
connTimeout = flag.Duration("http.connTimeout", 0, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
|
||||||
|
|
||||||
headerHSTS = flag.String("http.header.hsts", "", "Value for 'Strict-Transport-Security' header, recommended: `max-age=31536000; includeSubDomains`")
|
headerHSTS = flag.String("http.header.hsts", "", "Value for 'Strict-Transport-Security' header, recommended: `max-age=31536000; includeSubDomains`")
|
||||||
headerFrameOptions = flag.String("http.header.frameOptions", "", "Value for 'X-Frame-Options' header")
|
headerFrameOptions = flag.String("http.header.frameOptions", "", "Value for 'X-Frame-Options' header")
|
||||||
|
@ -131,8 +131,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
|
||||||
// since these timeouts must be controlled by request handlers.
|
// since these timeouts must be controlled by request handlers.
|
||||||
|
|
||||||
ErrorLog: logger.StdErrorLogger(),
|
ErrorLog: logger.StdErrorLogger(),
|
||||||
|
}
|
||||||
ConnContext: func(ctx context.Context, c net.Conn) context.Context {
|
if *connTimeout > 0 {
|
||||||
|
s.s.ConnContext = func(ctx context.Context, c net.Conn) context.Context {
|
||||||
timeoutSec := connTimeout.Seconds()
|
timeoutSec := connTimeout.Seconds()
|
||||||
// Add a jitter for connection timeout in order to prevent Thundering herd problem
|
// Add a jitter for connection timeout in order to prevent Thundering herd problem
|
||||||
// when all the connections are established at the same time.
|
// when all the connections are established at the same time.
|
||||||
|
@ -140,8 +141,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
|
||||||
jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
|
jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
|
||||||
deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
|
deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
|
||||||
return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
|
return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
serversLock.Lock()
|
serversLock.Lock()
|
||||||
servers[addr] = &s
|
servers[addr] = &s
|
||||||
serversLock.Unlock()
|
serversLock.Unlock()
|
||||||
|
@ -155,6 +157,9 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func whetherToCloseConn(r *http.Request) bool {
|
func whetherToCloseConn(r *http.Request) bool {
|
||||||
|
if *connTimeout <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
ctx := r.Context()
|
ctx := r.Context()
|
||||||
v := ctx.Value(connDeadlineTimeKey)
|
v := ctx.Value(connDeadlineTimeKey)
|
||||||
deadline, ok := v.(*uint64)
|
deadline, ok := v.(*uint64)
|
||||||
|
|
Loading…
Reference in a new issue