app/vmagent: add remoteWrite.retryMinInterval and remoteWrite.retryMaxTime flags (#6289)

## Describe Your Changes

Add RemoteWrite Retry Controls

This PR introduces two new flags to the remote write functionality:
- remoteWrite.retryMinInterval
- remoteWrite.retryMaxTime
 
These flags provide finer control over the retry behavior for
remoteWrite operations, allowing users to customize the minimum interval
between retries and the maximum duration for retry attempts.

Fixes #5486.

## Checklist

- [x] The following checks are mandatory:

My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: Yury Akudovich <ya@matterlabs.dev>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Yury Akudovich 2024-08-23 14:05:51 +02:00 committed by GitHub
parent 336406e2e1
commit d0f5a9d77a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 28 additions and 12 deletions

View file

@ -34,8 +34,10 @@ var (
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+ rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+ "By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
"is sent after temporary unavailability of the remote storage. See also -maxIngestionRate") "is sent after temporary unavailability of the remote storage. See also -maxIngestionRate")
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url") sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+ retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234") "Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url") tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url")
@ -90,6 +92,9 @@ type client struct {
fq *persistentqueue.FastQueue fq *persistentqueue.FastQueue
hc *http.Client hc *http.Client
retryMinInterval time.Duration
retryMaxTime time.Duration
sendBlock func(block []byte) bool sendBlock func(block []byte) bool
authCfg *promauth.Config authCfg *promauth.Config
awsCfg *awsapi.Config awsCfg *awsapi.Config
@ -143,13 +148,15 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
Timeout: sendTimeout.GetOptionalArg(argIdx), Timeout: sendTimeout.GetOptionalArg(argIdx),
} }
c := &client{ c := &client{
sanitizedURL: sanitizedURL, sanitizedURL: sanitizedURL,
remoteWriteURL: remoteWriteURL, remoteWriteURL: remoteWriteURL,
authCfg: authCfg, authCfg: authCfg,
awsCfg: awsCfg, awsCfg: awsCfg,
fq: fq, fq: fq,
hc: hc, hc: hc,
stopCh: make(chan struct{}), retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
retryMaxTime: retryMaxTime.GetOptionalArg(argIdx),
stopCh: make(chan struct{}),
} }
c.sendBlock = c.sendBlockHTTP c.sendBlock = c.sendBlockHTTP
@ -396,11 +403,11 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
// sendBlockHTTP sends the given block to c.remoteWriteURL. // sendBlockHTTP sends the given block to c.remoteWriteURL.
// //
// The function returns false only if c.stopCh is closed. // The function returns false only if c.stopCh is closed.
// Otherwise it tries sending the block to remote storage indefinitely. // Otherwise, it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool { func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.Register(len(block)) c.rl.Register(len(block))
maxRetryDuration := timeutil.AddJitterToDuration(time.Minute) maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxTime)
retryDuration := timeutil.AddJitterToDuration(time.Second) retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
retriesCount := 0 retriesCount := 0
again: again:

View file

@ -28,6 +28,7 @@ The value of `instance` label for those scrape targets will be changed from `<ad
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow overriding the `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) when a label `__sample_limit__` is specified for target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6665). Thanks to @zoglam for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6666). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow overriding the `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) when a label `__sample_limit__` is specified for target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6665). Thanks to @zoglam for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6666).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): reduce memory usage when scraping targets with big response body. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6759). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): reduce memory usage when scraping targets with big response body. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6759).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): stop adding default port 80/443 for scrape URLs without a port. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6792). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): stop adding default port 80/443 for scrape URLs without a port. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6792).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add flags `-remoteWrite.retryMinInterval` and `-remoteWrite.retryMaxTime` for adjusting remote-write requests retry policy. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5486). Thanks to @yorik for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6289).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): add command-line flag `-notifier.headers` to allow configuring additional headers for all requests sent to the corresponding `-notifier.url`. * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): add command-line flag `-notifier.headers` to allow configuring additional headers for all requests sent to the corresponding `-notifier.url`.
* FEATURE: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): add `-external.label` and `-external.url` command-line flags, in the same way as these flags are supported by vmalert. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6735). * FEATURE: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): add `-external.label` and `-external.url` command-line flags, in the same way as these flags are supported by vmalert. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6735).
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): use exponential backoff for retries when uploading or downloading data from S3. This should reduce the number of failed uploads and downloads when S3 is temporarily unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6732). * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): use exponential backoff for retries when uploading or downloading data from S3. This should reduce the number of failed uploads and downloads when S3 is temporarily unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6732).

View file

@ -2168,6 +2168,14 @@ See the docs at https://docs.victoriametrics.com/vmagent/ .
Empty values are set to default value. Empty values are set to default value.
-remoteWrite.relabelConfig string -remoteWrite.relabelConfig string
Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent/#relabeling Optional path to file with relabeling configs, which are applied to all the metrics before sending them to -remoteWrite.url. See also -remoteWrite.urlRelabelConfig. The path can point either to local file or to http url. See https://docs.victoriametrics.com/vmagent/#relabeling
-remoteWrite.retryMaxTime array
The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval (default 1m0s)
Supports array of values separated by comma or specified via multiple flags.
Empty values are set to default value.
-remoteWrite.retryMinInterval array
The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval (default 1s)
Supports array of values separated by comma or specified via multiple flags.
Empty values are set to default value.
-remoteWrite.roundDigits array -remoteWrite.roundDigits array
Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics (default 100) Round metric values to this number of decimal digits after the point before writing them to remote storage. Examples: -remoteWrite.roundDigits=2 would round 1.236 to 1.24, while -remoteWrite.roundDigits=-1 would round 126.78 to 130. By default, digits rounding is disabled. Set it to 100 for disabling it for a particular remote storage. This option may be used for improving data compression for the stored metrics (default 100)
Supports array of values separated by comma or specified via multiple flags. Supports array of values separated by comma or specified via multiple flags.