From cb00b4b00fa4af39048389fe5a4cd13ab813c133 Mon Sep 17 00:00:00 2001 From: Zakhar Bessarab Date: Wed, 7 Aug 2024 18:55:29 +0400 Subject: [PATCH] lib/backup/s3remote: add retryer configuration (#6747) ### Describe Your Changes This improves the reliability of backups in environments with an unreliable connection and helps tolerate temporary errors on the S3 provider side. See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6732 Requests are retried up to 10 times with exponential jitter backoff; the maximum backoff delay between attempts is 3 minutes, to make this consistent with the same configuration for GCS: https://github.com/VictoriaMetrics/VictoriaMetrics/blob/a05317f61f94ce01f07008432d6ab763c6565ccc/lib/backup/gcsremote/gcs.go#L70-L76 ### Checklist The following checks are **mandatory**: - [x] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). --------- Signed-off-by: Zakhar Bessarab --- docs/CHANGELOG.md | 1 + lib/backup/s3remote/s3.go | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b2e4bcf8e..e67371ae7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,6 +33,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl/): add `--vm-backoff-retries`, `--vm-backoff-factor`, `--vm-backoff-min-duration` and `--vm-native-backoff-retries`, `--vm-native-backoff-factor`, `--vm-native-backoff-min-duration` command-line flags. These flags allow to change backoff policy config for import requests to VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6622). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow overriding the `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) when a label `__sample_limit__` is specified for target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6665). 
Thanks to @zoglam for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6666). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): reduce memory usage when scraping targets with big response body. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6759). +* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmrestore](https://docs.victoriametrics.com/vmrestore/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): use exponential backoff for retries when uploading data to S3 or downloading data from it. This should reduce the number of failed uploads and downloads when S3 is temporarily unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6732). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert): respect HTTP headers defined in [notifier configuration file](https://docs.victoriametrics.com/vmalert/#notifier-configuration-file) for each request to notifiers. Previously, this param was ignored by mistake. * BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): correctly apply `-streamAggr.dropInputLabels` when global stream deduplication is enabled without `-streamAggr.config`. Previously, `-remoteWrite.streamAggr.dropInputLabels` was used instead. 
diff --git a/lib/backup/s3remote/s3.go b/lib/backup/s3remote/s3.go index f734cbf74..e93404301 100644 --- a/lib/backup/s3remote/s3.go +++ b/lib/backup/s3remote/s3.go @@ -9,8 +9,10 @@ import ( "net/http" "path" "strings" + "time" "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/retry" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" @@ -97,6 +99,12 @@ func (fs *FS) Init() error { configOpts := []func(*config.LoadOptions) error{ config.WithSharedConfigProfile(fs.ProfileName), config.WithDefaultRegion("us-east-1"), + config.WithRetryer(func() aws.Retryer { + return retry.NewStandard(func(o *retry.StandardOptions) { + o.Backoff = retry.NewExponentialJitterBackoff(3 * time.Minute) + o.MaxAttempts = 10 + }) + }), } if len(fs.ConfigFilePath) > 0 {