diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 56c704a1c..1bc324ee1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components * [How to build vmalert](https://docs.victoriametrics.com/vmalert/#how-to-build-from-sources) * [How to build vmauth](https://docs.victoriametrics.com/vmauth/#how-to-build-from-sources) * [How to build vmctl](https://docs.victoriametrics.com/vmctl/#how-to-build) +* [How to build vmbackup](https://docs.victoriametrics.com/vmbackup/index.html#how-to-build-from-sources) Metrics of the latest version of VictoriaMetrics cluster are available for viewing at our [sandbox](https://play-grafana.victoriametrics.com/d/oS7Bi_0Wz_vm/victoriametrics-cluster-vm). @@ -50,6 +51,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): do not retry RPC calls to vmstorage nodes if [complexity limits](https://docs.victoriametrics.com/#resource-usage-limits) were exceeded. * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): make `-replay.timeTo` optional in [replay mode](https://docs.victoriametrics.com/vmalert/#rules-backfilling). When omitted, the current timestamp will be used. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6492). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show compacted result in the JSON tab for query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6559). +* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/index.html): add support of using Azure Managed Identity and default credentials lookup when performing backups. See configuration docs [here](https://docs.victoriametrics.com/vmbackup/#providing-credentials-via-env-variables). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5984) for the details. Thanks to @justinrush for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6518). +* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/index.html): allow overriding Azure storage domain when performing backups. See configuration docs [here](https://docs.victoriametrics.com/vmbackup/#providing-credentials-via-env-variables). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5984) for the details. Thanks to @justinrush for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6518). * BUGFIX: [docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#docker-compose-environment-for-victoriametrics): fix incorrect link to vmui from [VictoriaMetrics plugin in Grafana](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#grafana). * BUGFIX: [docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#docker-compose-environment-for-victoriametrics): fix incorrect link to vmui from [VictoriaMetrics plugin in Grafana](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#grafana). diff --git a/docs/vmbackup.md b/docs/vmbackup.md index ba2072b30..9ccd2c420 100644 --- a/docs/vmbackup.md +++ b/docs/vmbackup.md @@ -206,7 +206,11 @@ Obtaining credentials from env variables. - For AWS S3 compatible storages set env variable `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. Also you can set env variable `AWS_SHARED_CREDENTIALS_FILE` with path to credentials file. - For GCE cloud storage set env variable `GOOGLE_APPLICATION_CREDENTIALS` with path to credentials file. -- For Azure storage either set env variables `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`, or `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`. +- For Azure storage use one of these env variables: + - `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`: Use a specific account name and key (either primary or secondary). + - `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`: Use a connection string (must be either SAS Token or Account/Key) + - `AZURE_USE_DEFAULT_CREDENTIAL` and `AZURE_STORAGE_ACCOUNT_NAME`: Use the `DefaultAzureCredential` to allow the azure library to search multiple options (for example, managed identity related variables). + - `AZURE_STORAGE_DOMAIN`: Optionally override the default blob domain for the Azure storage service. Please, note that `vmbackup` will use credentials provided by cloud providers metadata service [when applicable](https://docs.victoriametrics.com/vmbackup/#using-cloud-providers-metadata-service). diff --git a/go.mod b/go.mod index f7e130f6b..243f0a10d 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.22.5 require ( cloud.google.com/go/storage v1.42.0 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.12.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.2 github.com/VictoriaMetrics/easyproto v0.1.4 github.com/VictoriaMetrics/fastcache v1.12.2 @@ -42,7 +43,6 @@ require ( cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.4.0 // indirect cloud.google.com/go/iam v1.1.10 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.9.1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/VividCortex/ewma v1.2.0 // indirect diff --git a/lib/backup/azremote/azblob.go b/lib/backup/azremote/azblob.go index b2f559829..ea4590048 100644 --- a/lib/backup/azremote/azblob.go +++ b/lib/backup/azremote/azblob.go @@ -10,6 +10,7 @@ import ( "time" "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob" @@ -27,9 +28,31 @@ const ( envStorageAcctName = "AZURE_STORAGE_ACCOUNT_NAME" envStorageAccKey = "AZURE_STORAGE_ACCOUNT_KEY" envStorageAccCs = "AZURE_STORAGE_ACCOUNT_CONNECTION_STRING" + envStorageDomain = "AZURE_STORAGE_DOMAIN" + envStorageDefault = "AZURE_USE_DEFAULT_CREDENTIAL" storageErrorCodeBlobNotFound = "BlobNotFound" ) +var ( + errNoCredentials = fmt.Errorf( + `failed to detect credentials for AZBlob. +Ensure that one of the options is set: connection string at %q; shared key at %q and %q; account name at %q and set %q to "true"`, + envStorageAccCs, + envStorageAcctName, + envStorageAccKey, + envStorageAcctName, + envStorageDefault, + ) + + errInvalidCredentials = fmt.Errorf("failed to process credentials: only one of %s, %s and %s, or %s and %s can be specified", + envStorageAccCs, + envStorageAcctName, + envStorageAccKey, + envStorageAcctName, + envStorageDefault, + ) +) + // FS represents filesystem for backups in Azure Blob Storage. // // Init must be called before calling other FS methods. @@ -41,49 +64,25 @@ type FS struct { Dir string client *container.Client + env envLookuper } // Init initializes fs. // // The returned fs must be stopped when no long needed with MustStop call. func (fs *FS) Init() error { - if fs.client != nil { + switch { + case fs.client != nil: logger.Panicf("BUG: fs.Init has been already called") + case fs.env == nil: + fs.env = envtemplate.LookupEnv } - for strings.HasPrefix(fs.Dir, "/") { - fs.Dir = fs.Dir[1:] - } - if !strings.HasSuffix(fs.Dir, "/") { - fs.Dir += "/" - } + fs.Dir = cleanDirectory(fs.Dir) - var sc *service.Client - var err error - if cs, ok := envtemplate.LookupEnv(envStorageAccCs); ok { - sc, err = service.NewClientFromConnectionString(cs, nil) - if err != nil { - return fmt.Errorf("failed to create AZBlob service client from connection string: %w", err) - } - } - - accountName, ok1 := envtemplate.LookupEnv(envStorageAcctName) - accountKey, ok2 := envtemplate.LookupEnv(envStorageAccKey) - if ok1 && ok2 { - creds, err := azblob.NewSharedKeyCredential(accountName, accountKey) - if err != nil { - return fmt.Errorf("failed to create AZBlob credentials from account name and key: %w", err) - } - serviceURL := fmt.Sprintf("https://%s.blob.core.windows.net/", accountName) - - sc, err = service.NewClientWithSharedKeyCredential(serviceURL, creds, nil) - if err != nil { - return fmt.Errorf("failed to create AZBlob service client from account name and key: %w", err) - } - } - - if sc == nil { - return fmt.Errorf(`failed to detect any credentials type for AZBlob. Ensure there is connection string set at %q, or shared key at %q and %q`, envStorageAccCs, envStorageAcctName, envStorageAccKey) + sc, err := fs.newClient() + if err != nil { + return fmt.Errorf("failed to create AZBlob service client: %w", err) } containerClient := sc.NewContainerClient(fs.Container) @@ -92,6 +91,47 @@ func (fs *FS) Init() error { return nil } +func (fs *FS) newClient() (*service.Client, error) { + connString, hasConnString := fs.env(envStorageAccCs) + accountName, hasAccountName := fs.env(envStorageAcctName) + accountKey, hasAccountKey := fs.env(envStorageAccKey) + useDefault, _ := fs.env(envStorageDefault) + + domain := "blob.core.windows.net" + if storageDomain, ok := fs.env(envStorageDomain); ok { + logger.Infof("Overriding default Azure blob domain with %q", storageDomain) + domain = storageDomain + } + + // not used if connection string is set + serviceURL := fmt.Sprintf("https://%s.%s/", accountName, domain) + + switch { + // can't specify any combination of more than one credential + case moreThanOne(hasConnString, (hasAccountName && hasAccountKey), (useDefault == "true" && hasAccountName)): + return nil, errInvalidCredentials + case hasConnString: + logger.Infof("Creating AZBlob service client from connection string") + return service.NewClientFromConnectionString(connString, nil) + case hasAccountName && hasAccountKey: + logger.Infof("Creating AZBlob service client from account name and key") + creds, err := azblob.NewSharedKeyCredential(accountName, accountKey) + if err != nil { + return nil, fmt.Errorf("failed to create Shared Key credentials: %w", err) + } + return service.NewClientWithSharedKeyCredential(serviceURL, creds, nil) + case useDefault == "true" && hasAccountName: + logger.Infof("Creating AZBlob service client from default credential") + creds, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("failed to create default Azure credentials: %w", err) + } + return service.NewClient(serviceURL, creds, nil) + default: + return nil, errNoCredentials + } +} + // MustStop stops fs. func (fs *FS) MustStop() { fs.client = nil @@ -242,7 +282,6 @@ func (fs *FS) UploadPart(p common.Part, r io.Reader) error { ctx := context.Background() _, err := bc.UploadStream(ctx, r, &blockblob.UploadStreamOptions{}) - if err != nil { return fmt.Errorf("cannot upload data to %q at %s (remote path %q): %w", p.Path, fs, bc.URL(), err) } @@ -341,7 +380,6 @@ func (fs *FS) CreateFile(filePath string, data []byte) error { _, err := bc.UploadBuffer(ctx, data, &blockblob.UploadBufferOptions{ Concurrency: 1, }) - if err != nil { return fmt.Errorf("cannot upload %d bytes to %q at %s (remote path %q): %w", len(data), filePath, fs, bc.URL(), err) } @@ -383,3 +421,34 @@ func (fs *FS) ReadFile(filePath string) ([]byte, error) { return b, nil } + +// envLookuper is for looking up environment variables. It is +// needed to allow unit tests to provide alternate values since the envtemplate +// package uses a singleton to read all environment variables into memory at +// init time. +type envLookuper func(name string) (string, bool) + +func moreThanOne(vals ...bool) bool { + var n int + + for _, v := range vals { + if v { + n++ + } + } + return n > 1 +} + +// cleanDirectory ensures that the directory is properly formatted for Azure +// Blob Storage. It removes any leading slashes and ensures that the directory +// ends with a trailing slash. +func cleanDirectory(dir string) string { + for strings.HasPrefix(dir, "/") { + dir = dir[1:] + } + if !strings.HasSuffix(dir, "/") { + dir += "/" + } + + return dir +} diff --git a/lib/backup/azremote/azblob_test.go b/lib/backup/azremote/azblob_test.go new file mode 100644 index 000000000..821ef4417 --- /dev/null +++ b/lib/backup/azremote/azblob_test.go @@ -0,0 +1,160 @@ +package azremote + +import ( + "bytes" + "errors" + "strings" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +func Test_cleanDirectory(t *testing.T) { + cases := map[string]struct { + Dir string + ExpectedDir string + }{ + "dir / prefix is removed": { + Dir: "/foo/", + ExpectedDir: "foo/", + }, + "multiple dir prefix / is removed": { + Dir: "//foo/", + ExpectedDir: "foo/", + }, + "suffix is added": { + Dir: "foo", + ExpectedDir: "foo/", + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + dir := cleanDirectory(test.Dir) + + if dir != test.ExpectedDir { + t.Errorf("expected dir %q, got %q", test.ExpectedDir, dir) + } + }) + } +} + +func Test_FSInit(t *testing.T) { + cases := map[string]struct { + IgnoreFakeEnv bool + Env testEnv + ExpectedErr error + ExpectedLogs []string + }{ + "connection string env var is used": { + Env: map[string]string{ + envStorageAccCs: "BlobEndpoint=https://test.blob.core.windows.net/;SharedAccessSignature=", + }, + ExpectedLogs: []string{`Creating AZBlob service client from connection string`}, + }, + "base envtemplate package is used and connection string err bubbles": { + IgnoreFakeEnv: true, + Env: map[string]string{ + envStorageAccCs: "BlobEndpoint=https://test.blob.core.windows.net/;SharedAccessSignature=", + }, + ExpectedErr: errNoCredentials, + }, + "only storage account name is an err": { + Env: map[string]string{ + envStorageAcctName: "test", + }, + ExpectedErr: errNoCredentials, + }, + "uses shared key credential": { + Env: map[string]string{ + envStorageAcctName: "test", + envStorageAccKey: "dGVhcG90Cg==", + }, + ExpectedLogs: []string{`Creating AZBlob service client from account name and key`}, + }, + "allows overriding domain name with account name and key": { + Env: map[string]string{ + envStorageAcctName: "test", + envStorageAccKey: "dGVhcG90Cg==", + envStorageDomain: "foo.bar", + }, + ExpectedLogs: []string{ + `Creating AZBlob service client from account name and key`, + `Overriding default Azure blob domain with "foo.bar"`, + }, + }, + "can't specify both connection string and shared key": { + Env: map[string]string{ + envStorageAccCs: "teapot", + envStorageAcctName: "test", + envStorageAccKey: "dGVhcG90Cg==", + }, + ExpectedErr: errInvalidCredentials, + }, + "just use default is an err": { + Env: map[string]string{ + envStorageDefault: "true", + }, + ExpectedErr: errNoCredentials, + }, + "uses default credential": { + Env: map[string]string{ + envStorageDefault: "true", + envStorageAcctName: "test", + }, + ExpectedLogs: []string{`Creating AZBlob service client from default credential`}, + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + tlog := &testLogger{} + + logger.SetOutputForTests(tlog) + t.Cleanup(logger.ResetOutputForTest) + + fs := &FS{Dir: "foo"} + if test.Env != nil && !test.IgnoreFakeEnv { + fs.env = test.Env.LookupEnv + } + + err := fs.Init() + if err != nil && !errors.Is(err, test.ExpectedErr) { + t.Errorf("expected error %q, got %q", test.ExpectedErr, err) + } + + tlog.MustContain(t, test.ExpectedLogs...) + }) + } +} + +type testLogger struct { + buf *bytes.Buffer +} + +func (l *testLogger) Write(p []byte) (n int, err error) { + if l.buf == nil { + l.buf = &bytes.Buffer{} + } + + return l.buf.Write(p) +} + +func (l *testLogger) MustContain(t *testing.T, vals ...string) { + t.Helper() + + contents := l.buf.String() + + for _, val := range vals { + if !strings.Contains(contents, val) { + t.Errorf("expected log to contain %q, got %q", val, l.buf.String()) + } + } +} + +type testEnv map[string]string + +func (e testEnv) LookupEnv(key string) (string, bool) { + val, ok := e[key] + return val, ok +}