lib/backup: add -deleteAllObjectVersions command-line flag (#5147)

New flag enforces removal of all versions of the object in remote object storage.

See:
- https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5121
- https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
This commit is contained in:
Zakhar Bessarab 2023-10-10 16:13:23 +04:00 committed by GitHub
parent 6dc5306c9b
commit 2fc7e9f47e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 175 additions and 29 deletions

View file

@ -265,6 +265,16 @@ You have to add a custom url endpoint via flag:
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
```
### Permanent deletion of objects in S3 and compatible storages
By default, when using S3 compatible storages, `vmbackup` and `vmbackupmanager` will use the basic delete operation,
which will delete current version of the object only.
In order to enforce removing all versions of an object when object is deleted, you need to use `-deleteAllObjectVersions` flag.
Using this flag will enforce listing all versions of an object and deleting them one by one.
Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically.
Refer to the respective documentation for your object storage provider for more details.
### Command-line flags
Run `vmbackup -help` in order to see all the available options:
@ -282,6 +292,8 @@ Run `vmbackup -help` in order to see all the available options:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-deleteAllObjectVersions
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
-dst string
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded

View file

@ -418,6 +418,8 @@ command-line flags:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-deleteAllObjectVersions
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
-disableDaily
Disable daily run. Default false
-disableHourly

View file

@ -35,14 +35,15 @@ The sandbox cluster installation is running under the constant load generated by
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712).
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-filestream.disableFadvise` command-line flag, which can be used for disabling `fadvise` syscall during backup upload to the remote storage. By default `vmbackup` uses `fadvise` syscall in order to prevent from eviction of recently accessed data from the [OS page cache](https://en.wikipedia.org/wiki/Page_cache) when backing up large files. Sometimes the `fadvise` syscall may take significant amounts of CPU when the backup is performed with large value of `-concurrency` command-line flag on systems with big number of CPU cores. In this case it is better to manually disable `fadvise` syscall by passing `-filestream.disableFadvise` command-line flag to `vmbackup`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5120) for details.
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-deleteAllObjectVersions` command-line flag, which can be used for forcing removal of all object versions in remote object storage. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5121) issue and [these docs](https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages) for the details.
* FEATURE: [Alerting rules for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts): account for `vmauth` component for alerts `ServiceDown` and `TooManyRestarts`.
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add support for functions, labels, values in autocomplete. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3006).
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): retain specified time interval when executing a query from `Top Queries`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5097).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): strip sensitive information such as auth headers or passwords from datasource, remote-read, remote-write or notifier URLs in log messages or UI. This behavior is by default and is controlled via `-datasource.showURL`, `-remoteRead.showURL`, `remoteWrite.showURL` or `-notifier.showURL` cmd-line flags. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5044).
* BUGFIX: [vmselect](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance and memory usage during query processing on machines with big number of CPU cores. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5087) for details.
## [v1.94.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.94.0)
Released at 2023-10-02

View file

@ -276,6 +276,16 @@ You have to add a custom url endpoint via flag:
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
```
### Permanent deletion of objects in S3 and compatible storages
By default, when using S3 compatible storages, `vmbackup` and `vmbackupmanager` will use the basic delete operation,
which will delete current version of the object only.
In order to enforce removing all versions of an object when object is deleted, you need to use `-deleteAllObjectVersions` flag.
Using this flag will enforce listing all versions of an object and deleting them one by one.
Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically.
Refer to the respective documentation for your object storage provider for more details.
### Command-line flags
Run `vmbackup -help` in order to see all the available options:
@ -293,6 +303,8 @@ Run `vmbackup -help` in order to see all the available options:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-deleteAllObjectVersions
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
-dst string
Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
-dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded

View file

@ -429,6 +429,8 @@ command-line flags:
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-deleteAllObjectVersions
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
-disableDaily
Disable daily run. Default false
-disableHourly

View file

@ -143,12 +143,7 @@ func (fs *FS) ListParts() ([]common.Part, error) {
// DeletePart deletes part p from fs.
func (fs *FS) DeletePart(p common.Part) error {
bc := fs.clientForPart(p)
ctx := context.Background()
if _, err := bc.Delete(ctx, &blob.DeleteOptions{}); err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, bc.URL(), err)
}
return nil
return fs.delete(p.RemotePath(fs.Dir))
}
// RemoveEmptyDirs recursively removes empty dirs in fs.
@ -278,14 +273,58 @@ func (fs *FS) DeleteFile(filePath string) error {
}
path := fs.Dir + filePath
bc := fs.clientForPath(path)
if err != nil {
return err
return fs.delete(path)
}
func (fs *FS) delete(path string) error {
if *common.DeleteAllObjectVersions {
return fs.deleteObjectWithGenerations(path)
}
return fs.deleteObject(path)
}
func (fs *FS) deleteObjectWithGenerations(path string) error {
pager := fs.client.NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{
Prefix: &path,
Include: azblob.ListBlobsInclude{
Versions: true,
},
})
ctx := context.Background()
for pager.More() {
resp, err := pager.NextPage(ctx)
if err != nil {
return fmt.Errorf("cannot list blobs at %s (remote path %q): %w", path, fs.Container, err)
}
for _, v := range resp.Segment.BlobItems {
var c *blob.Client
// Either versioning is disabled or we are deleting the current version
if v.VersionID == nil || (v.VersionID != nil && v.IsCurrentVersion != nil && *v.IsCurrentVersion) {
c = fs.client.NewBlobClient(*v.Name)
} else {
c, err = fs.client.NewBlobClient(*v.Name).WithVersionID(*v.VersionID)
if err != nil {
return fmt.Errorf("cannot read blob at %q at %s: %w", path, fs.Container, err)
}
}
if _, err := c.Delete(ctx, nil); err != nil {
return fmt.Errorf("cannot delete %q at %s: %w", path, fs.Container, err)
}
}
}
return nil
}
func (fs *FS) deleteObject(path string) error {
bc := fs.clientForPath(path)
ctx := context.Background()
if _, err := bc.Delete(ctx, nil); err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, bc.URL(), err)
return fmt.Errorf("cannot delete %q at %s: %w", bc.URL(), fs, err)
}
return nil
}

View file

@ -0,0 +1,13 @@
package common
import (
"flag"
)
var (
// DeleteAllObjectVersions is a flag for whether to prune previous object versions when deleting an object.
DeleteAllObjectVersions = flag.Bool("deleteAllObjectVersions", false, "Whether to prune previous object versions when deleting an object. "+
"By default, when object storage has versioning enabled deleting the file removes only current version. "+
"This option forces removal of all previous versions. "+
"See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages")
)

View file

@ -2,6 +2,7 @@ package gcsremote
import (
"context"
"errors"
"fmt"
"io"
"strings"
@ -131,12 +132,8 @@ func (fs *FS) ListParts() ([]common.Part, error) {
// DeletePart deletes part p from fs.
func (fs *FS) DeletePart(p common.Part) error {
o := fs.object(p)
ctx := context.Background()
if err := o.Delete(ctx); err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, o.ObjectName(), err)
}
return nil
path := p.RemotePath(fs.Dir)
return fs.delete(path)
}
// RemoveEmptyDirs recursively removes empty dirs in fs.
@ -215,13 +212,52 @@ func (fs *FS) object(p common.Part) *storage.ObjectHandle {
// The function does nothing if the filePath doesn't exists.
func (fs *FS) DeleteFile(filePath string) error {
path := fs.Dir + filePath
return fs.delete(path)
}
func (fs *FS) delete(path string) error {
if *common.DeleteAllObjectVersions {
return fs.deleteObjectWithGenerations(path)
}
return fs.deleteObject(path)
}
// deleteObjectWithGenerations deletes object at path and all its generations.
func (fs *FS) deleteObjectWithGenerations(path string) error {
it := fs.bkt.Objects(context.Background(), &storage.Query{
Versions: true,
Prefix: path,
})
ctx := context.Background()
for {
attrs, err := it.Next()
if errors.Is(err, iterator.Done) {
return nil
}
if err != nil {
return fmt.Errorf("cannot read %q at %s: %w", path, fs, err)
}
if err := fs.bkt.Object(path).Generation(attrs.Generation).Delete(ctx); err != nil {
if !errors.Is(err, storage.ErrObjectNotExist) {
return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err)
}
}
}
}
// deleteObject deletes object at path.
// It does not specify a Generation, so it will delete the latest generation of the object.
func (fs *FS) deleteObject(path string) error {
o := fs.bkt.Object(path)
ctx := context.Background()
if err := o.Delete(ctx); err != nil {
if err != storage.ErrObjectNotExist {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, o.ObjectName(), err)
if !errors.Is(err, storage.ErrObjectNotExist) {
return fmt.Errorf("cannot delete %q at %s: %w", o.ObjectName(), fs, err)
}
}
return nil
}

View file

@ -194,15 +194,7 @@ func (fs *FS) ListParts() ([]common.Part, error) {
// DeletePart deletes part p from fs.
func (fs *FS) DeletePart(p common.Part) error {
path := fs.path(p)
input := &s3.DeleteObjectInput{
Bucket: aws.String(fs.Bucket),
Key: aws.String(path),
}
_, err := fs.s3.DeleteObject(context.Background(), input)
if err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, path, err)
}
return nil
return fs.delete(path)
}
// RemoveEmptyDirs recursively removes empty dirs in fs.
@ -301,16 +293,53 @@ func (fs *FS) DeleteFile(filePath string) error {
}
path := fs.Dir + filePath
return fs.delete(path)
}
func (fs *FS) delete(path string) error {
if *common.DeleteAllObjectVersions {
return fs.deleteObjectWithVersions(path)
}
return fs.deleteObject(path)
}
// deleteObject deletes object at path.
// It does not specify a version ID, so it will delete the latest version of the object.
func (fs *FS) deleteObject(path string) error {
input := &s3.DeleteObjectInput{
Bucket: aws.String(fs.Bucket),
Key: aws.String(path),
}
if _, err := fs.s3.DeleteObject(context.Background(), input); err != nil {
return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, path, err)
return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err)
}
return nil
}
// deleteObjectWithVersions deletes object at path and all its versions.
func (fs *FS) deleteObjectWithVersions(path string) error {
versions, err := fs.s3.ListObjectVersions(context.Background(), &s3.ListObjectVersionsInput{
Bucket: aws.String(fs.Bucket),
Prefix: aws.String(path),
})
if err != nil {
return fmt.Errorf("cannot list versions for %q at %s: %w", path, fs, err)
}
for _, version := range versions.Versions {
input := &s3.DeleteObjectInput{
Bucket: aws.String(fs.Bucket),
Key: version.Key,
VersionId: version.VersionId,
}
if _, err := fs.s3.DeleteObject(context.Background(), input); err != nil {
return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err)
}
}
return nil
}
// CreateFile creates filePath at fs and puts data into it.
//
// The file is overwritten if it already exists.