From 9a63f6c1b8c8e0248a821769840df95d399dddd3 Mon Sep 17 00:00:00 2001
From: Dmytro Kozlov <kozlovdmitriyy@gmail.com>
Date: Fri, 6 May 2022 18:04:09 +0300
Subject: [PATCH] vmbackup: Prevent save backups to the same folder where TSDB
 data is (#2547)

* {vmbackup, vmbackup/snapshot}: validate snapshot name

* vmbackup/snapshot: added another checks

* backup/actions: added check that we ignore backup_complete.ignore file

* vmbackup: moved snapshot to lib directory

* lib/snapshot: added functions description

* lib/snapshot: fixed typo

* vmbackup: code cleanup

* wip

* vmbackup: Prevent save backups to the same folder where TSDB data is

* Apply suggestions from code review

* wip

* wip

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
---
 app/vmbackup/README.md    |  2 +-
 app/vmbackup/main.go      | 20 ++++++++++++++++++++
 app/vmbackup/main_test.go | 29 +++++++++++++++++++++++++++++
 docs/CHANGELOG.md         |  1 +
 docs/vmbackup.md          |  2 +-
 5 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 app/vmbackup/main_test.go

diff --git a/app/vmbackup/README.md b/app/vmbackup/README.md
index 135ff422b5..a3f960b9f7 100644
--- a/app/vmbackup/README.md
+++ b/app/vmbackup/README.md
@@ -7,7 +7,7 @@ Supported storage systems for backups:
 * [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
 * [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
 * Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
-* Local filesystem. Example: `fs://</absolute/path/to/backup>`
+* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
 
 `vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
 Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared
diff --git a/app/vmbackup/main.go b/app/vmbackup/main.go
index 9c53f5fcaa..d527157c69 100644
--- a/app/vmbackup/main.go
+++ b/app/vmbackup/main.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
 	"time"
 
@@ -155,9 +156,28 @@ func newDstFS() (common.RemoteFS, error) {
 	if err != nil {
 		return nil, fmt.Errorf("cannot parse `-dst`=%q: %w", *dst, err)
 	}
+	if hasFilepathPrefix(*dst, *storageDataPath) {
+		return nil, fmt.Errorf("-dst=%q can not point to the directory with VictoriaMetrics data (aka -storageDataPath=%q)", *dst, *storageDataPath)
+	}
 	return fs, nil
 }
 
+func hasFilepathPrefix(path, prefix string) bool {
+	if !strings.HasPrefix(path, "fs://") {
+		return false
+	}
+	path = path[len("fs://"):]
+	pathAbs, err := filepath.Abs(path)
+	if err != nil {
+		return false
+	}
+	prefixAbs, err := filepath.Abs(prefix)
+	if err != nil {
+		return false
+	}
+	return strings.HasPrefix(pathAbs, prefixAbs)
+}
+
 func newOriginFS() (common.OriginFS, error) {
 	if len(*origin) == 0 {
 		return &fsnil.FS{}, nil
diff --git a/app/vmbackup/main_test.go b/app/vmbackup/main_test.go
new file mode 100644
index 0000000000..f3288fb499
--- /dev/null
+++ b/app/vmbackup/main_test.go
@@ -0,0 +1,29 @@
+package main
+
+import (
+	"path/filepath"
+	"testing"
+)
+
+func TestHasFilepathPrefix(t *testing.T) {
+	f := func(dst, storageDataPath string, resultExpected bool) {
+		t.Helper()
+		result := hasFilepathPrefix(dst, storageDataPath)
+		if result != resultExpected {
+			t.Errorf("unexpected hasFilepathPrefix(%q, %q); got: %v; want: %v", dst, storageDataPath, result, resultExpected)
+		}
+	}
+	pwd, err := filepath.Abs("")
+	if err != nil {
+		t.Fatalf("cannot determine working directory: %s", err)
+	}
+	f("s3://foo/bar", "foo", false)
+	f("fs://"+pwd+"/foo", "foo", true)
+	f("fs://"+pwd+"/foo", "foo/bar", false)
+	f("fs://"+pwd+"/foo/bar", "foo", true)
+	f("fs://"+pwd+"/foo", "bar", false)
+	f("fs://"+pwd+"/foo", pwd+"/foo", true)
+	f("fs://"+pwd+"/foo", pwd+"/foo/bar", false)
+	f("fs://"+pwd+"/foo/bar", pwd+"/foo", true)
+	f("fs://"+pwd+"/foo", pwd+"/bar", false)
+}
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 1babd01cd3..cd04cc24fd 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly show the graph when clicking on `Prometheus` link in Grafana graph editor. Previously the graph wasn't shown until clicking on the `Graph` tab. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2402#issuecomment-1115830648).
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): rename `vmagent_remote_write_rate_limit_reached_total` metric to `vmagent_remotewrite_rate_limit_reached_total`, so its name is consistent with the rest of `vmagent_remotewrite_` metrics.
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): rename `promscrape_stale_samples_created_total` metric to `vm_promscrape_stale_samples_created_total`, so its name is consistent with the rest of `vm_promscrape_` metrics.
+* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): disallow writing backups to `-storageDataPath` directory, since this directory is managed solely by VictoriaMetrics or `vmstorage`. Other apps shouldn't write into this directory. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2503).
 
 
 ## [v1.77.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.77.0)
diff --git a/docs/vmbackup.md b/docs/vmbackup.md
index de46ca0936..64b160c493 100644
--- a/docs/vmbackup.md
+++ b/docs/vmbackup.md
@@ -11,7 +11,7 @@ Supported storage systems for backups:
 * [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
 * [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
 * Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
-* Local filesystem. Example: `fs://</absolute/path/to/backup>`
+* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents from storing the backup into the directory pointed by `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
 
 `vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
 Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes server-side copy for the shared