From d0c103ad05bef4753a3f54c62c758b17a790fe15 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 18 Sep 2023 16:16:11 +0200 Subject: [PATCH] lib/backup: properly copy parts.json files inside indexdb directory additional to data directory This is a follow-up for 264ffe3fa19e29ba87a4add8dfe7e7e1a0d466cb Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5005 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5006 --- docs/CHANGELOG.md | 2 +- lib/backup/actions/backup.go | 1 - lib/backup/actions/copy.go | 1 - lib/backup/actions/restore.go | 1 - lib/backup/common/part.go | 39 ++++++++++------------------------- 5 files changed, 12 insertions(+), 32 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 117899a2e..9649e8963 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -49,7 +49,7 @@ ssue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4825) and [these * BUGFIX: [Graphite Render API](https://docs.victoriametrics.com/#graphite-render-api-usage) correctly return null instead of +Inf for render response. See this [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3783) for details. * BUGFIX: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): fix display of ingested rows rate for `Samples ingested/s` and `Samples rate` panels for vmagent's dasbhoard. Previously, not all ingested protocols were accounted in these panels. An extra panel `Rows rate` was added to `Ingestion` section to display the split for rows ingested rate by protocol. * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix the bug causing render looping when switching to heatmap. -* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): force copying of `data/small/.../parts.json` in order to ensure backup consistency. Previously, `parts.json` could be skipped during copying, which could lead to data loss during restore. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5005). +* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): properly copy `parts.json` files inside `<-storageDataPath>/{data,indexdb}` folders during [incremental backups](https://docs.victoriametrics.com/vmbackup.html#incremental-backups). Previously the new `parts.json` could be skipped during incremental backups, which could lead to inability to restore from the backup. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5005). This issue has been introduced in [v1.90.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.90.0). * BUGFIX: [VictoriaMetrics enterprise](https://docs.victoriametrics.com/enterprise.html) validate `-dedup.minScrapeInterval` value and `-downsampling.period` intervals are multiples of each other. See [these docs](https://docs.victoriametrics.com/#downsampling). ## [v1.93.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.4) diff --git a/lib/backup/actions/backup.go b/lib/backup/actions/backup.go index 941cd17da..1fffc51b9 100644 --- a/lib/backup/actions/backup.go +++ b/lib/backup/actions/backup.go @@ -147,7 +147,6 @@ func runBackup(src *fslocal.FS, dst common.RemoteFS, origin common.OriginFS, con } srcCopyParts := common.PartsDifference(partsToCopy, originParts) - srcCopyParts = common.EnforceSpecialsCopy(srcParts, srcCopyParts) uploadSize := getPartsSize(srcCopyParts) if len(srcCopyParts) > 0 { logger.Infof("uploading %d parts from %s to %s", len(srcCopyParts), src, dst) diff --git a/lib/backup/actions/copy.go b/lib/backup/actions/copy.go index 396d44eec..c684d2a61 100644 --- a/lib/backup/actions/copy.go +++ b/lib/backup/actions/copy.go @@ -84,7 +84,6 @@ func runCopy(src common.OriginFS, dst common.RemoteFS, concurrency int) error { } partsToCopy := common.PartsDifference(srcParts, dstParts) - partsToCopy = common.EnforceSpecialsCopy(srcParts, partsToCopy) copySize := 
getPartsSize(partsToCopy) if err := copySrcParts(src, dst, partsToCopy, concurrency); err != nil { return fmt.Errorf("cannot server-side copy parts from src to dst: %w", err) diff --git a/lib/backup/actions/restore.go b/lib/backup/actions/restore.go index 5a7434a9e..2dd10d073 100644 --- a/lib/backup/actions/restore.go +++ b/lib/backup/actions/restore.go @@ -144,7 +144,6 @@ func (r *Restore) Run() error { } partsToCopy := common.PartsDifference(srcParts, dstParts) - partsToCopy = common.EnforceSpecialsCopy(srcParts, partsToCopy) downloadSize := getPartsSize(partsToCopy) if len(partsToCopy) > 0 { perPath := make(map[string][]common.Part) diff --git a/lib/backup/common/part.go b/lib/backup/common/part.go index 41e72d856..d7f9aa239 100644 --- a/lib/backup/common/part.go +++ b/lib/backup/common/part.go @@ -7,6 +7,7 @@ import ( "sort" "strconv" "strings" + "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -37,11 +38,21 @@ type Part struct { ActualSize uint64 } +// key returns a string, which uniquely identifies p. func (p *Part) key() string { + if strings.HasSuffix(p.Path, "/parts.json") { + // parts.json file contents change over time, so it must have a unique key in order + // to always copy it during backup, restore and server-side copy. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5005 + id := atomic.AddUint64(&uniqueKeyID, 1) + return fmt.Sprintf("unique-%016X", id) + } // Do not use p.FileSize in the key, since it cannot be properly initialized when resuming the restore for partially restored file return fmt.Sprintf("%s%016X%016X%016X", p.Path, p.Offset, p.Size, p.ActualSize) } +var uniqueKeyID uint64 + // String returns human-readable representation of the part. 
func (p *Part) String() string { return fmt.Sprintf("part{path: %q, file_size: %d, offset: %d, size: %d}", p.Path, p.FileSize, p.Offset, p.Size) @@ -158,31 +169,3 @@ func PartsIntersect(a, b []Part) []Part { } return d } - -// EnforceSpecialsCopy enforces copying of special parts from src to toCopy without checking whether -// part is already present in dst. -func EnforceSpecialsCopy(src, toCopy []Part) []Part { - // `parts.json` files must be copied from src to dst without checking whether they already exist in dst. - // This is needed because size and paths for those files can be the same even if the contents differ. - // See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5005 - filtered := make(map[Part]bool) - for _, pt := range src { - if strings.HasPrefix(pt.Path, "data") && strings.HasSuffix(pt.Path, "parts.json") { - filtered[pt] = false - } - } - - for _, pt := range toCopy { - if _, ok := filtered[pt]; ok { - filtered[pt] = true - } - } - - for pt, ok := range filtered { - if !ok { - toCopy = append(toCopy, pt) - } - } - - return toCopy -}