From bbebdf9ba1013560c152daa2d2e98e37a0de0745 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 22 Apr 2021 12:58:53 +0300 Subject: [PATCH] lib/{storage,mergeset}: remove empty directories on startup. Such directories can be left after unclean shutdown on NFS storage Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142 --- docs/CHANGELOG.md | 3 +++ lib/fs/dir_remover.go | 6 +++--- lib/fs/fs.go | 18 ++++++++++++++++++ lib/mergeset/table.go | 6 ++++++ lib/storage/partition.go | 6 ++++++ 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fb7ec8e29..ee953dd3e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,9 @@ sort: 14 * FEATURE: add OpenTSDB migration option to vmctl. See more details [here](https://docs.victoriametrics.com/vmctl#migrating-data-from-opentsdb). Thanks to @johnseekins! +* BUGFIX: vmstorage: remove empty directories on startup. Such directories can be left after unclean shutdown on NFS storage. Previously such directories could lead to crashloop until manually removed. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142). + + ## [v1.58.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.58.0) * FEATURE: vminsert and vmagent: add `-sortLabels` command-line flag for sorting metric labels before pushing them to `vmstorage`. This should reduce the size of `MetricName -> internal_series_id` cache (aka `vm_cache_size_bytes{type="storage/tsid"}`) when ingesting samples for the same time series with distinct order of labels. For example, `foo{k1="v1",k2="v2"}` and `foo{k2="v2",k1="v1"}` represent a single time series. Labels sorting is disabled by default, since the majority of established exporters preserve the order of labels for the exported metrics. diff --git a/lib/fs/dir_remover.go b/lib/fs/dir_remover.go index 7c343c251..a2982d40f 100644 --- a/lib/fs/dir_remover.go +++ b/lib/fs/dir_remover.go @@ -81,7 +81,7 @@ func dirRemover() { time.Sleep(sleepTime) if sleepTime < maxSleepTime { sleepTime *= 2 - } else if sleepTime > time.Second { + } else if sleepTime > 5*time.Second { logger.Warnf("failed to remove directory %q due to NFS lock; retrying later in %.3f seconds", w.path, sleepTime.Seconds()) } } @@ -121,11 +121,11 @@ func MustStopDirRemover() { dirRemoverWG.Wait() close(doneCh) }() - const maxWaitTime = 5 * time.Second + const maxWaitTime = 10 * time.Second select { case <-doneCh: return case <-time.After(maxWaitTime): - logger.Panicf("FATAL: cannot stop dirRemover in %s", maxWaitTime) + logger.Errorf("cannot stop dirRemover in %s; the remaining empty NFS directories should be automatically removed on the next startup", maxWaitTime) } } diff --git a/lib/fs/fs.go b/lib/fs/fs.go index f417f8257..f2227f2db 100644 --- a/lib/fs/fs.go +++ b/lib/fs/fs.go @@ -174,6 +174,24 @@ func mustSyncParentDirIfExists(path string) { MustSyncPath(parentDirPath) } +// IsEmptyDir returns true if path points to empty directory. +func IsEmptyDir(path string) bool { + // See https://stackoverflow.com/a/30708914/274937 + f, err := os.Open(path) + if err != nil { + logger.Panicf("FATAL: unexpected error when opening directory %q: %s", path, err) + } + _, err = f.Readdirnames(1) + MustClose(f) + if err != nil { + if err == io.EOF { + return true + } + logger.Panicf("FATAL: unexpected error when reading directory %q: %s", path, err) + } + return false +} + // MustRemoveAll removes path with all the contents. // // It properly fsyncs the parent directory after path removal. diff --git a/lib/mergeset/table.go b/lib/mergeset/table.go index ac57e975d..a5ce4d4ff 100644 --- a/lib/mergeset/table.go +++ b/lib/mergeset/table.go @@ -1014,6 +1014,12 @@ func openParts(path string) ([]*partWrapper, error) { continue } partPath := path + "/" + fn + if fs.IsEmptyDir(partPath) { + // Remove empty directory, which can be left after unclean shutdown on NFS. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142 + fs.MustRemoveAll(partPath) + continue + } p, err := openFilePart(partPath) if err != nil { mustCloseParts(pws) diff --git a/lib/storage/partition.go b/lib/storage/partition.go index 3d9518c55..0196b916b 100644 --- a/lib/storage/partition.go +++ b/lib/storage/partition.go @@ -1546,6 +1546,12 @@ func openParts(pathPrefix1, pathPrefix2, path string) ([]*partWrapper, error) { continue } partPath := path + "/" + fn + if fs.IsEmptyDir(partPath) { + // Remove empty directory, which can be left after unclean shutdown on NFS. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142 + fs.MustRemoveAll(partPath) + continue + } startTime := time.Now() p, err := openFilePart(partPath) if err != nil {