2019-11-12 14:18:09 +00:00
|
|
|
package fs
|
|
|
|
|
|
|
|
import (
|
|
|
|
"os"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
2021-05-21 14:55:14 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
|
2019-11-12 14:18:09 +00:00
|
|
|
"github.com/VictoriaMetrics/metrics"
|
|
|
|
)
|
|
|
|
|
2022-09-13 12:56:05 +00:00
|
|
|
// MustRemoveAll removes path with all the contents.
|
|
|
|
//
|
|
|
|
// It properly fsyncs the parent directory after path removal.
|
|
|
|
//
|
|
|
|
// It properly handles NFS issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
|
|
|
|
func MustRemoveAll(path string) {
|
2021-05-24 01:51:54 +00:00
|
|
|
if tryRemoveAll(path) {
|
2021-05-21 14:55:14 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
select {
|
|
|
|
case removeDirConcurrencyCh <- struct{}{}:
|
|
|
|
default:
|
|
|
|
logger.Panicf("FATAL: cannot schedule %s for removal, since the removal queue is full (%d entries)", path, cap(removeDirConcurrencyCh))
|
|
|
|
}
|
|
|
|
dirRemoverWG.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer func() {
|
|
|
|
dirRemoverWG.Done()
|
|
|
|
<-removeDirConcurrencyCh
|
|
|
|
}()
|
|
|
|
for {
|
|
|
|
time.Sleep(time.Second)
|
2021-05-24 01:51:54 +00:00
|
|
|
if tryRemoveAll(path) {
|
2021-05-21 14:55:14 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
// dirRemoverWG tracks the background removal goroutines started by MustRemoveAll.
// MustStopDirRemover waits on it during shutdown.
var dirRemoverWG syncwg.WaitGroup
|
|
|
|
|
2021-05-24 01:51:54 +00:00
|
|
|
func tryRemoveAll(path string) bool {
|
2019-11-12 14:18:09 +00:00
|
|
|
err := os.RemoveAll(path)
|
2021-02-26 21:21:59 +00:00
|
|
|
if err == nil || isStaleNFSFileHandleError(err) {
|
2019-11-12 14:18:09 +00:00
|
|
|
// Make sure the parent directory doesn't contain references
|
|
|
|
// to the current directory.
|
|
|
|
mustSyncParentDirIfExists(path)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if !isTemporaryNFSError(err) {
|
|
|
|
logger.Panicf("FATAL: cannot remove %q: %s", path, err)
|
|
|
|
}
|
|
|
|
// NFS prevents from removing directories with open files.
|
|
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 .
|
|
|
|
// Schedule for later directory removal.
|
|
|
|
nfsDirRemoveFailedAttempts.Inc()
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2020-08-06 12:30:59 +00:00
|
|
|
var (
|
|
|
|
nfsDirRemoveFailedAttempts = metrics.NewCounter(`vm_nfs_dir_remove_failed_attempts_total`)
|
|
|
|
_ = metrics.NewGauge(`vm_nfs_pending_dirs_to_remove`, func() float64 {
|
2021-05-21 14:55:14 +00:00
|
|
|
return float64(len(removeDirConcurrencyCh))
|
2020-08-06 12:30:59 +00:00
|
|
|
})
|
|
|
|
)
|
2019-11-12 14:18:09 +00:00
|
|
|
|
2021-05-21 14:55:14 +00:00
|
|
|
// removeDirConcurrencyCh bounds the number of directories awaiting background removal.
// MustRemoveAll puts an entry into it before starting a retry goroutine,
// and the goroutine takes the entry out when it exits.
var removeDirConcurrencyCh = make(chan struct{}, 1024)
|
2019-11-12 14:18:09 +00:00
|
|
|
|
2021-02-26 21:21:59 +00:00
|
|
|
// isStaleNFSFileHandleError reports whether the message of err
// contains the "stale NFS file handle" text.
//
// err must be non-nil.
func isStaleNFSFileHandleError(err error) bool {
	const staleHandleText = "stale NFS file handle"
	return strings.Contains(err.Error(), staleHandleText)
}
|
|
|
|
|
2019-11-12 14:18:09 +00:00
|
|
|
// isTemporaryNFSError reports whether the message of err contains one of the texts
// treated as a temporary NFS failure by tryRemoveAll.
//
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6396 for details.
//
// err must be non-nil.
func isTemporaryNFSError(err error) bool {
	msg := err.Error()
	for _, marker := range [...]string{
		"directory not empty",
		"device or resource busy",
		"file exists",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
|
|
|
|
|
|
|
|
// MustStopDirRemover must be called in the end of graceful shutdown
|
2021-05-21 14:55:14 +00:00
|
|
|
// in order to wait for removing the remaining directories from removeDirConcurrencyCh.
|
2019-11-12 14:18:09 +00:00
|
|
|
//
|
2021-05-21 14:55:14 +00:00
|
|
|
// It is expected that nobody calls MustRemoveAll when MustStopDirRemover is called.
|
2019-11-12 14:18:09 +00:00
|
|
|
func MustStopDirRemover() {
|
|
|
|
doneCh := make(chan struct{})
|
|
|
|
go func() {
|
|
|
|
dirRemoverWG.Wait()
|
|
|
|
close(doneCh)
|
|
|
|
}()
|
2021-04-22 09:58:53 +00:00
|
|
|
const maxWaitTime = 10 * time.Second
|
2019-11-12 14:18:09 +00:00
|
|
|
select {
|
|
|
|
case <-doneCh:
|
|
|
|
return
|
|
|
|
case <-time.After(maxWaitTime):
|
2021-04-22 09:58:53 +00:00
|
|
|
logger.Errorf("cannot stop dirRemover in %s; the remaining empty NFS directories should be automatically removed on the next startup", maxWaitTime)
|
2019-11-12 14:18:09 +00:00
|
|
|
}
|
|
|
|
}
|