app/vmselect: make vmselect resilient to absence of cache folder (#5987)

vmselect uses a cache folder in file system for two purposes:
1. Storing rollup cache results on shutdown;
2. Storing temporary search results from vmstorage during query executions.

It could happen that cache folder is deleted accidentally by user, or by OS
during cleanup routines. This would cause vmselect to:
1. panic on /metrics call, because `MustGetFreeSpace` will fail;
2. return a query error to the user, as it won't be able to store temporary search results.

The changes in this commit are the following:
1. Make `MustGetFreeSpace` try re-creating the cache folder if it is missing;
2. Make vmselect try re-creating the cache folder if it can't persist tmp search
results.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5985

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
(cherry picked from commit cb23685681)
This commit is contained in:
Roman Khavronenko 2024-03-26 12:59:50 +01:00 committed by hagen1778
parent ab44787a70
commit 548bf31dd2
No known key found for this signature in database
GPG key ID: 3BF75F3741CA9640
3 changed files with 25 additions and 1 deletions

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -109,7 +110,7 @@ func (tbf *tmpBlocksFile) WriteBlockData(b []byte, tbfIdx uint) (tmpBlockAddr, e
// Slow path: flush the data from tbf.buf to file.
if tbf.f == nil {
f, err := os.CreateTemp(tmpBlocksDir, "")
f, err := createTemp(tmpBlocksDir)
if err != nil {
return addr, err
}
@ -124,6 +125,21 @@ func (tbf *tmpBlocksFile) WriteBlockData(b []byte, tbfIdx uint) (tmpBlockAddr, e
return addr, nil
}
// createTemp returns a newly created temporary file located in the directory at path.
//
// If the first attempt fails because the directory is reported as missing,
// fs.MustMkdirIfNotExist is called for path and the file creation is attempted
// one more time; the result of that second attempt is returned as is.
// Any other error from the first attempt is returned unchanged with a nil file.
func createTemp(path string) (*os.File, error) {
	tmpFile, err := os.CreateTemp(path, "")
	switch {
	case err == nil:
		return tmpFile, nil
	case os.IsNotExist(err), strings.Contains(err.Error(), "no such file or directory"):
		// The directory is gone: re-create it and retry exactly once.
		fs.MustMkdirIfNotExist(path)
		return os.CreateTemp(path, "")
	default:
		return nil, err
	}
}
// Len() returns tbf size in bytes.
func (tbf *tmpBlocksFile) Len() uint64 {
return tbf.offset

View file

@ -63,6 +63,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* BUGFIX: do not drop `match[]` filter at [`/api/v1/series`](https://docs.victoriametrics.com/url-examples/#apiv1series) if `-search.ignoreExtraFiltersAtLabelsAPI` command-line flag is set, since missing `match[]` filter breaks `/api/v1/series` requests.
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): properly parse TLS key and CA files for [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x) and [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb) migration modes.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix VictoriaLogs UI query handling to correctly apply `_time` filter across all queries. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5920).
* BUGFIX: [vmselect](https://docs.victoriametrics.com/): make vmselect resilient to absence of cache folder. If cache folder was mistakenly deleted by user or OS, vmselect will try re-creating it first. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5985).
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): limit duration of requests to /api/v1/labels, /api/v1/label/.../values or /api/v1/series with `-search.maxLabelsAPIDuration` duration. Before, `-search.maxExportDuration` value was used by mistake. Thanks to @kbweave for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5992).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix VictoriaLogs UI query handling to correctly apply `_time` filter across all queries. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5920).

View file

@ -351,6 +351,7 @@ func MustCreateFlockFile(dir string) *os.File {
const FlockFilename = "flock.lock"
// MustGetFreeSpace returns free space for the given directory path.
// It tries to re-create path if it doesn't exist yet.
func MustGetFreeSpace(path string) uint64 {
// Try obtaining cached value at first.
freeSpaceMapLock.Lock()
@ -363,6 +364,12 @@ func MustGetFreeSpace(path string) uint64 {
}
// Slow path.
// The path might be not available because:
// 1. We forgot to create it in the code
// 2. OS cleaned it up
MustMkdirIfNotExist(path)
// Determine the amount of free space at path.
e.freeSpace = mustGetFreeSpace(path)
e.updateTime = fasttime.UnixTimestamp()