From 9fa2632ac35bb0a2dc2834d93e9d2767ad20254d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 26 Feb 2021 23:03:57 +0200 Subject: [PATCH 1/5] lib/promscrape: typo fix after ed8441ec5240f3cd2d70c6372f1b13b8761c83f5 --- lib/promscrape/config.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 351637cf9..11a452ae0 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -563,7 +563,9 @@ func (swc *scrapeWorkCache) Get(key string) *ScrapeWork { currentTime := fasttime.UnixTimestamp() swc.mu.Lock() swe := swc.m[key] - swe.lastAccessTime = currentTime + if swe != nil { + swe.lastAccessTime = currentTime + } swc.mu.Unlock() return swe.sw } From 8683ea85e6f79acacf18b8da4590000334975637 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 26 Feb 2021 23:21:59 +0200 Subject: [PATCH 2/5] lib/fs: properly handle `stale NFS file handle` error during file deletion This error can appear when -storageDataPath points to an NFS volume and the given file has already been removed. --- docs/CHANGELOG.md | 1 + lib/fs/dir_remover.go | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 17d70b8cd..336422d41 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -22,6 +22,7 @@ * BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics. * BUGFIX: properly calculate `histogram_quantile()` over time series with only a single non-zero bucket with `{le="+Inf"}`. Previously `NaN` was returned, now the value for the last bucket before `{le="+Inf"}` is returned like Prometheus does. * BUGFIX: vmselect: do not cache partial query results on timeout when receiving data from `vmstorage` nodes. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1085 +* BUGFIX: properly handle `stale NFS file handle` error. 
# [v1.54.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.54.1) diff --git a/lib/fs/dir_remover.go b/lib/fs/dir_remover.go index aefcd77eb..fae5701bb 100644 --- a/lib/fs/dir_remover.go +++ b/lib/fs/dir_remover.go @@ -13,7 +13,7 @@ import ( func mustRemoveAll(path string, done func()) bool { err := os.RemoveAll(path) - if err == nil { + if err == nil || isStaleNFSFileHandleError(err) { // Make sure the parent directory doesn't contain references // to the current directory. mustSyncParentDirIfExists(path) @@ -87,6 +87,11 @@ func dirRemover() { } } +func isStaleNFSFileHandleError(err error) bool { + errStr := err.Error() + return strings.Contains(errStr, "stale NFS file handle") +} + func isTemporaryNFSError(err error) bool { // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/61 for details. errStr := err.Error() From a78948ae8b9cdafb9d67f5515055b2f98b52f7b0 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 26 Feb 2021 23:35:47 +0200 Subject: [PATCH 3/5] lib/promscrape: yet another typo fix after ed8441ec5240f3cd2d70c6372f1b13b8761c83f5 --- lib/promscrape/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 11a452ae0..641adf8f1 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -567,6 +567,9 @@ func (swc *scrapeWorkCache) Get(key string) *ScrapeWork { swe.lastAccessTime = currentTime } swc.mu.Unlock() + if swe == nil { + return nil + } return swe.sw } From 186c078fac16d924cd47d08a94396a6d79ce60db Mon Sep 17 00:00:00 2001 From: Nikolay Date: Sat, 27 Feb 2021 01:15:53 +0300 Subject: [PATCH 4/5] adds enforced tag filters into cache key (#1095) --- app/vmselect/promql/rollup_result_cache.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/app/vmselect/promql/rollup_result_cache.go b/app/vmselect/promql/rollup_result_cache.go index 8ef54398e..e518324f6 100644 --- a/app/vmselect/promql/rollup_result_cache.go 
+++ b/app/vmselect/promql/rollup_result_cache.go @@ -178,7 +178,7 @@ func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window in bb := bbPool.Get() defer bbPool.Put(bb) - bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step) + bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilters) metainfoBuf := rrc.c.Get(nil, bb.B) if len(metainfoBuf) == 0 { return nil, ec.Start @@ -198,7 +198,7 @@ func (rrc *rollupResultCache) Get(ec *EvalConfig, expr metricsql.Expr, window in if len(compressedResultBuf.B) == 0 { mi.RemoveKey(key) metainfoBuf = mi.Marshal(metainfoBuf[:0]) - bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step) + bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilters) rrc.c.Set(bb.B, metainfoBuf) return nil, ec.Start } @@ -301,7 +301,7 @@ func (rrc *rollupResultCache) Put(ec *EvalConfig, expr metricsql.Expr, window in bb.B = key.Marshal(bb.B[:0]) rrc.c.SetBig(bb.B, compressedResultBuf.B) - bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step) + bb.B = marshalRollupResultCacheKey(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilters) metainfoBuf := rrc.c.Get(nil, bb.B) var mi rollupResultCacheMetainfo if len(metainfoBuf) > 0 { @@ -331,11 +331,14 @@ var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total") // Increment this value every time the format of the cache changes. 
const rollupResultCacheVersion = 7 -func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64) []byte { +func marshalRollupResultCacheKey(dst []byte, expr metricsql.Expr, window, step int64, filters []storage.TagFilter) []byte { dst = append(dst, rollupResultCacheVersion) dst = encoding.MarshalInt64(dst, window) dst = encoding.MarshalInt64(dst, step) dst = expr.AppendString(dst) + for _, f := range filters { + dst = f.Marshal(dst) + } return dst } From 975dac90869a27b599fa6f2a281e6ad19d1e9cf8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 27 Feb 2021 00:19:31 +0200 Subject: [PATCH 5/5] docs/CHANGELOG.md: mentioned a bugfix with `extra_label` handling during caching query results Related to 186c078fac16d924cd47d08a94396a6d79ce60db --- docs/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 336422d41..8ae0db448 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -17,12 +17,12 @@ * FEATURE: add `increase_pure(m[d])` function to MetricsQL. It works the same as `increase(m[d])` except of various edge cases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/962) for details. * FEATURE: increase accuracy for `buckets_limit(limit, buckets)` results for small `limit` values. See [MetricsQL docs](https://victoriametrics.github.io/MetricsQL.html) for details. - * BUGFIX: vmagent: properly perform graceful shutdown on `SIGINT` and `SIGTERM` signals. The graceful shutdown has been broken in `v1.54.0`. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1065 * BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics. * BUGFIX: properly calculate `histogram_quantile()` over time series with only a single non-zero bucket with `{le="+Inf"}`. Previously `NaN` was returned, now the value for the last bucket before `{le="+Inf"}` is returned like Prometheus does. 
* BUGFIX: vmselect: do not cache partial query results on timeout when receiving data from `vmstorage` nodes. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1085 * BUGFIX: properly handle `stale NFS file handle` error. +* BUGFIX: properly cache query results when `extra_label` query arg is used. Previously the cached results could clash for different `extra_label` values. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1095 # [v1.54.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.54.1)