diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 32d5bf232..742c530e9 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6568,7 +6568,7 @@ func TestExecSuccess(t *testing.T) { q := `rate((2000-time())[100s:100s])` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{5.5, 4.5, 6.5, 4.5, 2.5, 0.5}, + Values: []float64{0, 0, 6.5, 4.5, 2.5, 0.5}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} @@ -6579,7 +6579,7 @@ func TestExecSuccess(t *testing.T) { q := `rate((2000-time())[100s:100s] offset 100s)` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{6, 5, 7.5, 5.5, 3.5, 1.5}, + Values: []float64{0, 0, 3.5, 5.5, 3.5, 1.5}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} @@ -6590,7 +6590,7 @@ func TestExecSuccess(t *testing.T) { q := `rate((2000-time())[100s:100s] offset 100s)[:] offset 100s` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{7, 6, 5, 7.5, 5.5, 3.5}, + Values: []float64{0, 0, 0, 3.5, 5.5, 3.5}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} @@ -6746,7 +6746,7 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`remove_resets()`, func(t *testing.T) { t.Parallel() - q := `remove_resets( abs(1500-time()) )` + q := `remove_resets(abs(1500-time()))` r := netstorage.Result{ MetricName: metricNameExpected, Values: []float64{500, 800, 900, 900, 1100, 1300}, @@ -6755,6 +6755,20 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`remove_resets(sum)`, func(t *testing.T) { + t.Parallel() + q := `remove_resets(sum( + alias(time(), "full"), + alias(time()/5 < 300, "partial"), + ))` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{1200, 1440, 1680, 1680, 1880, 2080}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, 
resultExpected) + }) t.Run(`range_avg(time())`, func(t *testing.T) { t.Parallel() q := `range_avg(time())` @@ -6945,10 +6959,10 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`aggr_over_time(single-func)`, func(t *testing.T) { t.Parallel() - q := `aggr_over_time("increase", rand(0)[:10s])` + q := `round(aggr_over_time("increase", rand(0)[:10s]),0.01)` r1 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{5.465672601448873, 6.642207999066246, 6.8400051805114295, 7.182425481980655, 5.1677922402706, 6.594060518641982}, + Values: []float64{5.47, 6.64, 6.84, 7.24, 5.17, 6.59}, Timestamps: timestampsExpected, } r1.MetricName.Tags = []storage.Tag{{ diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index b19ac8273..952a9c6ab 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -704,9 +704,9 @@ func removeCounterResets(values []float64) { d := v - prevValue if d < 0 { if (-d * 8) < prevValue { - // This is likely jitter from `Prometheus HA pairs`. - // Just substitute v with prevValue. - v = prevValue + // This is likely a partial counter reset. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787 + correction += prevValue - v } else { correction += prevValue } diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index bfc6d670e..b43a4fce4 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -100,10 +100,11 @@ func TestRemoveCounterResets(t *testing.T) { timestampsExpected := []int64{0, 1, 2, 3} testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected) - // verify how jitter from `Prometheus HA pairs` is handled - values = []float64{100, 95, 120, 140, 137, 50} + // verify how partial counter reset is handled. 
+ // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787 + values = []float64{100, 95, 120, 119, 139, 50} removeCounterResets(values) - valuesExpected = []float64{100, 100, 120, 140, 140, 190} + valuesExpected = []float64{100, 100, 125, 125, 145, 195} timestampsExpected = []int64{0, 1, 2, 3, 4, 5} testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected) } diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index 52eafd2b4..8a1527bb0 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -2329,9 +2329,9 @@ func removeCounterResetsMaybeNaNs(values []float64) { d := v - prevValue if d < 0 { if (-d * 8) < prevValue { - // This is likely jitter from `Prometheus HA pairs`. - // Just substitute v with prevValue. - v = prevValue + // This is likely a partial counter reset. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787 + correction += prevValue - v } else { correction += prevValue } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f472f1f7f..44e39b454 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -35,6 +35,7 @@ scrape_configs: * FEATURE: [query tracing](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#query-tracing): show timestamps in query traces in human-readable format (aka `RFC3339` in UTC timezone) instead of milliseconds since Unix epoch. For example, `2022-06-27T10:32:54.506Z` instead of `1656325974506`. This improves traces' readability. * FEATURE: improve performance of [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers) requests, which return big number of time series. +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets in [remove_resets](https://docs.victoriametrics.com/MetricsQL.html#remove_resets) function. 
Now `remove_resets(sum(m))` should return the expected increasing line when some time series matching `m` disappear on the selected time range. Previously such a query would return a horizontal line after the disappeared series. * FEATURE: expose additional histogram metrics at `http://victoriametrics:8428/metrics`, which may help understanding query workload: * `vm_rows_read_per_query` - the number of raw samples read per query. @@ -51,6 +52,7 @@ scrape_configs: {% endraw %} * BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a a few megabytes. +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. 
The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).