From a260e2659efe64a7c32d7478051f0334d673fe7c Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 17 Nov 2022 01:01:46 +0200 Subject: [PATCH] app/vmselect/promql: add `range_stdvar()` and `range_stddev()` functions for calculating variance and deviation over time series on the selected time range --- app/vmselect/promql/exec_test.go | 26 ++++++- app/vmselect/promql/rollup.go | 22 ++++-- app/vmselect/promql/rollup_test.go | 2 +- app/vmselect/promql/transform.go | 74 ++++++++++++++----- docs/CHANGELOG.md | 1 + docs/MetricsQL.md | 10 +++ go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/transform.go | 2 + vendor/modules.txt | 2 +- 10 files changed, 113 insertions(+), 32 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index b79c01a45..ed04ba369 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6380,19 +6380,39 @@ func TestExecSuccess(t *testing.T) { q := `range_quantile(0.5, time())` r := netstorage.Result{ MetricName: metricNameExpected, - // time() results in [1000 1200 1400 1600 1800 2000] Values: []float64{1500, 1500, 1500, 1500, 1500, 1500}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`range_stddev()`, func(t *testing.T) { + t.Parallel() + q := `round(range_stddev(time()),0.01)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{341.57, 341.57, 341.57, 341.57, 341.57, 341.57}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`range_stdvar()`, func(t *testing.T) { + t.Parallel() + q := `round(range_stdvar(time()),0.01)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{116666.67, 116666.67, 116666.67, 116666.67, 116666.67, 116666.67}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`range_median()`, func(t 
*testing.T) { t.Parallel() q := `range_median(time())` r := netstorage.Result{ MetricName: metricNameExpected, - // time() results in [1000 1200 1400 1600 1800 2000] Values: []float64{1500, 1500, 1500, 1500, 1500, 1500}, Timestamps: timestampsExpected, } @@ -8079,6 +8099,8 @@ func TestExecError(t *testing.T) { f(`nonexisting()`) // Invalid number of args + f(`range_stddev()`) + f(`range_stdvar()`) f(`range_quantile()`) f(`range_quantile(1, 2, 3)`) f(`range_median()`) diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index e88ce24ce..587c54137 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -1475,16 +1475,20 @@ func rollupStaleSamples(rfa *rollupFuncArg) float64 { } func rollupStddev(rfa *rollupFuncArg) float64 { - stdvar := rollupStdvar(rfa) - return math.Sqrt(stdvar) + return stddev(rfa.values) } func rollupStdvar(rfa *rollupFuncArg) float64 { - // See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation + return stdvar(rfa.values) +} - // There is no need in handling NaNs here, since they must be cleaned up - // before calling rollup funcs. 
- values := rfa.values +func stddev(values []float64) float64 { + v := stdvar(values) + return math.Sqrt(v) +} + +func stdvar(values []float64) float64 { + // See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation if len(values) == 0 { return nan } @@ -1496,11 +1500,17 @@ func rollupStdvar(rfa *rollupFuncArg) float64 { var count float64 var q float64 for _, v := range values { + if math.IsNaN(v) { + continue + } count++ avgNew := avg + (v-avg)/count q += (v - avg) * (v - avgNew) avg = avgNew } + if count == 0 { + return nan + } return q / count } diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index 3cf83173d..e7b48b887 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -388,7 +388,7 @@ func TestRollupPredictLinear(t *testing.T) { func TestLinearRegression(t *testing.T) { f := func(values []float64, timestamps []int64, expV, expK float64) { t.Helper() - v, k := linearRegression(values, timestamps, timestamps[0] + 100) + v, k := linearRegression(values, timestamps, timestamps[0]+100) if err := compareValues([]float64{v}, []float64{expV}); err != nil { t.Fatalf("unexpected v err: %s", err) } diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index d442cc03e..8b3f8d338 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -92,6 +92,8 @@ var transformFuncs = map[string]transformFunc{ "range_max": newTransformFuncRange(runningMax), "range_min": newTransformFuncRange(runningMin), "range_quantile": transformRangeQuantile, + "range_stddev": transformRangeStddev, + "range_stdvar": transformRangeStdvar, "range_sum": newTransformFuncRange(runningSum), "remove_resets": transformRemoveResets, "round": transformRound, @@ -126,26 +128,28 @@ var transformFuncs = map[string]transformFunc{ // These functions don't change physical meaning of input time series, // so they don't drop metric name var 
transformFuncsKeepMetricName = map[string]bool{ - "ceil": true, - "clamp": true, - "clamp_max": true, - "clamp_min": true, - "floor": true, - "interpolate": true, - "keep_last_value": true, - "keep_next_value": true, - "range_avg": true, - "range_first": true, - "range_last": true, + "ceil": true, + "clamp": true, + "clamp_max": true, + "clamp_min": true, + "floor": true, + "interpolate": true, + "keep_last_value": true, + "keep_next_value": true, + "range_avg": true, + "range_first": true, + "range_last": true, "range_linear_regression": true, - "range_max": true, - "range_min": true, - "range_quantile": true, - "round": true, - "running_avg": true, - "running_max": true, - "running_min": true, - "smooth_exponential": true, + "range_max": true, + "range_min": true, + "range_quantile": true, + "range_stdvar": true, + "range_stddev": true, + "round": true, + "running_avg": true, + "running_max": true, + "running_min": true, + "smooth_exponential": true, } func getTransformFunc(s string) transformFunc { @@ -1257,6 +1261,38 @@ func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error return rvs, nil } +func transformRangeStddev(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + rvs := args[0] + for _, ts := range rvs { + values := ts.Values + v := stddev(values) + for i := range values { + values[i] = v + } + } + return rvs, nil +} + +func transformRangeStdvar(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + rvs := args[0] + for _, ts := range rvs { + values := ts.Values + v := stdvar(values) + for i := range values { + values[i] = v + } + } + return rvs, nil +} + func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) { args := tfa.args if err := expectTransformArgsNum(args, 2); err != nil { diff --git a/docs/CHANGELOG.md 
b/docs/CHANGELOG.md index c35d930e3..c33aa1f2a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,6 +16,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## tip * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_linear_regression](https://docs.victoriametrics.com/MetricsQL.html#range_linear_regression) function for calculating [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression) over the input time series on the selected time range. This function is useful for predictions and capacity planning. For example, `range_linear_regression(process_resident_memory_bytes)` can predict future memory usage based on the past memory usage. +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_stddev](https://docs.victoriametrics.com/MetricsQL.html#range_stddev) and [range_stdvar](https://docs.victoriametrics.com/MetricsQL.html#range_stdvar) functions. ## [v1.83.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.83.1) diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 3219f7243..33d1b9c92 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -1225,6 +1225,16 @@ over the selected time range per each time series returned by `q`. This function `range_quantile(phi, q)` is a [transform function](#transform-functions), which returns `phi`-quantile across points per each time series returned by `q`. `phi` must be in the range `[0...1]`. +#### range_stddev + +`range_stddev(q)` is a [transform function](#transform-functions), which calculates [standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) +per each time series returned by `q` on the selected time range. + +#### range_stdvar + +`range_stdvar(q)` is a [transform function](#transform-functions), which calculates [standard variance](https://en.wikipedia.org/wiki/Variance) +per each time series returned by `q` on the selected time range. 
+ #### range_sum `range_sum(q)` is a [transform function](#transform-functions), which calculates the sum of points per each time series returned by `q`. diff --git a/go.mod b/go.mod index d5f0cdd9a..0fc56d60e 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.23.0 - github.com/VictoriaMetrics/metricsql v0.46.0 + github.com/VictoriaMetrics/metricsql v0.47.0 github.com/aws/aws-sdk-go-v2 v1.17.1 github.com/aws/aws-sdk-go-v2/config v1.17.10 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.37 diff --git a/go.sum b/go.sum index 9032dcb80..8ce522ff3 100644 --- a/go.sum +++ b/go.sum @@ -100,8 +100,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA= github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= -github.com/VictoriaMetrics/metricsql v0.46.0 h1:UeY+3vykSflhShmBmMemYvDVlqISraiCc8uMtyAc+PI= -github.com/VictoriaMetrics/metricsql v0.46.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.47.0 h1:PQwadjoQnKKkaUiupkDq0ZbCAHX2qP8OOexJ9oJwupo= +github.com/VictoriaMetrics/metricsql v0.47.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/transform.go b/vendor/github.com/VictoriaMetrics/metricsql/transform.go index 9e9a171e0..40d315832 100644 --- 
a/vendor/github.com/VictoriaMetrics/metricsql/transform.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/transform.go @@ -77,6 +77,8 @@ var transformFuncs = map[string]bool{ "range_max": true, "range_min": true, "range_quantile": true, + "range_stddev": true, + "range_stdvar": true, "range_sum": true, "remove_resets": true, "round": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index f58b47ac7..636d82684 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.23.0 ## explicit; go 1.15 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.46.0 +# github.com/VictoriaMetrics/metricsql v0.47.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop