From c1a3192d8b2a9c987ec20f72662dc5eb867d81e6 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 17 Nov 2022 00:38:48 +0200 Subject: [PATCH] app/vmselect/promql: add `range_linear_regression(q)` function for calculating simple linear regression for the selected time series on the selected time range --- app/vmselect/promql/exec_test.go | 46 +++++++++++++++++++ app/vmselect/promql/rollup.go | 28 +++++------ app/vmselect/promql/rollup_test.go | 7 +-- app/vmselect/promql/transform.go | 23 ++++++++++ docs/CHANGELOG.md | 1 + docs/MetricsQL.md | 9 +++- go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/transform.go | 1 + vendor/modules.txt | 2 +- 10 files changed, 98 insertions(+), 25 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 182c6c965..b79c01a45 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6905,6 +6905,51 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`range_linear_regression(time())`, func(t *testing.T) { + t.Parallel() + q := `range_linear_regression(time())` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{1000, 1200, 1400, 1600, 1800, 2000}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`range_linear_regression(-time())`, func(t *testing.T) { + t.Parallel() + q := `range_linear_regression(-time())` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{-1000, -1200, -1400, -1600, -1800, -2000}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`range_linear_regression(100/time())`, func(t *testing.T) { + t.Parallel() + q := `sort_desc(round(( + alias(range_linear_regression(100/time()), "regress"), + alias(100/time(), "orig"), + ), + 0.001 + ))` + r1 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.1, 0.083, 0.071, 0.062, 0.056, 0.05}, + Timestamps: timestampsExpected, + } + r1.MetricName.MetricGroup = []byte("orig") + r2 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.095, 0.085, 0.075, 0.066, 0.056, 0.046}, + Timestamps: timestampsExpected, + } + r2.MetricName.MetricGroup = []byte("regress") + resultExpected := []netstorage.Result{r1, r2} + f(q, resultExpected) + }) t.Run(`deriv(N)`, func(t *testing.T) { t.Parallel() q := `deriv(1000)` @@ -8097,6 +8142,7 @@ func TestExecError(t *testing.T) { f(`range_sum(1, 2)`) f(`range_first(1, 2)`) f(`range_last(1, 2)`) + f(`range_linear_regression(1, 2)`) f(`smooth_exponential()`) f(`smooth_exponential(1)`) f(`remove_resets()`) diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 05ca64216..e88ce24ce 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -894,7 +894,7 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) { return nil, err } rf := func(rfa *rollupFuncArg) float64 { - v, k := linearRegression(rfa) + v, k := linearRegression(rfa.values, rfa.timestamps, rfa.currTimestamp) if math.IsNaN(v) { return nan } @@ -904,13 +904,8 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) { return rf, nil } -func linearRegression(rfa *rollupFuncArg) (float64, float64) { - // There is no need in handling NaNs here, since they must be cleaned up - // before calling rollup funcs. - values := rfa.values - timestamps := rfa.timestamps - n := float64(len(values)) - if n == 0 { +func linearRegression(values []float64, timestamps []int64, interceptTime int64) (float64, float64) { + if len(values) == 0 { return nan, nan } if areConstValues(values) { @@ -918,25 +913,32 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) { } // See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example - interceptTime := rfa.currTimestamp vSum := float64(0) tSum := float64(0) tvSum := float64(0) ttSum := float64(0) + n := 0 for i, v := range values { + if math.IsNaN(v) { + continue + } dt := float64(timestamps[i]-interceptTime) / 1e3 vSum += v tSum += dt tvSum += dt * v ttSum += dt * dt + n++ + } + if n == 0 { + return nan, nan } k := float64(0) - tDiff := ttSum - tSum*tSum/n + tDiff := ttSum - tSum*tSum/float64(n) if math.Abs(tDiff) >= 1e-6 { // Prevent from incorrect division for too small tDiff values. - k = (tvSum - tSum*vSum/n) / tDiff + k = (tvSum - tSum*vSum/float64(n)) / tDiff } - v := vSum/n - k*tSum/n + v := vSum/float64(n) - k*tSum/float64(n) return v, k } @@ -1605,7 +1607,7 @@ func rollupIdelta(rfa *rollupFuncArg) float64 { func rollupDerivSlow(rfa *rollupFuncArg) float64 { // Use linear regression like Prometheus does. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73 - _, k := linearRegression(rfa) + _, k := linearRegression(rfa.values, rfa.timestamps, rfa.currTimestamp) return k } diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index 833f32ce9..3cf83173d 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -388,12 +388,7 @@ func TestRollupPredictLinear(t *testing.T) { func TestLinearRegression(t *testing.T) { f := func(values []float64, timestamps []int64, expV, expK float64) { t.Helper() - rfa := &rollupFuncArg{ - values: values, - timestamps: timestamps, - currTimestamp: timestamps[0] + 100, - } - v, k := linearRegression(rfa) + v, k := linearRegression(values, timestamps, timestamps[0] + 100) if err := compareValues([]float64{v}, []float64{expV}); err != nil { t.Fatalf("unexpected v err: %s", err) } diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index e48b028a2..d442cc03e 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -88,6 +88,7 @@ var transformFuncs = map[string]transformFunc{ "range_avg": newTransformFuncRange(runningAvg), "range_first": transformRangeFirst, "range_last": transformRangeLast, + "range_linear_regression": transformRangeLinearRegression, "range_max": newTransformFuncRange(runningMax), "range_min": newTransformFuncRange(runningMin), "range_quantile": transformRangeQuantile, @@ -136,6 +137,7 @@ var transformFuncsKeepMetricName = map[string]bool{ "range_avg": true, "range_first": true, "range_last": true, + "range_linear_regression": true, "range_max": true, "range_min": true, "range_quantile": true, @@ -1234,6 +1236,27 @@ func newTransformFuncRange(rf func(a, b float64, idx int) float64) transformFunc } } +func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + rvs := args[0] + for _, ts := range rvs { + values := ts.Values + timestamps := ts.Timestamps + if len(timestamps) == 0 { + continue + } + interceptTimestamp := timestamps[0] + v, k := linearRegression(values, timestamps, interceptTimestamp) + for i, t := range timestamps { + values[i] = v + k*float64(t-interceptTimestamp)/1e3 + } + } + return rvs, nil +} + func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) { args := tfa.args if err := expectTransformArgsNum(args, 2); err != nil { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b0db4a923..c35d930e3 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## tip +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_linear_regression](https://docs.victoriametrics.com/MetricsQL.html#range_linear_regression) function for calculating [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression) over the input time series on the selected time range. This function is useful for predictions and capacity planning. For example, `range_linear_regression(process_resident_memory_bytes)` can predict future memory usage based on the past memory usage. ## [v1.83.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.83.1) diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 88882a3b6..3219f7243 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -395,7 +395,7 @@ over the given lookbehind window `d` using the given smoothing factor `sf` and t Both `sf` and `tf` must be in the range `[0...1]`. It is expected that the [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering) returns time series of [gauge type](https://docs.victoriametrics.com/keyConcepts.html#gauge). -This function is supported by PromQL. +This function is supported by PromQL. See also [range_linear_regression](#range_linear_regression). #### idelta @@ -533,7 +533,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht linear interpolation over raw samples on the given lookbehind window `d`. The predicted value is calculated individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). -This function is supported by PromQL. +This function is supported by PromQL. See also [range_linear_regression](#range_linear_regression). #### present_over_time @@ -1203,6 +1203,11 @@ See also [rand](#rand) and [rand_exponential](#rand_exponential). `range_last(q)` is a [transform function](#transform-functions), which returns the value for the last point per each time series returned by `q`. +#### range_linear_regression + +`range_linear_regression(q)` is a [transform function](#transform-functions), which calculates [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression) +over the selected time range per each time series returned by `q`. This function is useful for capacity planning and predictions. + #### range_max `range_max(q)` is a [transform function](#transform-functions), which calculates the max value across points per each time series returned by `q`. diff --git a/go.mod b/go.mod index 755cc192a..d5f0cdd9a 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.23.0 - github.com/VictoriaMetrics/metricsql v0.45.0 + github.com/VictoriaMetrics/metricsql v0.46.0 github.com/aws/aws-sdk-go-v2 v1.17.1 github.com/aws/aws-sdk-go-v2/config v1.17.10 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.37 diff --git a/go.sum b/go.sum index f326e0813..9032dcb80 100644 --- a/go.sum +++ b/go.sum @@ -100,8 +100,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA= github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= -github.com/VictoriaMetrics/metricsql v0.45.0 h1:kVQHnkDJm4qyJ8f5msTclmwqAtlUdPbbEJ7zoa/FTNs= -github.com/VictoriaMetrics/metricsql v0.45.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.46.0 h1:UeY+3vykSflhShmBmMemYvDVlqISraiCc8uMtyAc+PI= +github.com/VictoriaMetrics/metricsql v0.46.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/transform.go b/vendor/github.com/VictoriaMetrics/metricsql/transform.go index 44084bc3e..9e9a171e0 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/transform.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/transform.go @@ -73,6 +73,7 @@ var transformFuncs = map[string]bool{ "range_avg": true, "range_first": true, "range_last": true, + "range_linear_regression": true, "range_max": true, "range_min": true, "range_quantile": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index e693bde2a..f58b47ac7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.23.0 ## explicit; go 1.15 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.45.0 +# github.com/VictoriaMetrics/metricsql v0.46.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop