From 3e084be06b6f48440ea3ffc8cb04f770ae10dbca Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 7 Oct 2021 12:50:49 +0300 Subject: [PATCH] app/vmselect: make `predict_linear` and `deriv` compatible with Prometheus (#1681) Previously, `predict_linear` returned slightly different results compared to Prometheus. The change makes the linear regression algorithm compatible with Prometheus. `deriv` was excluded from the list of functions which can adjust the time window for the same reasons. --- app/vmselect/promql/exec_test.go | 33 ----------------------------- app/vmselect/promql/rollup.go | 28 ++++++++---------------- app/vmselect/promql/rollup_test.go | 34 ++++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 58 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index f335e3e07..4b6d4ebc5 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6239,39 +6239,6 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) - t.Run(`deriv(1)`, func(t *testing.T) { - t.Parallel() - q := `deriv(1)` - r := netstorage.Result{ - MetricName: metricNameExpected, - Values: []float64{0, 0, 0, 0, 0, 0}, - Timestamps: timestampsExpected, - } - resultExpected := []netstorage.Result{r} - f(q, resultExpected) - }) - t.Run(`deriv(time())`, func(t *testing.T) { - t.Parallel() - q := `deriv(2*time())` - r := netstorage.Result{ - MetricName: metricNameExpected, - Values: []float64{2, 2, 2, 2, 2, 2}, - Timestamps: timestampsExpected, - } - resultExpected := []netstorage.Result{r} - f(q, resultExpected) - }) - t.Run(`deriv(-time())`, func(t *testing.T) { - t.Parallel() - q := `deriv(-time())` - r := netstorage.Result{ - MetricName: metricNameExpected, - Values: []float64{-1, -1, -1, -1, -1, -1}, - Timestamps: timestampsExpected, - } - resultExpected := []netstorage.Result{r} - f(q, resultExpected) - }) t.Run(`delta(time())`, func(t *testing.T) { 
t.Parallel() q := `delta(time())` diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 8a422c452..d306067ff 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -151,6 +151,7 @@ var rollupFuncsCannotAdjustWindow = map[string]bool{ "holt_winters": true, "idelta": true, "increase": true, + "deriv": true, "predict_linear": true, "resets": true, "avg_over_time": true, @@ -864,37 +865,26 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) { // before calling rollup funcs. values := rfa.values timestamps := rfa.timestamps - if len(values) == 0 { - return rfa.prevValue, 0 + if len(values) < 2 { + return nan, nan } // See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example - tFirst := rfa.prevTimestamp - vSum := rfa.prevValue + interceptTime := rfa.currTimestamp + vSum := float64(0) tSum := float64(0) tvSum := float64(0) ttSum := float64(0) - n := 1.0 - if math.IsNaN(rfa.prevValue) { - tFirst = timestamps[0] - vSum = 0 - n = 0 - } for i, v := range values { - dt := float64(timestamps[i]-tFirst) / 1e3 + dt := float64(timestamps[i]-interceptTime) / 1e3 vSum += v tSum += dt tvSum += dt * v ttSum += dt * dt } - n += float64(len(values)) - if n == 1 { - return vSum, 0 - } - k := (n*tvSum - tSum*vSum) / (n*ttSum - tSum*tSum) - v := (vSum - k*tSum) / n - // Adjust v to the last timestamp on the given time range. 
- v += k * (float64(timestamps[len(timestamps)-1]-tFirst) / 1e3) + n := float64(len(values)) + k := (tvSum - tSum*vSum/n) / (ttSum - tSum*tSum/n) + v := vSum/n - k*tSum/n return v, k } diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index 93d85dede..f4af400f4 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -357,10 +357,32 @@ func TestRollupPredictLinear(t *testing.T) { testRollupFunc(t, "predict_linear", args, &me, vExpected) } - f(0e-3, 30.382432471845043) - f(50e-3, 17.03950235614201) - f(100e-3, 3.696572240438975) - f(200e-3, -22.989287990967092) + f(0e-3, 65.07405077267295) + f(50e-3, 51.7311206569699) + f(100e-3, 38.38819054126685) + f(200e-3, 11.702330309860756) +} + +func TestLinearRegression(t *testing.T) { + f := func(values []float64, timestamps []int64, expV, expK float64) { + t.Helper() + rfa := &rollupFuncArg{ + values: values, + timestamps: timestamps, + currTimestamp: timestamps[0] + 100, + } + v, k := linearRegression(rfa) + if err := compareValues([]float64{v}, []float64{expV}); err != nil { + t.Fatalf("unexpected v err: %s", err) + } + if err := compareValues([]float64{k}, []float64{expK}); err != nil { + t.Fatalf("unexpected k err: %s", err) + } + } + + f([]float64{1}, []int64{1}, math.NaN(), math.NaN()) + f([]float64{1, 2}, []int64{100, 300}, 1.5, 5) + f([]float64{2, 4, 6, 8, 10}, []int64{100, 200, 300, 400, 500}, 4, 20) } func TestRollupHoltWinters(t *testing.T) { @@ -448,7 +470,7 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) { f("default_rollup", 34) f("changes", 11) f("delta", 34) - f("deriv", -266.85860231406065) + f("deriv", -266.85860231406093) f("deriv_fast", -712) f("idelta", 0) f("increase", 398) @@ -957,7 +979,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { } rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) values := rc.Do(nil, testValues, testTimestamps) - valuesExpected := []float64{0, -2879.310344827587, 558.0608793686595, 
422.84569138276544, 0} + valuesExpected := []float64{nan, -2879.310344827588, 127.87627310448904, -496.5831435079728, nan} timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) })