From 427fa43ce21e3272ec5a5c81dcb90c93ff172d1a Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 17 Jul 2020 18:28:19 +0300 Subject: [PATCH] app/vmselect/promql: add `mode_over_time(m[d])` function See https://en.wikipedia.org/wiki/Mode_(statistics) and https://stackoverflow.com/questions/61134078/promql-query-to-return-the-value-from-a-range-vector-which-occurs-maximum-no-of --- app/vmselect/promql/exec_test.go | 12 +++++++ app/vmselect/promql/rollup.go | 32 +++++++++++++++++++ app/vmselect/promql/rollup_test.go | 15 +++++++++ docs/MetricsQL.md | 1 + go.mod | 2 +- go.sum | 4 +-- .../VictoriaMetrics/metricsql/rollup.go | 3 ++ vendor/modules.txt | 2 +- 8 files changed, 67 insertions(+), 4 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 4acaa9670..f3c72a176 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -4518,6 +4518,17 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`mode_over_time()`, func(t *testing.T) { + t.Parallel() + q := `mode_over_time(round(time()/500)[100s:1s])` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{2, 2, 3, 3, 3, 4}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`integrate(1)`, func(t *testing.T) { t.Parallel() q := `integrate(1)` @@ -5733,6 +5744,7 @@ func TestExecError(t *testing.T) { f(`hoeffding_bound_upper(0.99, foo, 1)`) f(`outliersk()`) f(`outliersk(1)`) + f(`mode_over_time()`) // Invalid argument type f(`median_over_time({}, 2)`) diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 7820ab74c..16b5f2604 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "math" + "sort" "strings" "sync" @@ -79,6 +80,9 @@ var rollupFuncs = map[string]newRollupFunc{ // in order to properly handle offset and timestamps unaligned to the current step. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details. "timestamp": newRollupFuncOneArg(rollupTimestamp), + + // See https://en.wikipedia.org/wiki/Mode_(statistics) + "mode_over_time": newRollupFuncOneArg(rollupModeOverTime), } // rollupAggrFuncs are functions that can be passed to `aggr_over_time()` @@ -121,6 +125,7 @@ var rollupAggrFuncs = map[string]rollupFunc{ "ascent_over_time": rollupAscentOverTime, "descent_over_time": rollupDescentOverTime, "timestamp": rollupTimestamp, + "mode_over_time": rollupModeOverTime, } var rollupFuncsCannotAdjustWindow = map[string]bool{ @@ -167,6 +172,7 @@ var rollupFuncsKeepMetricGroup = map[string]bool{ "hoeffding_bound_upper": true, "first_over_time": true, "last_over_time": true, + "mode_over_time": true, } func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) { @@ -1534,6 +1540,32 @@ func rollupTimestamp(rfa *rollupFuncArg) float64 { return float64(timestamps[len(timestamps)-1]) / 1e3 } +func rollupModeOverTime(rfa *rollupFuncArg) float64 { + // There is no need in handling NaNs here, since they must be cleaned up + // before calling rollup funcs. + values := rfa.values + prevValue := rfa.prevValue + if len(values) == 0 { + return prevValue + } + sort.Float64s(values) + j := -1 + dMax := 0 + mode := prevValue + for i, v := range values { + if prevValue == v { + continue + } + if d := i - j; d > dMax { + dMax = d + mode = prevValue + } + j = i + prevValue = v + } + return mode +} + func rollupAscentOverTime(rfa *rollupFuncArg) float64 { // There is no need in handling NaNs here, since they must be cleaned up // before calling rollup funcs. diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index 06b021e83..1e7c42d71 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -392,6 +392,7 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) { f("ascent_over_time", 142) f("descent_over_time", 231) f("timestamp", 0.13) + f("mode_over_time", 34) } func TestRollupNewRollupFuncError(t *testing.T) { @@ -952,6 +953,20 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) + t.Run("mode_over_time", func(t *testing.T) { + rc := rollupConfig{ + Func: rollupModeOverTime, + Start: 0, + End: 160, + Step: 40, + Window: 80, + } + rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) + values := rc.Do(nil, testValues, testTimestamps) + valuesExpected := []float64{nan, nan, 34, 44, 44} + timestampsExpected := []int64{0, 40, 80, 120, 160} + testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) + }) } func TestRollupBigNumberOfValues(t *testing.T) { diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index ca69567fd..381f654c5 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -123,3 +123,4 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g This aggregate function is useful to detect anomalies across groups of similar time series. - `ascent_over_time(m[d])` - returns the sum of positive deltas between adjancent data points in `m` over `d`. Useful for tracking height gains in GPS track. - `descent_over_time(m[d])` - returns the absolute sum of negative deltas between adjancent data points in `m` over `d`. Useful for tracking height loss in GPS track. +- `mode_over_time(m[d])` - returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics)) for `m` values over `d`. It is expected that `m` values are discrete. diff --git a/go.mod b/go.mod index 6bcf99a39..8e5b0686f 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.0.1 github.com/VictoriaMetrics/metrics v1.11.3 - github.com/VictoriaMetrics/metricsql v0.2.5 + github.com/VictoriaMetrics/metricsql v0.2.6 github.com/aws/aws-sdk-go v1.33.5 github.com/cespare/xxhash/v2 v2.1.1 github.com/golang/snappy v0.0.1 diff --git a/go.sum b/go.sum index 6952dfdd7..d215603e9 100644 --- a/go.sum +++ b/go.sum @@ -53,8 +53,8 @@ github.com/VictoriaMetrics/metrics v1.11.2 h1:t/ceLP6SvagUqypCKU7cI7+tQn54+TIV/t github.com/VictoriaMetrics/metrics v1.11.2/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ= github.com/VictoriaMetrics/metrics v1.11.3 h1:eSfXc0CrquKa1VTNUvhP+dhNjLUZHQGTFfp19mYCQWE= github.com/VictoriaMetrics/metrics v1.11.3/go.mod h1:LU2j9qq7xqZYXz8tF3/RQnB2z2MbZms5TDiIg9/NHiQ= -github.com/VictoriaMetrics/metricsql v0.2.5 h1:9kL+RA2yuPfMpYdqycRbKOJ9WKdDmPmV6hAju5L6ti0= -github.com/VictoriaMetrics/metricsql v0.2.5/go.mod h1:UIjd9S0W1UnTWlJdM0wLS+2pfuPqjwqKoK8yTos+WyE= +github.com/VictoriaMetrics/metricsql v0.2.6 h1:hJxeGeyP++fWuW41URWcl2PTNTMTTeqm7UcT1BEZOOg= +github.com/VictoriaMetrics/metricsql v0.2.6/go.mod h1:UIjd9S0W1UnTWlJdM0wLS+2pfuPqjwqKoK8yTos+WyE= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/aws/aws-sdk-go v1.33.5 h1:p2fr1ryvNTU6avUWLI+/H7FGv0TBIjzVM5WDgXBBv4U= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/rollup.go b/vendor/github.com/VictoriaMetrics/metricsql/rollup.go index da32c308d..888996589 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/rollup.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/rollup.go @@ -63,6 +63,9 @@ var rollupFuncs = map[string]bool{ // `timestamp` func has been moved here because it must work properly with offsets and samples unaligned to the current step. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/415 for details. "timestamp": true, + + // See https://en.wikipedia.org/wiki/Mode_(statistics) + "mode_over_time": true, } // IsRollupFunc returns whether funcName is known rollup function. diff --git a/vendor/modules.txt b/vendor/modules.txt index 06fc63f38..75d75fd3b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -16,7 +16,7 @@ github.com/VictoriaMetrics/fasthttp/fasthttputil github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.11.3 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.2.5 +# github.com/VictoriaMetrics/metricsql v0.2.6 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop # github.com/aws/aws-sdk-go v1.33.5