From 0e3de5a0cccb7983d3cb45e24530c0763763a6e3 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 30 Sep 2021 13:22:52 +0300 Subject: [PATCH] app/vmselect/promql: add `topk_last` and `bottomk_last` functions --- app/vmselect/promql/aggr.go | 10 ++++++ app/vmselect/promql/exec.go | 4 +-- app/vmselect/promql/exec_test.go | 32 +++++++++++++++++++ docs/CHANGELOG.md | 1 + docs/MetricsQL.md | 24 +++++++++----- go.mod | 2 +- go.sum | 4 +-- .../VictoriaMetrics/metricsql/aggr.go | 2 ++ vendor/modules.txt | 2 +- 9 files changed, 67 insertions(+), 14 deletions(-) diff --git a/app/vmselect/promql/aggr.go b/app/vmselect/promql/aggr.go index 661d547a8..458061959 100644 --- a/app/vmselect/promql/aggr.go +++ b/app/vmselect/promql/aggr.go @@ -39,10 +39,12 @@ var aggrFuncs = map[string]aggrFunc{ "topk_max": newAggrFuncRangeTopK(maxValue, false), "topk_avg": newAggrFuncRangeTopK(avgValue, false), "topk_median": newAggrFuncRangeTopK(medianValue, false), + "topk_last": newAggrFuncRangeTopK(lastValue, false), "bottomk_min": newAggrFuncRangeTopK(minValue, true), "bottomk_max": newAggrFuncRangeTopK(maxValue, true), "bottomk_avg": newAggrFuncRangeTopK(avgValue, true), "bottomk_median": newAggrFuncRangeTopK(medianValue, true), + "bottomk_last": newAggrFuncRangeTopK(lastValue, true), "any": aggrFuncAny, "mad": newAggrFunc(aggrFuncMAD), "outliers_mad": aggrFuncOutliersMAD, @@ -803,6 +805,14 @@ func medianValue(values []float64) float64 { return quantile(0.5, values) } +func lastValue(values []float64) float64 { + values = skipTrailingNaNs(values) + if len(values) == 0 { + return nan + } + return values[len(values)-1] +} + // quantiles calculates the given phis from originValues without modifying originValues, appends them to qs and returns the result. func quantiles(qs, phis []float64, originValues []float64) []float64 { a := getFloat64s() diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go index 85b30277e..a1eb6316e 100644 --- a/app/vmselect/promql/exec.go +++ b/app/vmselect/promql/exec.go @@ -81,8 +81,8 @@ func maySortResults(e metricsql.Expr, tss []*timeseries) bool { case *metricsql.AggrFuncExpr: switch strings.ToLower(v.Name) { case "topk", "bottomk", "outliersk", - "topk_max", "topk_min", "topk_avg", "topk_median", - "bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median": + "topk_max", "topk_min", "topk_avg", "topk_median", "topk_last", + "bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median", "bottomk_last": return false } } diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 9eaae7fd7..f335e3e07 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -5299,6 +5299,21 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1} f(q, resultExpected) }) + t.Run(`topk_last(1)`, func(t *testing.T) { + t.Parallel() + q := `sort(topk_last(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))` + r1 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("baz"), + Value: []byte("sss"), + }} + resultExpected := []netstorage.Result{r1} + f(q, resultExpected) + }) t.Run(`bottomk_median(1)`, func(t *testing.T) { t.Parallel() q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))` @@ -5314,6 +5329,21 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1} f(q, resultExpected) }) + t.Run(`bottomk_last(1)`, func(t *testing.T) { + t.Parallel() + q := `sort(bottomk_last(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))` + r1 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{10, 10, 10, 10, 10, 10}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("bar"), + }} + resultExpected := []netstorage.Result{r1} + f(q, resultExpected) + }) t.Run(`topk(1, nan_timeseries)`, func(t *testing.T) { t.Parallel() q := `topk(1, label_set(NaN, "foo", "bar") or label_set(time()/150, "baz", "sss")) default 0` @@ -7191,12 +7221,14 @@ func TestExecError(t *testing.T) { f(`topk_max()`) f(`topk_avg()`) f(`topk_median()`) + f(`topk_last()`) f(`limitk()`) f(`bottomk()`) f(`bottomk_min()`) f(`bottomk_max()`) f(`bottomk_avg()`) f(`bottomk_median()`) + f(`bottomk_last()`) f(`time(123)`) f(`start(1)`) f(`end(1)`) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 87ed683db..09b607825 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,7 @@ sort: 15 * FEATURE: vmagent [enterprise](https://victoriametrics.com/enterprise.html): add support for data reading from [Apache Kafka](https://kafka.apache.org/). See [these docs](https://docs.victoriametrics.com/vmagent.html#kafka-integration). * FEATURE: calculate quantiles in the same way as Prometheus does in such functions as [quantile_over_time](https://docs.victoriametrics.com/MetricsQL.html#quantile_over_time) and [quantile](https://docs.victoriametrics.com/MetricsQL.html#quantile). Previously results from VictoriaMetrics could be slightly different than results from Prometheus. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1625) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1612) issues. * FEATURE: add `rollup_scrape_interval(m[d])` function to [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which returns `min`, `max` and `avg` values for the interval between samples for `m` on the given lookbehind window `d`. +* FEATURE: add `topk_last(k, q)` and `bottomk_last(k, q)` functions to [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which return up to `k` time series from `q` with the maximum / minimum last value on the graph. * FEATURE: vmui: add ability to naturally scroll and zoom graphs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1634). * BUGFIX: align behavior of the queries `a or on (labels) b`, `a and on (labels) b` and `a unless on (labels) b` where `b` has multiple time series with the given `labels` to Prometheus behavior. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1643). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index db135d71c..e700f951c 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -718,19 +718,23 @@ See also [implicit query conversions](#implicit-query-conversions). #### bottomk_avg -`bottomk_avg(k, q, "other_label=other_value")` returns up to `k` time series with the smallest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_avg](#topk_avg). +`bottomk_avg(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_avg](#topk_avg). + +#### bottomk_last + +`bottomk_last(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest last values. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_last](#topk_last). #### bottomk_max -`bottomk_max(k, q, "other_label=other_value")` returns up to `k` time series with the smallest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_max](#topk_max). +`bottomk_max(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_max](#topk_max). #### bottomk_median -`bottomk_median(k, q, "other_label=other_value")` returns up to `k` time series with the smallest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_median](#topk_median). +`bottomk_median(k, q, "other_label=other_value")` returns up to `k` time series from `q with the smallest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_median](#topk_median). #### bottomk_min -`bottomk_min(k, q, "other_label=other_value")` returns up to `k` time series with the smallest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_min](#topk_min). +`bottomk_min(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_min](#topk_min). #### count @@ -818,19 +822,23 @@ See also [implicit query conversions](#implicit-query-conversions). #### topk_avg -`topk_avg(k, q, "other_label=other_value")` returns up to `k` time series with the biggest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_avg](#bottomk_avg). +`topk_avg(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_avg](#bottomk_avg). + +#### topk_last + +`topk_last(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest last values. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_last](#bottomk_last). #### topk_max -`topk_max(k, q, "other_label=other_value")` returns up to `k` time series with the biggest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_max](#bottomk_max). +`topk_max(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_max](#bottomk_max). #### topk_median -`topk_median(k, q, "other_label=other_value")` returns up to `k` time series with the biggest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_median](#bottomk_median). +`topk_median(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_median](#bottomk_median). #### topk_min -`topk_min(k, q, "other_label=other_value")` returns up to `k` time series with the biggest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_min](#bottomk_min). +`topk_min(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_min](#bottomk_min). #### zscore diff --git a/go.mod b/go.mod index d2318d75a..5c1dd3a67 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.18.0 - github.com/VictoriaMetrics/metricsql v0.25.0 + github.com/VictoriaMetrics/metricsql v0.26.0 github.com/VividCortex/ewma v1.2.0 // indirect github.com/aws/aws-sdk-go v1.40.49 github.com/cespare/xxhash/v2 v2.1.2 diff --git a/go.sum b/go.sum index 7efbddfb9..4033eb9ed 100644 --- a/go.sum +++ b/go.sum @@ -110,8 +110,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= github.com/VictoriaMetrics/metrics v1.18.0 h1:vov5NxDHRSXFbdiH4dYLYEjKLoAXXSQ7hcnG8TSD9JQ= github.com/VictoriaMetrics/metrics v1.18.0/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= -github.com/VictoriaMetrics/metricsql v0.25.0 h1:7bjs82RYWpO/myqfXLZIIn7VMWwWd9qPgiBhsIG7c8s= -github.com/VictoriaMetrics/metricsql v0.25.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= +github.com/VictoriaMetrics/metricsql v0.26.0 h1:lJBRn9vn9kst7hfNzSsQorulzNYQtX7JxWWWxh/udfI= +github.com/VictoriaMetrics/metricsql v0.26.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/aggr.go b/vendor/github.com/VictoriaMetrics/metricsql/aggr.go index 7d6d7dceb..68e9c47d3 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/aggr.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/aggr.go @@ -30,10 +30,12 @@ var aggrFuncs = map[string]bool{ "topk_max": true, "topk_avg": true, "topk_median": true, + "topk_last": true, "bottomk_min": true, "bottomk_max": true, "bottomk_avg": true, "bottomk_median": true, + "bottomk_last": true, "any": true, "mad": true, "outliers_mad": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index 0876e2ec9..5f301feb3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -22,7 +22,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.18.0 ## explicit github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.25.0 +# github.com/VictoriaMetrics/metricsql v0.26.0 ## explicit github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop