app/vmselect/promql: add topk_last and bottomk_last functions

This commit is contained in:
Aliaksandr Valialkin 2021-09-30 13:22:52 +03:00
parent a31407006c
commit 0e3de5a0cc
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
9 changed files with 67 additions and 14 deletions

View file

@ -39,10 +39,12 @@ var aggrFuncs = map[string]aggrFunc{
"topk_max": newAggrFuncRangeTopK(maxValue, false),
"topk_avg": newAggrFuncRangeTopK(avgValue, false),
"topk_median": newAggrFuncRangeTopK(medianValue, false),
"topk_last": newAggrFuncRangeTopK(lastValue, false),
"bottomk_min": newAggrFuncRangeTopK(minValue, true),
"bottomk_max": newAggrFuncRangeTopK(maxValue, true),
"bottomk_avg": newAggrFuncRangeTopK(avgValue, true),
"bottomk_median": newAggrFuncRangeTopK(medianValue, true),
"bottomk_last": newAggrFuncRangeTopK(lastValue, true),
"any": aggrFuncAny,
"mad": newAggrFunc(aggrFuncMAD),
"outliers_mad": aggrFuncOutliersMAD,
@ -803,6 +805,14 @@ func medianValue(values []float64) float64 {
return quantile(0.5, values)
}
func lastValue(values []float64) float64 {
values = skipTrailingNaNs(values)
if len(values) == 0 {
return nan
}
return values[len(values)-1]
}
// quantiles calculates the given phis from originValues without modifying originValues, appends them to qs and returns the result.
func quantiles(qs, phis []float64, originValues []float64) []float64 {
a := getFloat64s()

View file

@ -81,8 +81,8 @@ func maySortResults(e metricsql.Expr, tss []*timeseries) bool {
case *metricsql.AggrFuncExpr:
switch strings.ToLower(v.Name) {
case "topk", "bottomk", "outliersk",
"topk_max", "topk_min", "topk_avg", "topk_median",
"bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median":
"topk_max", "topk_min", "topk_avg", "topk_median", "topk_last",
"bottomk_max", "bottomk_min", "bottomk_avg", "bottomk_median", "bottomk_last":
return false
}
}

View file

@ -5299,6 +5299,21 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`topk_last(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(topk_last(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10.666666666666666, 12, 13.333333333333334},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("baz"),
Value: []byte("sss"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`bottomk_median(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))`
@ -5314,6 +5329,21 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`bottomk_last(1)`, func(t *testing.T) {
t.Parallel()
q := `sort(bottomk_last(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`topk(1, nan_timeseries)`, func(t *testing.T) {
t.Parallel()
q := `topk(1, label_set(NaN, "foo", "bar") or label_set(time()/150, "baz", "sss")) default 0`
@ -7191,12 +7221,14 @@ func TestExecError(t *testing.T) {
f(`topk_max()`)
f(`topk_avg()`)
f(`topk_median()`)
f(`topk_last()`)
f(`limitk()`)
f(`bottomk()`)
f(`bottomk_min()`)
f(`bottomk_max()`)
f(`bottomk_avg()`)
f(`bottomk_median()`)
f(`bottomk_last()`)
f(`time(123)`)
f(`start(1)`)
f(`end(1)`)

View file

@ -10,6 +10,7 @@ sort: 15
* FEATURE: vmagent [enterprise](https://victoriametrics.com/enterprise.html): add support for data reading from [Apache Kafka](https://kafka.apache.org/). See [these docs](https://docs.victoriametrics.com/vmagent.html#kafka-integration).
* FEATURE: calculate quantiles in the same way as Prometheus does in such functions as [quantile_over_time](https://docs.victoriametrics.com/MetricsQL.html#quantile_over_time) and [quantile](https://docs.victoriametrics.com/MetricsQL.html#quantile). Previously results from VictoriaMetrics could be slightly different than results from Prometheus. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1625) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1612) issues.
* FEATURE: add `rollup_scrape_interval(m[d])` function to [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which returns `min`, `max` and `avg` values for the interval between samples for `m` on the given lookbehind window `d`.
* FEATURE: add `topk_last(k, q)` and `bottomk_last(k, q)` functions to [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html), which return up to `k` time series from `q` with the maximum / minimum last value on the graph.
* FEATURE: vmui: add ability to naturally scroll and zoom graphs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1634).
* BUGFIX: align behavior of the queries `a or on (labels) b`, `a and on (labels) b` and `a unless on (labels) b` where `b` has multiple time series with the given `labels` to Prometheus behavior. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1643).

View file

@ -718,19 +718,23 @@ See also [implicit query conversions](#implicit-query-conversions).
#### bottomk_avg
`bottomk_avg(k, q, "other_label=other_value")` returns up to `k` time series with the smallest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_avg](#topk_avg).
`bottomk_avg(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_avg](#topk_avg).
#### bottomk_last
`bottomk_last(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest last values. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_last](#topk_last).
#### bottomk_max
`bottomk_max(k, q, "other_label=other_value")` returns up to `k` time series with the smallest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_max](#topk_max).
`bottomk_max(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest maximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_max](#topk_max).
#### bottomk_median
`bottomk_median(k, q, "other_label=other_value")` returns up to `k` time series with the smallest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_median](#topk_median).
`bottomk_median(k, q, "other_label=other_value")` returns up to `k` time series from `q with the smallest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_median](#topk_median).
#### bottomk_min
`bottomk_min(k, q, "other_label=other_value")` returns up to `k` time series with the smallest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_min](#topk_min).
`bottomk_min(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the smallest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `bottomk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the smallest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [topk_min](#topk_min).
#### count
@ -818,19 +822,23 @@ See also [implicit query conversions](#implicit-query-conversions).
#### topk_avg
`topk_avg(k, q, "other_label=other_value")` returns up to `k` time series with the biggest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_avg](#bottomk_avg).
`topk_avg(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest averages. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_avg(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest averages plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_avg](#bottomk_avg).
#### topk_last
`topk_last(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest last values. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_last](#bottomk_last).
#### topk_max
`topk_max(k, q, "other_label=other_value")` returns up to `k` time series with the biggest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_max](#bottomk_max).
`topk_max(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest maximums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_max(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest amaximums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_max](#bottomk_max).
#### topk_median
`topk_median(k, q, "other_label=other_value")` returns up to `k` time series with the biggest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_median](#bottomk_median).
`topk_median(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest medians. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_median(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest medians plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_median](#bottomk_median).
#### topk_min
`topk_min(k, q, "other_label=other_value")` returns up to `k` time series with the biggest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_min](#bottomk_min).
`topk_min(k, q, "other_label=other_value")` returns up to `k` time series from `q` with the biggest minimums. If an optional `other_label=other_value` arg is set, then the sum of the remaining time series is returned with the given label. For example, `topk_min(3, sum(process_resident_memory_bytes) by (job), "job=other")` would return up to 3 time series with the biggest minimums plus a time series with `{job="other"}` label with the sum of the remaining series if any. See also [bottomk_min](#bottomk_min).
#### zscore

2
go.mod
View file

@ -9,7 +9,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.18.0
github.com/VictoriaMetrics/metricsql v0.25.0
github.com/VictoriaMetrics/metricsql v0.26.0
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/aws/aws-sdk-go v1.40.49
github.com/cespare/xxhash/v2 v2.1.2

4
go.sum
View file

@ -110,8 +110,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
github.com/VictoriaMetrics/metrics v1.18.0 h1:vov5NxDHRSXFbdiH4dYLYEjKLoAXXSQ7hcnG8TSD9JQ=
github.com/VictoriaMetrics/metrics v1.18.0/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metricsql v0.25.0 h1:7bjs82RYWpO/myqfXLZIIn7VMWwWd9qPgiBhsIG7c8s=
github.com/VictoriaMetrics/metricsql v0.25.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VictoriaMetrics/metricsql v0.26.0 h1:lJBRn9vn9kst7hfNzSsQorulzNYQtX7JxWWWxh/udfI=
github.com/VictoriaMetrics/metricsql v0.26.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=

View file

@ -30,10 +30,12 @@ var aggrFuncs = map[string]bool{
"topk_max": true,
"topk_avg": true,
"topk_median": true,
"topk_last": true,
"bottomk_min": true,
"bottomk_max": true,
"bottomk_avg": true,
"bottomk_median": true,
"bottomk_last": true,
"any": true,
"mad": true,
"outliers_mad": true,

2
vendor/modules.txt vendored
View file

@ -22,7 +22,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.18.0
## explicit
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.25.0
# github.com/VictoriaMetrics/metricsql v0.26.0
## explicit
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop