app/vmselect/promql: add count_values_over_time() MetricsQL function

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5847
This commit is contained in:
Aliaksandr Valialkin 2024-02-22 13:39:07 +02:00
parent a6eacfdb11
commit f7c3dee1c3
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
8 changed files with 134 additions and 12 deletions

View file

@ -5822,6 +5822,57 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`count_values_over_time`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(count_values_over_time(round(label_set(rand(0), "x", "y"), 0.4)[200s:5s], "foo"), "foo")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{4, 8, 7, 6, 10, 9},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("0"),
},
{
Key: []byte("x"),
Value: []byte("y"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{20, 13, 19, 18, 14, 13},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("0.4"),
},
{
Key: []byte("x"),
Value: []byte("y"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{16, 19, 14, 16, 16, 18},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("0.8"),
},
{
Key: []byte("x"),
Value: []byte("y"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected)
})
t.Run(`histogram_over_time`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]), "vmrange")`

View file

@ -4,12 +4,14 @@ import (
"flag"
"fmt"
"math"
"strconv"
"strings"
"sync"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
@ -31,6 +33,7 @@ var rollupFuncs = map[string]newRollupFunc{
"count_le_over_time": newRollupCountLE,
"count_ne_over_time": newRollupCountNE,
"count_over_time": newRollupFuncOneArg(rollupCount),
"count_values_over_time": newRollupCountValues,
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
"default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func
"delta": newRollupFuncOneArg(rollupDelta),
@ -609,7 +612,7 @@ type timeseriesMap struct {
func newTimeseriesMap(funcName string, keepMetricNames bool, sharedTimestamps []int64, mnSrc *storage.MetricName) *timeseriesMap {
funcName = strings.ToLower(funcName)
switch funcName {
case "histogram_over_time", "quantiles_over_time":
case "histogram_over_time", "quantiles_over_time", "count_values_over_time":
default:
return nil
}
@ -643,10 +646,16 @@ func (tsm *timeseriesMap) GetOrCreateTimeseries(labelName, labelValue string) *t
if ts != nil {
return ts
}
// Make a clone of labelValue in order to use it as map key, since it may point to unsafe string,
// which refers some other byte slice, which can change in the future.
labelValue = strings.Clone(labelValue)
ts = &timeseries{}
ts.CopyFromShallowTimestamps(tsm.origin)
ts.MetricName.RemoveTag(labelName)
ts.MetricName.AddTag(labelName, labelValue)
tsm.m[labelValue] = ts
return ts
}
@ -1436,6 +1445,42 @@ func mad(values []float64) float64 {
return v
}
func newRollupCountValues(args []interface{}) (rollupFunc, error) {
if err := expectRollupArgsNum(args, 2); err != nil {
return nil, err
}
tssLabelNum, ok := args[1].([]*timeseries)
if !ok {
return nil, fmt.Errorf(`unexpected type for labelName arg; got %T; want %T`, args[1], tssLabelNum)
}
labelName, err := getString(tssLabelNum, 1)
if err != nil {
return nil, fmt.Errorf("cannot get labelName: %w", err)
}
f := func(rfa *rollupFuncArg) float64 {
tsm := rfa.tsm
idx := rfa.idx
bb := bbPool.Get()
// Note: the code below may create very big number of time series
// if the number of unique values in rfa.values is big.
for _, v := range rfa.values {
bb.B = strconv.AppendFloat(bb.B[:0], v, 'g', -1, 64)
labelValue := bytesutil.ToUnsafeString(bb.B)
ts := tsm.GetOrCreateTimeseries(labelName, labelValue)
count := ts.Values[idx]
if math.IsNaN(count) {
count = 1
} else {
count++
}
ts.Values[idx] = count
}
bbPool.Put(bb)
return nan
}
return f, nil
}
func rollupHistogram(rfa *rollupFuncArg) float64 {
values := rfa.values
tsm := rfa.tsm

View file

@ -31,6 +31,7 @@ See also [LTS releases](https://docs.victoriametrics.com/LTS-releases.html).
## tip
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) via all the [label manipulation functions](https://docs.victoriametrics.com/metricsql/#label-manipulation-functions). For example, `label_del(some_metric{job="foo"}, "instance") + other_metric{pod="bar"}` is now transformed to `label_del(some_metric{job="foo",pod="bar"}, "instance") + other_metric{job="foo",pod="bar"}`. This should reduce the amounts of time series processed during query execution.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [count_values_over_time](https://docs.victoriametrics.com/MetricsQL.html#count_values_over_time) function. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5847).
* FEATURE: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): expose `vm_last_partition_parts` [metrics](https://docs.victoriametrics.com/#monitoring), which show the number of [parts in the latest partition](https://docs.victoriametrics.com/#storage). These metrics may help debugging query performance slowdown related to the increased number of parts in the last partition, since usually all the ingested data is written to the last partition and all the queries are performed over the recently ingested data, e.g. the last partition.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `client_id` option into [kuma_sd_configs](https://docs.victoriametrics.com/sd_configs/#kuma_sd_configs) in the same way as Prometheus does. See [this pull request](https://github.com/prometheus/prometheus/pull/13278).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `enable_compression` option in [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) in order to be compatible with Prometheus scrape configs. See [this pull request](https://github.com/prometheus/prometheus/pull/13166) and [this feature request](https://github.com/prometheus/prometheus/issues/12319). Note that `vmagent` was always supporting [`disable_compression` option](https://docs.victoriametrics.com/vmagent/#scrape_config-enhancements) before Prometheus added `enable_compression` option.

View file

@ -252,7 +252,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
See also [count_over_time](#count_over_time) and [share_eq_over_time](#share_eq_over_time).
See also [count_over_time](#count_over_time), [share_eq_over_time](#share_eq_over_time) and [count_values_over_time](#count_values_over_time).
#### count_gt_over_time
@ -294,6 +294,16 @@ Metric names are stripped from the resulting rollups. Add [keep_metric_names](#k
This function is supported by PromQL. See also [count_le_over_time](#count_le_over_time), [count_gt_over_time](#count_gt_over_time),
[count_eq_over_time](#count_eq_over_time) and [count_ne_over_time](#count_ne_over_time).
#### count_values_over_time
`count_values_over_time("label", series_selector[d])` is a [rollup function](#rollup-functions), which counts the number of raw samples
with the same value over the given lookbehind window and stores the counts in a time series with an additional `label`, which contains each initial value.
The results are calculated independently per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
See also [count_eq_over_time](#count_eq_over_time), [count_values](#count_values) and [distinct_over_time](#distinct_over_time).
#### decreases_over_time
`decreases_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates the number of raw sample value decreases
@ -378,6 +388,8 @@ on the given lookbehind window `d` per each time series returned from the given
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
See also [count_values_over_time](#count_values_over_time).
#### duration_over_time
`duration_over_time(series_selector[d], max_interval)` is a [rollup function](#rollup-functions), which returns the duration in seconds
@ -671,6 +683,7 @@ on the given lookbehind window `d` and returns them in time series with `rollup=
These values are calculated individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
#### rollup_candlestick
@ -679,7 +692,8 @@ over raw samples on the given lookbehind window `d` and returns them in time ser
The calculations are performed individually per each time series returned
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). This function is useful for financial applications.
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
Optional 2nd argument `"open"`, `"high"` or `"low"` or `"close"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
#### rollup_delta
@ -689,6 +703,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup=
The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
@ -702,6 +717,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup=
The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
@ -713,6 +729,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup=
The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. See also [rollup_delta](#rollup_delta).
@ -726,10 +743,10 @@ See [this article](https://valyala.medium.com/why-irate-from-prometheus-doesnt-c
when to use `rollup_rate()`.
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
#### rollup_scrape_interval
@ -740,6 +757,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup=
The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label.
See also [label_match](#label_match).
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. See also [scrape_interval](#scrape_interval).
@ -863,7 +881,7 @@ on the given lookbehind window `d` per each time series returned from the given
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
This function is supported by PromQL. See also [timestamp_with_name](#timestamp_with_name).
This function is supported by PromQL. See also [time](#time) and [now](#now).
#### timestamp_with_name
@ -872,7 +890,7 @@ on the given lookbehind window `d` per each time series returned from the given
Metric names are preserved in the resulting rollups.
See also [timestamp](#timestamp).
See also [timestamp](#timestamp) and [keep_metric_names](#keep_metric_names) modifier.
#### tfirst_over_time
@ -1550,7 +1568,7 @@ See also [start](#start) and [end](#end).
`time()` is a [transform function](#transform-functions), which returns unix timestamp for every returned point.
This function is supported by PromQL. See also [now](#now), [start](#start) and [end](#end).
This function is supported by PromQL. See also [timestamp](#timestamp), [now](#now), [start](#start) and [end](#end).
#### timezone_offset
@ -1866,10 +1884,14 @@ The aggregate is calculated individually per each group of points with the same
This function is supported by PromQL.
See also [count_values_over_time](#count_values_over_time).
#### distinct
`distinct(q)` is [aggregate function](#aggregate-functions), which calculates the number of unique values per each group of points with the same timestamp.
See also [distinct_over_time](#distinct_over_time).
#### geomean
`geomean(q)` is [aggregate function](#aggregate-functions), which calculates geometric mean per each group of points with the same timestamp.
@ -1961,7 +1983,9 @@ See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).
for all the time series returned by `q`. `phi` must be in the range `[0...1]`.
The aggregate is calculated individually per each group of points with the same timestamp.
This function is supported by PromQL. See also [quantiles](#quantiles) and [histogram_quantile](#histogram_quantile).
This function is supported by PromQL.
See also [quantiles](#quantiles) and [histogram_quantile](#histogram_quantile).
#### quantiles

2
go.mod
View file

@ -9,7 +9,7 @@ require (
github.com/VictoriaMetrics/easyproto v0.1.4
github.com/VictoriaMetrics/fastcache v1.12.2
github.com/VictoriaMetrics/metrics v1.32.0
github.com/VictoriaMetrics/metricsql v0.73.0
github.com/VictoriaMetrics/metricsql v0.74.0
github.com/aws/aws-sdk-go-v2 v1.25.0
github.com/aws/aws-sdk-go-v2/config v1.27.0
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.16.0

4
go.sum
View file

@ -71,8 +71,8 @@ github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkT
github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys=
github.com/VictoriaMetrics/metrics v1.32.0 h1:r9JK2zndYv0TIxFXLEHwhQqRdnu8/O3cwJiCBX4vJCM=
github.com/VictoriaMetrics/metrics v1.32.0/go.mod h1:r7hveu6xMdUACXvB8TYdAj8WEsKzWB0EkpJN+RDtOf8=
github.com/VictoriaMetrics/metricsql v0.73.0 h1:MvYnUIZHWD+Kj+sKuBSI1asR1fw1BxQPGshs32C7FIk=
github.com/VictoriaMetrics/metricsql v0.73.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
github.com/VictoriaMetrics/metricsql v0.74.0 h1:bVO7USXBBYEuEHQ3PZg/6216j0DvblZM+Q8sTRECkv0=
github.com/VictoriaMetrics/metricsql v0.74.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View file

@ -16,6 +16,7 @@ var rollupFuncs = map[string]bool{
"count_le_over_time": true,
"count_ne_over_time": true,
"count_over_time": true,
"count_values_over_time": true,
"decreases_over_time": true,
"default_rollup": true,
"delta": true,

2
vendor/modules.txt vendored
View file

@ -102,7 +102,7 @@ github.com/VictoriaMetrics/fastcache
# github.com/VictoriaMetrics/metrics v1.32.0
## explicit; go 1.17
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.73.0
# github.com/VictoriaMetrics/metricsql v0.74.0
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop