From f7c3dee1c3808ea298fa3ff621069432e0717d83 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 22 Feb 2024 13:39:07 +0200 Subject: [PATCH] app/vmselect/promql: add `count_values_over_time()` MetricsQL function See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5847 --- app/vmselect/promql/exec_test.go | 51 +++++++++++++++++++ app/vmselect/promql/rollup.go | 47 ++++++++++++++++- docs/CHANGELOG.md | 1 + docs/MetricsQL.md | 38 +++++++++++--- go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/rollup.go | 1 + vendor/modules.txt | 2 +- 8 files changed, 134 insertions(+), 12 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 9f2dfc186..472f7d210 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -5822,6 +5822,57 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1, r2} f(q, resultExpected) }) + t.Run(`count_values_over_time`, func(t *testing.T) { + t.Parallel() + q := `sort_by_label(count_values_over_time(round(label_set(rand(0), "x", "y"), 0.4)[200s:5s], "foo"), "foo")` + r1 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{4, 8, 7, 6, 10, 9}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{ + { + Key: []byte("foo"), + Value: []byte("0"), + }, + { + Key: []byte("x"), + Value: []byte("y"), + }, + } + r2 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{20, 13, 19, 18, 14, 13}, + Timestamps: timestampsExpected, + } + r2.MetricName.Tags = []storage.Tag{ + { + Key: []byte("foo"), + Value: []byte("0.4"), + }, + { + Key: []byte("x"), + Value: []byte("y"), + }, + } + r3 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{16, 19, 14, 16, 16, 18}, + Timestamps: timestampsExpected, + } + r3.MetricName.Tags = []storage.Tag{ + { + Key: []byte("foo"), + Value: []byte("0.8"), + }, + { + Key: []byte("x"), + Value: []byte("y"), + }, + } + resultExpected := []netstorage.Result{r1, r2, r3} + f(q, resultExpected) + }) t.Run(`histogram_over_time`, func(t *testing.T) { t.Parallel() q := `sort_by_label(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]), "vmrange")` diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index 52b0a69e5..6e7c04a8a 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -4,12 +4,14 @@ import ( "flag" "fmt" "math" + "strconv" "strings" "sync" "github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metricsql" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" @@ -31,6 +33,7 @@ var rollupFuncs = map[string]newRollupFunc{ "count_le_over_time": newRollupCountLE, "count_ne_over_time": newRollupCountNE, "count_over_time": newRollupFuncOneArg(rollupCount), + "count_values_over_time": newRollupCountValues, "decreases_over_time": newRollupFuncOneArg(rollupDecreases), "default_rollup": newRollupFuncOneArg(rollupDefault), // default rollup func "delta": newRollupFuncOneArg(rollupDelta), @@ -609,7 +612,7 @@ type timeseriesMap struct { func newTimeseriesMap(funcName string, keepMetricNames bool, sharedTimestamps []int64, mnSrc *storage.MetricName) *timeseriesMap { funcName = strings.ToLower(funcName) switch funcName { - case "histogram_over_time", "quantiles_over_time": + case "histogram_over_time", "quantiles_over_time", "count_values_over_time": default: return nil } @@ -643,10 +646,16 @@ func (tsm *timeseriesMap) GetOrCreateTimeseries(labelName, labelValue string) *t if ts != nil { return ts } + + // Make a clone of labelValue in order to use it as map key, since it may point to unsafe string, + // which refers some other byte slice, which can change in the future. + labelValue = strings.Clone(labelValue) + ts = ×eries{} ts.CopyFromShallowTimestamps(tsm.origin) ts.MetricName.RemoveTag(labelName) ts.MetricName.AddTag(labelName, labelValue) + tsm.m[labelValue] = ts return ts } @@ -1436,6 +1445,42 @@ func mad(values []float64) float64 { return v } +func newRollupCountValues(args []interface{}) (rollupFunc, error) { + if err := expectRollupArgsNum(args, 2); err != nil { + return nil, err + } + tssLabelNum, ok := args[1].([]*timeseries) + if !ok { + return nil, fmt.Errorf(`unexpected type for labelName arg; got %T; want %T`, args[1], tssLabelNum) + } + labelName, err := getString(tssLabelNum, 1) + if err != nil { + return nil, fmt.Errorf("cannot get labelName: %w", err) + } + f := func(rfa *rollupFuncArg) float64 { + tsm := rfa.tsm + idx := rfa.idx + bb := bbPool.Get() + // Note: the code below may create very big number of time series + // if the number of unique values in rfa.values is big. + for _, v := range rfa.values { + bb.B = strconv.AppendFloat(bb.B[:0], v, 'g', -1, 64) + labelValue := bytesutil.ToUnsafeString(bb.B) + ts := tsm.GetOrCreateTimeseries(labelName, labelValue) + count := ts.Values[idx] + if math.IsNaN(count) { + count = 1 + } else { + count++ + } + ts.Values[idx] = count + } + bbPool.Put(bb) + return nan + } + return f, nil +} + func rollupHistogram(rfa *rollupFuncArg) float64 { values := rfa.values tsm := rfa.tsm diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2b788d4d9..dea139352 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -31,6 +31,7 @@ See also [LTS releases](https://docs.victoriametrics.com/LTS-releases.html). ## tip * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) via all the [label manipulation functions](https://docs.victoriametrics.com/metricsql/#label-manipulation-functions). For example, `label_del(some_metric{job="foo"}, "instance") + other_metric{pod="bar"}` is now transformed to `label_del(some_metric{job="foo",pod="bar"}, "instance") + other_metric{job="foo",pod="bar"}`. This should reduce the amounts of time series processed during query execution. +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [count_values_over_time](https://docs.victoriametrics.com/MetricsQL.html#count_values_over_time) function. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5847). * FEATURE: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): expose `vm_last_partition_parts` [metrics](https://docs.victoriametrics.com/#monitoring), which show the number of [parts in the latest partition](https://docs.victoriametrics.com/#storage). These metrics may help debugging query performance slowdown related to the increased number of parts in the last partition, since usually all the ingested data is written to the last partition and all the queries are performed over the recently ingested data, e.g. the last partition. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `client_id` option into [kuma_sd_configs](https://docs.victoriametrics.com/sd_configs/#kuma_sd_configs) in the same way as Prometheus does. See [this pull request](https://github.com/prometheus/prometheus/pull/13278). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `enable_compression` option in [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) in order to be compatible with Prometheus scrape configs. See [this pull request](https://github.com/prometheus/prometheus/pull/13166) and [this feature request](https://github.com/prometheus/prometheus/issues/12319). Note that `vmagent` was always supporting [`disable_compression` option](https://docs.victoriametrics.com/vmagent/#scrape_config-enhancements) before Prometheus added `enable_compression` option. diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 7bebe89b2..4a72d2d3f 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -252,7 +252,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. -See also [count_over_time](#count_over_time) and [share_eq_over_time](#share_eq_over_time). +See also [count_over_time](#count_over_time), [share_eq_over_time](#share_eq_over_time) and [count_values_over_time](#count_values_over_time). #### count_gt_over_time @@ -294,6 +294,16 @@ Metric names are stripped from the resulting rollups. Add [keep_metric_names](#k This function is supported by PromQL. See also [count_le_over_time](#count_le_over_time), [count_gt_over_time](#count_gt_over_time), [count_eq_over_time](#count_eq_over_time) and [count_ne_over_time](#count_ne_over_time). +#### count_values_over_time + +`count_values_over_time("label", series_selector[d])` is a [rollup function](#rollup-functions), which counts the number of raw samples +with the same value over the given lookbehind window and stores the counts in a time series with an additional `label`, which contains each initial value. +The results are calculated independently per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). + +Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. + +See also [count_eq_over_time](#count_eq_over_time), [count_values](#count_values) and [distinct_over_time](#distinct_over_time). + #### decreases_over_time `decreases_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates the number of raw sample value decreases @@ -378,6 +388,8 @@ on the given lookbehind window `d` per each time series returned from the given Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. +See also [count_values_over_time](#count_values_over_time). + #### duration_over_time `duration_over_time(series_selector[d], max_interval)` is a [rollup function](#rollup-functions), which returns the duration in seconds @@ -671,6 +683,7 @@ on the given lookbehind window `d` and returns them in time series with `rollup= These values are calculated individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). #### rollup_candlestick @@ -679,7 +692,8 @@ over raw samples on the given lookbehind window `d` and returns them in time ser The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). This function is useful for financial applications. -Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +Optional 2nd argument `"open"`, `"high"` or `"low"` or `"close"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). #### rollup_delta @@ -689,6 +703,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup= The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. @@ -702,6 +717,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup= The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. @@ -713,6 +729,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup= The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. See also [rollup_delta](#rollup_delta). @@ -726,10 +743,10 @@ See [this article](https://valyala.medium.com/why-irate-from-prometheus-doesnt-c when to use `rollup_rate()`. Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). - Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. #### rollup_scrape_interval @@ -740,6 +757,7 @@ and returns them in time series with `rollup="min"`, `rollup="max"` and `rollup= The calculations are performed individually per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Optional 2nd argument `"min"`, `"max"` or `"avg"` can be passed to keep only one calculation result and without adding a label. +See also [label_match](#label_match). Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. See also [scrape_interval](#scrape_interval). @@ -863,7 +881,7 @@ on the given lookbehind window `d` per each time series returned from the given Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. -This function is supported by PromQL. See also [timestamp_with_name](#timestamp_with_name). +This function is supported by PromQL. See also [time](#time) and [now](#now). #### timestamp_with_name @@ -872,7 +890,7 @@ on the given lookbehind window `d` per each time series returned from the given Metric names are preserved in the resulting rollups. -See also [timestamp](#timestamp). +See also [timestamp](#timestamp) and [keep_metric_names](#keep_metric_names) modifier. #### tfirst_over_time @@ -1550,7 +1568,7 @@ See also [start](#start) and [end](#end). `time()` is a [transform function](#transform-functions), which returns unix timestamp for every returned point. -This function is supported by PromQL. See also [now](#now), [start](#start) and [end](#end). +This function is supported by PromQL. See also [timestamp](#timestamp), [now](#now), [start](#start) and [end](#end). #### timezone_offset @@ -1866,10 +1884,14 @@ The aggregate is calculated individually per each group of points with the same This function is supported by PromQL. +See also [count_values_over_time](#count_values_over_time). + #### distinct `distinct(q)` is [aggregate function](#aggregate-functions), which calculates the number of unique values per each group of points with the same timestamp. +See also [distinct_over_time](#distinct_over_time). + #### geomean `geomean(q)` is [aggregate function](#aggregate-functions), which calculates geometric mean per each group of points with the same timestamp. @@ -1961,7 +1983,9 @@ See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad). for all the time series returned by `q`. `phi` must be in the range `[0...1]`. The aggregate is calculated individually per each group of points with the same timestamp. -This function is supported by PromQL. See also [quantiles](#quantiles) and [histogram_quantile](#histogram_quantile). +This function is supported by PromQL. + +See also [quantiles](#quantiles) and [histogram_quantile](#histogram_quantile). #### quantiles diff --git a/go.mod b/go.mod index 96828789c..01a23707b 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/VictoriaMetrics/easyproto v0.1.4 github.com/VictoriaMetrics/fastcache v1.12.2 github.com/VictoriaMetrics/metrics v1.32.0 - github.com/VictoriaMetrics/metricsql v0.73.0 + github.com/VictoriaMetrics/metricsql v0.74.0 github.com/aws/aws-sdk-go-v2 v1.25.0 github.com/aws/aws-sdk-go-v2/config v1.27.0 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.16.0 diff --git a/go.sum b/go.sum index 8ea40ede8..d4874d507 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,8 @@ github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkT github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys= github.com/VictoriaMetrics/metrics v1.32.0 h1:r9JK2zndYv0TIxFXLEHwhQqRdnu8/O3cwJiCBX4vJCM= github.com/VictoriaMetrics/metrics v1.32.0/go.mod h1:r7hveu6xMdUACXvB8TYdAj8WEsKzWB0EkpJN+RDtOf8= -github.com/VictoriaMetrics/metricsql v0.73.0 h1:MvYnUIZHWD+Kj+sKuBSI1asR1fw1BxQPGshs32C7FIk= -github.com/VictoriaMetrics/metricsql v0.73.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I= +github.com/VictoriaMetrics/metricsql v0.74.0 h1:bVO7USXBBYEuEHQ3PZg/6216j0DvblZM+Q8sTRECkv0= +github.com/VictoriaMetrics/metricsql v0.74.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/rollup.go b/vendor/github.com/VictoriaMetrics/metricsql/rollup.go index f650ee5e6..a7b5b9f57 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/rollup.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/rollup.go @@ -16,6 +16,7 @@ var rollupFuncs = map[string]bool{ "count_le_over_time": true, "count_ne_over_time": true, "count_over_time": true, + "count_values_over_time": true, "decreases_over_time": true, "default_rollup": true, "delta": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index 6ea7db837..d183be4bf 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -102,7 +102,7 @@ github.com/VictoriaMetrics/fastcache # github.com/VictoriaMetrics/metrics v1.32.0 ## explicit; go 1.17 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.73.0 +# github.com/VictoriaMetrics/metricsql v0.74.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop