diff --git a/app/vmselect/promql/aggr.go b/app/vmselect/promql/aggr.go index d432e5806..802411ef0 100644 --- a/app/vmselect/promql/aggr.go +++ b/app/vmselect/promql/aggr.go @@ -40,6 +40,7 @@ var aggrFuncs = map[string]aggrFunc{ "outliersk": aggrFuncOutliersK, "quantile": aggrFuncQuantile, "quantiles": aggrFuncQuantiles, + "share": aggrFuncShare, "stddev": newAggrFunc(aggrFuncStddev), "stdvar": newAggrFunc(aggrFuncStdvar), "sum": newAggrFunc(aggrFuncSum), @@ -455,6 +456,37 @@ func aggrFuncMode(tss []*timeseries) []*timeseries { return tss[:1] } +func aggrFuncShare(afa *aggrFuncArg) ([]*timeseries, error) { + tss, err := getAggrTimeseries(afa.args) + if err != nil { + return nil, err + } + afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries { + for i := range tss[0].Values { + // Calculate sum for non-negative points at position i. + var sum float64 + for _, ts := range tss { + v := ts.Values[i] + if math.IsNaN(v) || v < 0 { + continue + } + sum += v + } + // Divide every non-negative value at poisition i by sum in order to get its' share. + for _, ts := range tss { + v := ts.Values[i] + if math.IsNaN(v) || v < 0 { + ts.Values[i] = nan + } else { + ts.Values[i] = v / sum + } + } + } + return tss + } + return aggrFuncExt(afe, tss, &afa.ae.Modifier, afa.ae.Limit, true) +} + func aggrFuncZScore(afa *aggrFuncArg) ([]*timeseries, error) { tss, err := getAggrTimeseries(afa.args) if err != nil { @@ -491,10 +523,6 @@ func aggrFuncZScore(afa *aggrFuncArg) ([]*timeseries, error) { ts.Values[i] = (v - avg) / stddev } } - // Remove MetricGroup from all the tss. - for _, ts := range tss { - ts.MetricName.ResetMetricGroup() - } return tss } return aggrFuncExt(afe, tss, &afa.ae.Modifier, afa.ae.Limit, true) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 55d2bcb86..c8ff5e8d1 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -4795,6 +4795,85 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`share()`, func(t *testing.T) { + t.Parallel() + q := `sort_by_label(round(share(( + label_set(time()/100+10, "k", "v1"), + label_set(time()/200+5, "k", "v2"), + label_set(time()/110-10, "k", "v3"), + label_set(time()/90-5, "k", "v4"), + )), 0.001), "k")` + r1 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.554, 0.521, 0.487, 0.462, 0.442, 0.426}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("k"), + Value: []byte("v1"), + }} + r2 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.277, 0.26, 0.243, 0.231, 0.221, 0.213}, + Timestamps: timestampsExpected, + } + r2.MetricName.Tags = []storage.Tag{{ + Key: []byte("k"), + Value: []byte("v2"), + }} + r3 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{nan, 0.022, 0.055, 0.081, 0.1, 0.116}, + Timestamps: timestampsExpected, + } + r3.MetricName.Tags = []storage.Tag{{ + Key: []byte("k"), + Value: []byte("v3"), + }} + r4 := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.169, 0.197, 0.214, 0.227, 0.237, 0.245}, + Timestamps: timestampsExpected, + } + r4.MetricName.Tags = []storage.Tag{{ + Key: []byte("k"), + Value: []byte("v4"), + }} + resultExpected := []netstorage.Result{r1, r2, r3, r4} + f(q, resultExpected) + }) + t.Run(`sum(share())`, func(t *testing.T) { + t.Parallel() + q := `round(sum(share(( + label_set(time()/100+10, "k", "v1"), + label_set(time()/200+5, "k", "v2"), + label_set(time()/110-10, "k", "v3"), + label_set(time()/90-5, "k", "v4"), + ))), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{1, 1, 1, 1, 1, 1}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`sum(share() by (k))`, func(t *testing.T) { + t.Parallel() + q := `round(sum(share(( + label_set(time()/100+10, "k", "v1"), + label_set(time()/200+5, "k", "v2", "a", "b"), + label_set(time()/110-10, "k", "v1", "a", "b"), + label_set(time()/90-5, "k", "v2"), + )) by (k)), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{2, 2, 2, 2, 2, 2}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`zscore()`, func(t *testing.T) { t.Parallel() q := `sort_by_label(round(zscore(( @@ -8421,6 +8500,7 @@ func TestExecError(t *testing.T) { f(`rate_over_sum()`) f(`zscore_over_time()`) f(`mode()`) + f(`share()`) f(`zscore()`) f(`prometheus_buckets()`) f(`buckets_limit()`) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4b8b0bf24..6ba1a38f8 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -22,6 +22,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): automatically retry failing `GET` requests on all [the configured backends](https://docs.victoriametrics.com/vmauth.html#load-balancing). Previously the backend error has been immediately returned to the client without retrying the request on the remaining backends. * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): choose the backend with the minimum number of concurrently executed requests [among the configured backends](https://docs.victoriametrics.com/vmauth.html#load-balancing) in a round-robin manner for serving the incoming requests. This allows spreading the load among backends more evenly, while improving the response time. * FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage). +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [share(q)](https://docs.victoriametrics.com/MetricsQL.html#share) aggregate function. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `mad_over_time(m[d])` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over raw samples on the lookbehind window `d`. See [this feature request](https://github.com/prometheus/prometheus/issues/5514). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_mad(q)` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over points per each time series returned by `q`. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_zscore(q)` function for calculating [z-score](https://en.wikipedia.org/wiki/Standard_score) over points per each time series returned from `q`. diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 34e82d5d5..bfd47dbd5 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -1247,6 +1247,8 @@ See also [mad](#mad) and [mad_over_time](#mad_over_time). `range_normalize(q1, ...)` is a [transform function](#transform-functions), which normalizes values for time series returned by `q1, ...` into `[0 ... 1]` range. This function is useful for correlating time series with distinct value ranges. +See also [share](#share). + #### range_quantile `range_quantile(phi, q)` is a [transform function](#transform-functions), which returns `phi`-quantile across points per each time series returned by `q`. @@ -1818,6 +1820,24 @@ The aggregate is calculated individually per each group of points with the same See also [quantile](#quantile). +#### share + +`share(q) by (group_labels)` is [aggregate function](#aggregate-functions), which returns shares in the range `[0..1]` +for every non-negative points returned by `q` per each timestamp, so the sum of shares per each `group_labels` equals 1. + +This function is useful for normalizing [histogram bucket](https://docs.victoriametrics.com/keyConcepts.html#histogram) shares +into `[0..1]` range: + +```metricsql +share( + sum( + rate(http_request_duration_seconds_bucket[5m]) + ) by (le, vmrange) +) +``` + +See also [range_normalize](#range_normalize). + #### stddev `stddev(q) by (group_labels)` is [aggregate function](#aggregate-functions), which calculates standard deviation per each `group_labels` diff --git a/go.mod b/go.mod index 340510374..fb162011a 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.23.1 - github.com/VictoriaMetrics/metricsql v0.55.0 + github.com/VictoriaMetrics/metricsql v0.56.1 github.com/aws/aws-sdk-go-v2 v1.17.4 github.com/aws/aws-sdk-go-v2/config v1.18.13 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.53 diff --git a/go.sum b/go.sum index a3fc01392..2136b23f8 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.23.1 h1:/j8DzeJBxSpL2qSIdqnRFLvQQhbJyJbbEi22yMm7oL0= github.com/VictoriaMetrics/metrics v1.23.1/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= -github.com/VictoriaMetrics/metricsql v0.55.0 h1:GZMZ1dUKPMhKsSPtVTRHfMChwRZ4KrXBxnSQgr3mjSg= -github.com/VictoriaMetrics/metricsql v0.55.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.56.1 h1:j+W4fA/gtozsZb4PlKDU0Ma2VOgl88xla4FEf29w94g= +github.com/VictoriaMetrics/metricsql v0.56.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/aggr.go b/vendor/github.com/VictoriaMetrics/metricsql/aggr.go index 5dfe5ae79..b8c77c349 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/aggr.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/aggr.go @@ -29,6 +29,7 @@ var aggrFuncs = map[string]bool{ "outliersk": true, "quantile": true, "quantiles": true, + "share": true, "stddev": true, "stdvar": true, "sum": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index a4823bf82..ce27af556 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -71,7 +71,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.23.1 ## explicit; go 1.15 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.55.0 +# github.com/VictoriaMetrics/metricsql v0.56.1 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop