From a14053ffa07dab0a031974b47f721f2271367870 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 4 Mar 2021 14:12:07 +0200 Subject: [PATCH] app/vmselect/promql: add `histogram_avg()`, `histogram_stddev()` and `histogram_stdvar()` functions to MetricsQL --- app/vmselect/promql/exec_test.go | 66 ++++++++++ app/vmselect/promql/transform.go | 124 ++++++++++++++++++ docs/CHANGELOG.md | 4 + docs/MetricsQL.md | 5 +- go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/optimizer.go | 2 +- .../VictoriaMetrics/metricsql/transform.go | 3 + vendor/modules.txt | 2 +- 9 files changed, 206 insertions(+), 6 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 9a3664064..c50c64eea 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -2810,6 +2810,72 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`stdvar_over_time()`, func(t *testing.T) { + t.Parallel() + q := `round(stdvar_over_time(rand(0)[200s:5s]), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.082, 0.088, 0.092, 0.075, 0.101, 0.08}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`histogram_stdvar()`, func(t *testing.T) { + t.Parallel() + q := `round(histogram_stdvar(histogram_over_time(rand(0)[200s:5s])), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.079, 0.089, 0.089, 0.071, 0.1, 0.082}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`stddev_over_time()`, func(t *testing.T) { + t.Parallel() + q := `round(stddev_over_time(rand(0)[200s:5s]), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.286, 0.297, 0.303, 0.274, 0.318, 0.283}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`histogram_stddev()`, func(t *testing.T) { + t.Parallel() + q := `round(histogram_stddev(histogram_over_time(rand(0)[200s:5s])), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.281, 0.299, 0.298, 0.267, 0.316, 0.286}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`avg_over_time()`, func(t *testing.T) { + t.Parallel() + q := `round(avg_over_time(rand(0)[200s:5s]), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.521, 0.518, 0.509, 0.544, 0.511, 0.504}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`histogram_avg()`, func(t *testing.T) { + t.Parallel() + q := `round(histogram_avg(histogram_over_time(rand(0)[200s:5s])), 0.001)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{0.519, 0.521, 0.503, 0.543, 0.511, 0.506}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`histogram_share(single-value-valid-le)`, func(t *testing.T) { t.Parallel() q := `histogram_share(80, label_set(100, "le", "200"))` diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index 714f3cf80..a0e0d23c0 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -118,6 +118,9 @@ var transformFuncs = map[string]transformFunc{ "prometheus_buckets": transformPrometheusBuckets, "buckets_limit": transformBucketsLimit, "histogram_share": transformHistogramShare, + "histogram_avg": transformHistogramAvg, + "histogram_stdvar": transformHistogramStdvar, + "histogram_stddev": transformHistogramStddev, "sort_by_label": newTransformFuncSortByLabel(false), "sort_by_label_desc": newTransformFuncSortByLabel(true), } @@ -657,6 +660,127 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) { return rvs, nil } +func transformHistogramAvg(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + tss := vmrangeBucketsToLE(args[0]) + m := groupLeTimeseries(tss) + rvs := make([]*timeseries, 0, len(m)) + for _, xss := range m { + sort.Slice(xss, func(i, j int) bool { + return xss[i].le < xss[j].le + }) + dst := xss[0].ts + for i := range dst.Values { + dst.Values[i] = avgForLeTimeseries(i, xss) + } + rvs = append(rvs, dst) + } + return rvs, nil +} + +func transformHistogramStddev(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + tss := vmrangeBucketsToLE(args[0]) + m := groupLeTimeseries(tss) + rvs := make([]*timeseries, 0, len(m)) + for _, xss := range m { + sort.Slice(xss, func(i, j int) bool { + return xss[i].le < xss[j].le + }) + dst := xss[0].ts + for i := range dst.Values { + v := stdvarForLeTimeseries(i, xss) + dst.Values[i] = math.Sqrt(v) + } + rvs = append(rvs, dst) + } + return rvs, nil +} + +func transformHistogramStdvar(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + tss := vmrangeBucketsToLE(args[0]) + m := groupLeTimeseries(tss) + rvs := make([]*timeseries, 0, len(m)) + for _, xss := range m { + sort.Slice(xss, func(i, j int) bool { + return xss[i].le < xss[j].le + }) + dst := xss[0].ts + for i := range dst.Values { + dst.Values[i] = stdvarForLeTimeseries(i, xss) + } + rvs = append(rvs, dst) + } + return rvs, nil +} + +func avgForLeTimeseries(i int, xss []leTimeseries) float64 { + lePrev := float64(0) + vPrev := float64(0) + sum := float64(0) + weightTotal := float64(0) + for _, xs := range xss { + if math.IsInf(xs.le, 0) { + continue + } + le := xs.le + n := (le + lePrev) / 2 + v := xs.ts.Values[i] + weight := v - vPrev + sum += n * weight + weightTotal += weight + lePrev = le + vPrev = v + } + if weightTotal == 0 { + return nan + } + return sum / weightTotal +} + +func stdvarForLeTimeseries(i int, xss []leTimeseries) float64 { + lePrev := float64(0) + vPrev := float64(0) + sum := float64(0) + sum2 := float64(0) + weightTotal := float64(0) + for _, xs := range xss { + if math.IsInf(xs.le, 0) { + continue + } + le := xs.le + n := (le + lePrev) / 2 + v := xs.ts.Values[i] + weight := v - vPrev + sum += n * weight + sum2 += n * n * weight + weightTotal += weight + lePrev = le + vPrev = v + } + if weightTotal == 0 { + return nan + } + avg := sum / weightTotal + avg2 := sum2 / weightTotal + stdvar := avg2 - avg*avg + if stdvar < 0 { + // Correct possible calculation error. + stdvar = 0 + } + return stdvar +} + func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) { args := tfa.args if len(args) < 2 || len(args) > 3 { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2372889c8..2b2efeb0a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,6 +2,10 @@ # tip +* FEATURE: add the following functions to [MetricsQL](https://victoriametrics.github.io/MetricsQL.html): + - `histogram_avg(buckets)` - returns the average value for the given buckets. + - `histogram_stdvar(buckets)` - returns standard variance for the given buckets. + - `histogram_stddev(buckets)` - returns standard deviation for the given buckets. * FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 15bf2638c..dbf8dfd4c 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -104,9 +104,12 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g - `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details. - `histogram_over_time(m[d])` - calculates [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) for `m` over `d`. For example, the following query calculates median temperature by country over the last 24 hours: - `histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmbucket, country))`. + `histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmrange,country))`. - `histogram_share(le, buckets)` - returns share (in the range 0..1) for `buckets` that fall below `le`. Useful for calculating SLI and SLO. For instance, the following query returns the share of requests which are performed under 1.5 seconds during the last 5 minutes: `histogram_share(1.5, sum(rate(request_duration_seconds_bucket[5m])) by (le))`. +- `histogram_avg(buckets)` - returns the average value for the given buckets. It can be used for calculating the average over the given time range across multiple time series. For exmple, `histogram_avg(sum(histogram_over_time(response_time_duration_seconds[5m])) by (vmrange,job))` would return the average response time per each `job` over the last 5 minutes. +- `histogram_stdvar(buckets)` - returns standard variance for the given buckets. It can be used for calculating standard deviation over the given time range across multiple time series. For example, `histogram_stdvar(sum(histogram_over_time(temperature[24])) by (vmrange,country))` would return standard deviation for the temperature per each country over the last 24 hours. +- `histogram_stddev(buckets)` - returns standard deviation for the given buckets. - `topk_*` and `bottomk_*` aggregate functions, which return up to K time series. Note that the standard `topk` function may return more than K time series - see [this article](https://www.robustperception.io/graph-top-n-time-series-in-grafana) for details. - `topk_min(k, q)` - returns top K time series with the max minimums on the given time range diff --git a/go.mod b/go.mod index 995af9a83..3809d9ecf 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.0.12 github.com/VictoriaMetrics/metrics v1.15.2 - github.com/VictoriaMetrics/metricsql v0.12.0 + github.com/VictoriaMetrics/metricsql v0.14.0 github.com/aws/aws-sdk-go v1.37.22 github.com/cespare/xxhash/v2 v2.1.1 github.com/cheggaaa/pb/v3 v3.0.6 diff --git a/go.sum b/go.sum index b6629e2cd..5af43090d 100644 --- a/go.sum +++ b/go.sum @@ -87,8 +87,8 @@ github.com/VictoriaMetrics/fasthttp v1.0.12/go.mod h1:3SeUL4zwB/p/a9aEeRc6gdlbrt github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= github.com/VictoriaMetrics/metrics v1.15.2 h1:w/GD8L9tm+gvx1oZvAofRRXwammiicdI0jgLghA2Gdo= github.com/VictoriaMetrics/metrics v1.15.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= -github.com/VictoriaMetrics/metricsql v0.12.0 h1:NMIu0MPBmGP34g4RUjI1U0xW5XYp7IVNXe9KtZI3PFQ= -github.com/VictoriaMetrics/metricsql v0.12.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= +github.com/VictoriaMetrics/metricsql v0.14.0 h1:XGbpZJVskUPJFo2C7vG6ATxXBwkBFPe7EWZXB2HZt2U= +github.com/VictoriaMetrics/metricsql v0.14.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdcM= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go index 4f559f066..09afe6747 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go @@ -112,7 +112,7 @@ func getMetricExprForOptimization(e Expr) *MetricExpr { case "absent", "histogram_quantile", "label_join", "label_replace", "scalar", "vector", "label_set", "label_map", "label_uppercase", "label_lowercase", "label_del", "label_keep", "label_copy", "label_move", "label_transform", "label_value", "label_match", "label_mismatch", - "prometheus_buckets", "buckets_limit", "histogram_share", "union", "": + "prometheus_buckets", "buckets_limit", "histogram_share", "histogram_avg", "histogram_stdvar", "histogram_stddev", "union", "": // metric expressions for these functions cannot be optimized. return nil } diff --git a/vendor/github.com/VictoriaMetrics/metricsql/transform.go b/vendor/github.com/VictoriaMetrics/metricsql/transform.go index f8dd7cc84..df4f9b2b5 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/transform.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/transform.go @@ -83,6 +83,9 @@ var transformFuncs = map[string]bool{ "prometheus_buckets": true, "buckets_limit": true, "histogram_share": true, + "histogram_avg": true, + "histogram_stdvar": true, + "histogram_stddev": true, "sort_by_label": true, "sort_by_label_desc": true, } diff --git a/vendor/modules.txt b/vendor/modules.txt index 9d9b60eb3..5c3949879 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -21,7 +21,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.15.2 ## explicit github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.12.0 +# github.com/VictoriaMetrics/metricsql v0.14.0 ## explicit github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop