app/vmselect/promql: add histogram_avg(), histogram_stddev() and histogram_stdvar() functions to MetricsQL

This commit is contained in:
Aliaksandr Valialkin 2021-03-04 14:12:07 +02:00
parent 423cd981fb
commit a14053ffa0
9 changed files with 206 additions and 6 deletions

View file

@ -2810,6 +2810,72 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`stdvar_over_time()`, func(t *testing.T) {
t.Parallel()
q := `round(stdvar_over_time(rand(0)[200s:5s]), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.082, 0.088, 0.092, 0.075, 0.101, 0.08},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_stdvar()`, func(t *testing.T) {
t.Parallel()
q := `round(histogram_stdvar(histogram_over_time(rand(0)[200s:5s])), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.079, 0.089, 0.089, 0.071, 0.1, 0.082},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`stddev_over_time()`, func(t *testing.T) {
t.Parallel()
q := `round(stddev_over_time(rand(0)[200s:5s]), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.286, 0.297, 0.303, 0.274, 0.318, 0.283},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_stddev()`, func(t *testing.T) {
t.Parallel()
q := `round(histogram_stddev(histogram_over_time(rand(0)[200s:5s])), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.281, 0.299, 0.298, 0.267, 0.316, 0.286},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`avg_over_time()`, func(t *testing.T) {
t.Parallel()
q := `round(avg_over_time(rand(0)[200s:5s]), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.521, 0.518, 0.509, 0.544, 0.511, 0.504},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_avg()`, func(t *testing.T) {
t.Parallel()
q := `round(histogram_avg(histogram_over_time(rand(0)[200s:5s])), 0.001)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0.519, 0.521, 0.503, 0.543, 0.511, 0.506},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`histogram_share(single-value-valid-le)`, func(t *testing.T) {
t.Parallel()
q := `histogram_share(80, label_set(100, "le", "200"))`

View file

@ -118,6 +118,9 @@ var transformFuncs = map[string]transformFunc{
"prometheus_buckets": transformPrometheusBuckets,
"buckets_limit": transformBucketsLimit,
"histogram_share": transformHistogramShare,
"histogram_avg": transformHistogramAvg,
"histogram_stdvar": transformHistogramStdvar,
"histogram_stddev": transformHistogramStddev,
"sort_by_label": newTransformFuncSortByLabel(false),
"sort_by_label_desc": newTransformFuncSortByLabel(true),
}
@ -657,6 +660,127 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
func transformHistogramAvg(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
tss := vmrangeBucketsToLE(args[0])
m := groupLeTimeseries(tss)
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
})
dst := xss[0].ts
for i := range dst.Values {
dst.Values[i] = avgForLeTimeseries(i, xss)
}
rvs = append(rvs, dst)
}
return rvs, nil
}
func transformHistogramStddev(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
tss := vmrangeBucketsToLE(args[0])
m := groupLeTimeseries(tss)
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
})
dst := xss[0].ts
for i := range dst.Values {
v := stdvarForLeTimeseries(i, xss)
dst.Values[i] = math.Sqrt(v)
}
rvs = append(rvs, dst)
}
return rvs, nil
}
func transformHistogramStdvar(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {
return nil, err
}
tss := vmrangeBucketsToLE(args[0])
m := groupLeTimeseries(tss)
rvs := make([]*timeseries, 0, len(m))
for _, xss := range m {
sort.Slice(xss, func(i, j int) bool {
return xss[i].le < xss[j].le
})
dst := xss[0].ts
for i := range dst.Values {
dst.Values[i] = stdvarForLeTimeseries(i, xss)
}
rvs = append(rvs, dst)
}
return rvs, nil
}
func avgForLeTimeseries(i int, xss []leTimeseries) float64 {
lePrev := float64(0)
vPrev := float64(0)
sum := float64(0)
weightTotal := float64(0)
for _, xs := range xss {
if math.IsInf(xs.le, 0) {
continue
}
le := xs.le
n := (le + lePrev) / 2
v := xs.ts.Values[i]
weight := v - vPrev
sum += n * weight
weightTotal += weight
lePrev = le
vPrev = v
}
if weightTotal == 0 {
return nan
}
return sum / weightTotal
}
func stdvarForLeTimeseries(i int, xss []leTimeseries) float64 {
lePrev := float64(0)
vPrev := float64(0)
sum := float64(0)
sum2 := float64(0)
weightTotal := float64(0)
for _, xs := range xss {
if math.IsInf(xs.le, 0) {
continue
}
le := xs.le
n := (le + lePrev) / 2
v := xs.ts.Values[i]
weight := v - vPrev
sum += n * weight
sum2 += n * n * weight
weightTotal += weight
lePrev = le
vPrev = v
}
if weightTotal == 0 {
return nan
}
avg := sum / weightTotal
avg2 := sum2 / weightTotal
stdvar := avg2 - avg*avg
if stdvar < 0 {
// Correct possible calculation error.
stdvar = 0
}
return stdvar
}
func transformHistogramQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 2 || len(args) > 3 {

View file

@ -2,6 +2,10 @@
# tip
* FEATURE: add the following functions to [MetricsQL](https://victoriametrics.github.io/MetricsQL.html):
- `histogram_avg(buckets)` - returns the average value for the given buckets.
- `histogram_stdvar(buckets)` - returns standard variance for the given buckets.
- `histogram_stddev(buckets)` - returns standard deviation for the given buckets.
* FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets).

View file

@ -104,9 +104,12 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details.
- `histogram_over_time(m[d])` - calculates [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) for `m` over `d`.
For example, the following query calculates median temperature by country over the last 24 hours:
`histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmbucket, country))`.
`histogram_quantile(0.5, sum(histogram_over_time(temperature[24h])) by (vmrange,country))`.
- `histogram_share(le, buckets)` - returns share (in the range 0..1) for `buckets` that fall below `le`. Useful for calculating SLI and SLO.
For instance, the following query returns the share of requests which are performed under 1.5 seconds during the last 5 minutes: `histogram_share(1.5, sum(rate(request_duration_seconds_bucket[5m])) by (le))`.
- `histogram_avg(buckets)` - returns the average value for the given buckets. It can be used for calculating the average over the given time range across multiple time series. For exmple, `histogram_avg(sum(histogram_over_time(response_time_duration_seconds[5m])) by (vmrange,job))` would return the average response time per each `job` over the last 5 minutes.
- `histogram_stdvar(buckets)` - returns standard variance for the given buckets. It can be used for calculating standard deviation over the given time range across multiple time series. For example, `histogram_stdvar(sum(histogram_over_time(temperature[24])) by (vmrange,country))` would return standard deviation for the temperature per each country over the last 24 hours.
- `histogram_stddev(buckets)` - returns standard deviation for the given buckets.
- `topk_*` and `bottomk_*` aggregate functions, which return up to K time series. Note that the standard `topk` function may return more than K time series -
see [this article](https://www.robustperception.io/graph-top-n-time-series-in-grafana) for details.
- `topk_min(k, q)` - returns top K time series with the max minimums on the given time range

2
go.mod
View file

@ -9,7 +9,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.0.12
github.com/VictoriaMetrics/metrics v1.15.2
github.com/VictoriaMetrics/metricsql v0.12.0
github.com/VictoriaMetrics/metricsql v0.14.0
github.com/aws/aws-sdk-go v1.37.22
github.com/cespare/xxhash/v2 v2.1.1
github.com/cheggaaa/pb/v3 v3.0.6

4
go.sum
View file

@ -87,8 +87,8 @@ github.com/VictoriaMetrics/fasthttp v1.0.12/go.mod h1:3SeUL4zwB/p/a9aEeRc6gdlbrt
github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
github.com/VictoriaMetrics/metrics v1.15.2 h1:w/GD8L9tm+gvx1oZvAofRRXwammiicdI0jgLghA2Gdo=
github.com/VictoriaMetrics/metrics v1.15.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
github.com/VictoriaMetrics/metricsql v0.12.0 h1:NMIu0MPBmGP34g4RUjI1U0xW5XYp7IVNXe9KtZI3PFQ=
github.com/VictoriaMetrics/metricsql v0.12.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VictoriaMetrics/metricsql v0.14.0 h1:XGbpZJVskUPJFo2C7vG6ATxXBwkBFPe7EWZXB2HZt2U=
github.com/VictoriaMetrics/metricsql v0.14.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdcM=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=

View file

@ -112,7 +112,7 @@ func getMetricExprForOptimization(e Expr) *MetricExpr {
case "absent", "histogram_quantile", "label_join", "label_replace", "scalar", "vector",
"label_set", "label_map", "label_uppercase", "label_lowercase", "label_del", "label_keep", "label_copy",
"label_move", "label_transform", "label_value", "label_match", "label_mismatch",
"prometheus_buckets", "buckets_limit", "histogram_share", "union", "":
"prometheus_buckets", "buckets_limit", "histogram_share", "histogram_avg", "histogram_stdvar", "histogram_stddev", "union", "":
// metric expressions for these functions cannot be optimized.
return nil
}

View file

@ -83,6 +83,9 @@ var transformFuncs = map[string]bool{
"prometheus_buckets": true,
"buckets_limit": true,
"histogram_share": true,
"histogram_avg": true,
"histogram_stdvar": true,
"histogram_stddev": true,
"sort_by_label": true,
"sort_by_label_desc": true,
}

2
vendor/modules.txt vendored
View file

@ -21,7 +21,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.15.2
## explicit
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.12.0
# github.com/VictoriaMetrics/metricsql v0.14.0
## explicit
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop