mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-11 15:34:56 +00:00
app/vmselect: quantile
func compatiblity with Prometheus (#1646)
* app/vmselect: `quantile` func compatiblity with Prometheus The `quantile` func was previously calculated by https://github.com/valyala/histogram package. The result of such calculation was always the closest real value to requested quantile. While in Prometheus implementation interpolation is used. Such difference may result into discrepancy in output between Prometheus and VictoriaMetrics. This commit adds a Prometheus-like `quantile` function. It also used by other functions which depend on it, such as `quantiles`, `quantile_over_time`, `median` etc. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1625 Signed-off-by: hagen1778 <roman@victoriametrics.com> * app/vmselect: `quantile` review fixes * quantile functions were split into multiple to provide different API for already sorted data; * float64sPool is used for reducing allocations. Items in pool may have different sizes, but defining a new pool was complicates due to name collisions; Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
80b0b92d2f
commit
526dd93b32
5 changed files with 116 additions and 63 deletions
|
@ -2,16 +2,15 @@ package promql
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
"github.com/valyala/histogram"
|
||||
"math"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var aggrFuncs = map[string]aggrFunc{
|
||||
|
@ -801,15 +800,71 @@ func avgValue(values []float64) float64 {
|
|||
}
|
||||
|
||||
func medianValue(values []float64) float64 {
|
||||
h := histogram.GetFast()
|
||||
for _, v := range values {
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
}
|
||||
return quantile(0.5, values)
|
||||
}
|
||||
|
||||
// quantiles calculates the given phis from originValues
|
||||
// without modifying originValues
|
||||
func quantiles(phis []float64, originValues []float64) []float64 {
|
||||
a := float64sPool.Get().(*float64s)
|
||||
a.A = prepareForQuantileFloat64(a.A[:0], originValues)
|
||||
res := quantilesSorted(phis, a.A)
|
||||
float64sPool.Put(a)
|
||||
return res
|
||||
}
|
||||
|
||||
func quantilesSorted(phis []float64, values []float64) []float64 {
|
||||
res := make([]float64, len(phis))
|
||||
for i, phi := range phis {
|
||||
res[i] = quantileSorted(phi, values)
|
||||
}
|
||||
value := h.Quantile(0.5)
|
||||
histogram.PutFast(h)
|
||||
return value
|
||||
return res
|
||||
}
|
||||
|
||||
// quantile calculates the given phi from originValues
|
||||
// without modifying originValues
|
||||
func quantile(phi float64, originValues []float64) float64 {
|
||||
a := float64sPool.Get().(*float64s)
|
||||
a.A = prepareForQuantileFloat64(a.A[:0], originValues)
|
||||
res := quantileSorted(phi, a.A)
|
||||
float64sPool.Put(a)
|
||||
return res
|
||||
}
|
||||
|
||||
// prepareForQuantileFloat64 copies items from src
|
||||
// to dst but removes NaNs and sorts the dst
|
||||
func prepareForQuantileFloat64(dst, src []float64) []float64 {
|
||||
for _, v := range src {
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
dst = append(dst, v)
|
||||
}
|
||||
sort.Float64s(dst)
|
||||
return dst
|
||||
}
|
||||
|
||||
// quantileSorted calculates the given quantile of a sorted list of values.
|
||||
// It is expected that values won't contain NaN items.
|
||||
// The implementation mimics Prometheus implementation for compatibility's sake.
|
||||
func quantileSorted(phi float64, values []float64) float64 {
|
||||
if len(values) == 0 || math.IsNaN(phi) {
|
||||
return nan
|
||||
}
|
||||
if phi < 0 {
|
||||
return math.Inf(-1)
|
||||
}
|
||||
if phi > 1 {
|
||||
return math.Inf(+1)
|
||||
}
|
||||
n := float64(len(values))
|
||||
rank := phi * (n - 1)
|
||||
|
||||
lowerIndex := math.Max(0, math.Floor(rank))
|
||||
upperIndex := math.Min(n-1, lowerIndex+1)
|
||||
|
||||
weight := rank - math.Floor(rank)
|
||||
return values[int(lowerIndex)]*(1-weight) + values[int(upperIndex)]*weight
|
||||
}
|
||||
|
||||
func aggrFuncMAD(tss []*timeseries) []*timeseries {
|
||||
|
@ -987,22 +1042,25 @@ func aggrFuncQuantiles(afa *aggrFuncArg) ([]*timeseries, error) {
|
|||
ts.MetricName.AddTag(dstLabel, fmt.Sprintf("%g", phis[j]))
|
||||
tssDst[j] = ts
|
||||
}
|
||||
h := histogram.GetFast()
|
||||
defer histogram.PutFast(h)
|
||||
|
||||
var qs []float64
|
||||
values := float64sPool.Get().(*float64s)
|
||||
for n := range tss[0].Values {
|
||||
h.Reset()
|
||||
values.A = values.A[:0]
|
||||
for j := range tss {
|
||||
v := tss[j].Values[n]
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
values.A = append(values.A, v)
|
||||
}
|
||||
qs = h.Quantiles(qs[:0], phis)
|
||||
sort.Float64s(values.A)
|
||||
qs = quantilesSorted(phis, values.A)
|
||||
for j := range tssDst {
|
||||
tssDst[j].Values[n] = qs[j]
|
||||
}
|
||||
}
|
||||
float64sPool.Put(values)
|
||||
return tssDst
|
||||
}
|
||||
return aggrFuncExt(afe, argOrig, &afa.ae.Modifier, afa.ae.Limit, false)
|
||||
|
@ -1034,18 +1092,19 @@ func aggrFuncMedian(afa *aggrFuncArg) ([]*timeseries, error) {
|
|||
func newAggrQuantileFunc(phis []float64) func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
|
||||
return func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
|
||||
dst := tss[0]
|
||||
h := histogram.GetFast()
|
||||
defer histogram.PutFast(h)
|
||||
var values []float64
|
||||
for n := range dst.Values {
|
||||
h.Reset()
|
||||
values = values[:0]
|
||||
for j := range tss {
|
||||
v := tss[j].Values[n]
|
||||
if !math.IsNaN(v) {
|
||||
h.Update(v)
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
values = append(values, v)
|
||||
}
|
||||
phi := phis[n]
|
||||
dst.Values[n] = h.Quantile(phi)
|
||||
sort.Float64s(values)
|
||||
dst.Values[n] = quantileSorted(phi, values)
|
||||
}
|
||||
tss[0] = dst
|
||||
return tss[:1]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package promql
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -4620,7 +4621,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `quantile_over_time(0.9, label_set(round(rand(0), 0.01), "__name__", "foo", "xx", "yy")[200s:5s])`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.89, 0.89, 0.95, 0.87, 0.92, 0.89},
|
||||
Values: []float64{0.893, 0.892, 0.9510000000000001, 0.8730000000000001, 0.9250000000000002, 0.891},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.MetricGroup = []byte("foo")
|
||||
|
@ -4643,7 +4644,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
)`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.47, 0.57, 0.49, 0.54, 0.56, 0.53},
|
||||
Values: []float64{0.46499999999999997, 0.57, 0.485, 0.54, 0.555, 0.515},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("foo")
|
||||
|
@ -4659,7 +4660,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0.89, 0.89, 0.95, 0.87, 0.92, 0.89},
|
||||
Values: []float64{0.893, 0.892, 0.9510000000000001, 0.8730000000000001, 0.9250000000000002, 0.891},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("foo")
|
||||
|
@ -5278,7 +5279,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
})
|
||||
t.Run(`bottomk_median(1)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
|
||||
q := `sort(bottomk_median(1, label_set(10, "foo", "bar") or label_set(time()/15, "baz", "sss")))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, nan, nan, nan},
|
||||
|
@ -5532,9 +5533,10 @@ func TestExecSuccess(t *testing.T) {
|
|||
t.Run(`quantile(-2)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `quantile(-2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
inf := math.Inf(-1)
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10, 10, 10},
|
||||
Values: []float64{inf, inf, inf, inf, inf, inf},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5545,7 +5547,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `quantile(0.2, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10, 10, 10},
|
||||
Values: []float64{7.333333333333334, 8.4, 9.466666666666669, 10.133333333333333, 10.4, 10.666666666666668},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5556,7 +5558,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `quantile(0.5, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
|
||||
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5567,7 +5569,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `sort(quantiles("phi", 0.2, 0.5, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{6.666666666666667, 8, 9.333333333333334, 10, 10, 10},
|
||||
Values: []float64{7.333333333333334, 8.4, 9.466666666666669, 10.133333333333333, 10.4, 10.666666666666668},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{{
|
||||
|
@ -5576,7 +5578,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
}}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
|
||||
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.Tags = []storage.Tag{{
|
||||
|
@ -5591,7 +5593,7 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `median(label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
|
||||
Values: []float64{8.333333333333334, 9, 9.666666666666668, 10.333333333333332, 11, 11.666666666666668},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5611,9 +5613,10 @@ func TestExecSuccess(t *testing.T) {
|
|||
t.Run(`quantile(3)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `quantile(3, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
|
||||
inf := math.Inf(+1)
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{10, 10, 10, 10.666666666666666, 12, 13.333333333333334},
|
||||
Values: []float64{inf, inf, inf, inf, inf, inf},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5725,7 +5728,8 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `range_quantile(0.5, time())`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1600, 1600, 1600, 1600, 1600, 1600},
|
||||
// time() results in [1000 1200 1400 1600 1800 2000]
|
||||
Values: []float64{1500, 1500, 1500, 1500, 1500, 1500},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
@ -5736,7 +5740,8 @@ func TestExecSuccess(t *testing.T) {
|
|||
q := `range_median(time())`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{1600, 1600, 1600, 1600, 1600, 1600},
|
||||
// time() results in [1000 1200 1400 1600 1800 2000]
|
||||
Values: []float64{1500, 1500, 1500, 1500, 1500, 1500},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
|
|
|
@ -1066,12 +1066,7 @@ func newRollupQuantiles(args []interface{}) (rollupFunc, error) {
|
|||
// Fast path - only a single value.
|
||||
return values[0]
|
||||
}
|
||||
hf := histogram.GetFast()
|
||||
for _, v := range values {
|
||||
hf.Update(v)
|
||||
}
|
||||
qs := hf.Quantiles(nil, phis)
|
||||
histogram.PutFast(hf)
|
||||
qs := quantiles(phis, values)
|
||||
idx := rfa.idx
|
||||
tsm := rfa.tsm
|
||||
for i, phiStr := range phiStrs {
|
||||
|
@ -1102,13 +1097,8 @@ func newRollupQuantile(args []interface{}) (rollupFunc, error) {
|
|||
// Fast path - only a single value.
|
||||
return values[0]
|
||||
}
|
||||
hf := histogram.GetFast()
|
||||
for _, v := range values {
|
||||
hf.Update(v)
|
||||
}
|
||||
phi := phis[rfa.idx]
|
||||
qv := hf.Quantile(phi)
|
||||
histogram.PutFast(hf)
|
||||
qv := quantile(phi, values)
|
||||
return qv
|
||||
}
|
||||
return rf, nil
|
||||
|
|
|
@ -335,14 +335,14 @@ func TestRollupQuantileOverTime(t *testing.T) {
|
|||
testRollupFunc(t, "quantile_over_time", args, &me, vExpected)
|
||||
}
|
||||
|
||||
f(-123, 12)
|
||||
f(-0.5, 12)
|
||||
f(-123, math.Inf(-1))
|
||||
f(-0.5, math.Inf(-1))
|
||||
f(0, 12)
|
||||
f(0.1, 21)
|
||||
f(0.1, 22.1)
|
||||
f(0.5, 34)
|
||||
f(0.9, 99)
|
||||
f(0.9, 94.50000000000001)
|
||||
f(1, 123)
|
||||
f(234, 123)
|
||||
f(234, math.Inf(+1))
|
||||
}
|
||||
|
||||
func TestRollupPredictLinear(t *testing.T) {
|
||||
|
|
|
@ -14,7 +14,6 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
"github.com/valyala/histogram"
|
||||
)
|
||||
|
||||
var transformFuncsKeepMetricGroup = map[string]bool{
|
||||
|
@ -1178,23 +1177,23 @@ func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
|
|||
}
|
||||
phi := phis[0]
|
||||
rvs := args[1]
|
||||
hf := histogram.GetFast()
|
||||
var values []float64
|
||||
for _, ts := range rvs {
|
||||
hf.Reset()
|
||||
lastIdx := -1
|
||||
values := ts.Values
|
||||
for i, v := range values {
|
||||
originValues := ts.Values
|
||||
values = values[:0]
|
||||
for i, v := range originValues {
|
||||
if math.IsNaN(v) {
|
||||
continue
|
||||
}
|
||||
hf.Update(v)
|
||||
values = append(values, v)
|
||||
lastIdx = i
|
||||
}
|
||||
if lastIdx >= 0 {
|
||||
values[lastIdx] = hf.Quantile(phi)
|
||||
sort.Float64s(values)
|
||||
originValues[lastIdx] = quantileSorted(phi, values)
|
||||
}
|
||||
}
|
||||
histogram.PutFast(hf)
|
||||
setLastValues(rvs)
|
||||
return rvs, nil
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue