app/vmselect/promql: increase accuracy for buckets_limit() function for small limits by skipping the first and the last buckets during merge

The first and the last buckets are usually `[0 ... leMin]` and `(leMax ... +Inf)`. If they are merged with adjacent buckets,
then the resulting accuracy can suffer.
Aliaksandr Valialkin 2021-02-26 22:53:41 +02:00
parent ed8441ec52
commit d86e9b49c4
3 changed files with 37 additions and 13 deletions
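The accuracy cost described above is easiest to see on a quantile estimate. Below is a minimal, self-contained Go sketch of the effect; the `quantile` helper, the bucket layout and the hit counts are hypothetical illustrations, not VictoriaMetrics code. With the lower edge bucket `le=10` intact, the interpolated median lands near 143.6; once that bucket is merged away, the same data yields about 153.1.

```go
package main

import (
	"fmt"
	"math"
)

// quantile linearly interpolates the phi-quantile from cumulative `le`
// buckets, in the spirit of histogram_quantile(). Illustrative sketch only.
func quantile(phi float64, les, cum []float64) float64 {
	total := cum[len(cum)-1]
	target := phi * total
	prevLe, prevCum := 0.0, 0.0
	for i, c := range cum {
		if c >= target {
			if i == len(cum)-1 {
				return prevLe // landed in the +Inf bucket: only the lower bound is known
			}
			return prevLe + (les[i]-prevLe)*(target-prevCum)/(c-prevCum)
		}
		prevLe, prevCum = les[i], c
	}
	return prevLe
}

func main() {
	inf := math.Inf(1)
	// Hypothetical cumulative hits: 9 observations below 10, 98 below 300, 100 total.
	fmt.Println(quantile(0.5, []float64{10, 300, inf}, []float64{9, 98, 100})) // ~143.6
	// With the first bucket merged away, the le=10 boundary is lost and the
	// median estimate for the same data drifts:
	fmt.Println(quantile(0.5, []float64{300, inf}, []float64{98, 100})) // ~153.1
}
```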

app/vmselect/promql/exec_test.go

```diff
@@ -3337,14 +3337,14 @@ func TestExecSuccess(t *testing.T) {
 		)))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{52, 52, 52, 52, 52, 52},
+			Values:     []float64{9, 9, 9, 9, 9, 9},
 			Timestamps: timestampsExpected,
 		}
 		r1.MetricName.MetricGroup = []byte("metric")
 		r1.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("200"),
+				Value: []byte("10"),
 			},
 			{
 				Key:   []byte("x"),
@@ -3353,11 +3353,27 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{100, 100, 100, 100, 100, 100},
+			Values:     []float64{98, 98, 98, 98, 98, 98},
 			Timestamps: timestampsExpected,
 		}
 		r2.MetricName.MetricGroup = []byte("metric")
 		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("le"),
+				Value: []byte("300"),
+			},
+			{
+				Key:   []byte("x"),
+				Value: []byte("y"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{100, 100, 100, 100, 100, 100},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.MetricGroup = []byte("metric")
+		r3.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
 				Value: []byte("inf"),
@@ -3367,7 +3383,7 @@ func TestExecSuccess(t *testing.T) {
 				Value: []byte("y"),
 			},
 		}
-		resultExpected := []netstorage.Result{r1, r2}
+		resultExpected := []netstorage.Result{r1, r2, r3}
 		f(q, resultExpected)
 	})
 	t.Run(`prometheus_buckets(missing-vmrange)`, func(t *testing.T) {
@@ -4175,11 +4191,11 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`sum(histogram_over_time) by (vmrange)`, func(t *testing.T) {
 		t.Parallel()
-		q := `sort_desc(
+		q := `sort_by_label(
 			buckets_limit(
 				3,
 				sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange)
-			)
+			), "le"
 		)`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
@@ -4194,24 +4210,24 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{24, 22, 26, 25, 24, 24},
+			Values:     []float64{0, 0, 0, 0, 0, 0},
 			Timestamps: timestampsExpected,
 		}
 		r2.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("1.896e+00"),
+				Value: []byte("1.000e+00"),
 			},
 		}
 		r3 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{11, 12, 11, 7, 11, 13},
+			Values:     []float64{40, 40, 40, 40, 40, 40},
 			Timestamps: timestampsExpected,
 		}
 		r3.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("1.468e+00"),
+				Value: []byte("2.448e+00"),
 			},
 		}
 		resultExpected := []netstorage.Result{r1, r2, r3}
```

app/vmselect/promql/transform.go

```diff
@@ -343,6 +343,11 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
 	if limit <= 0 {
 		return nil, nil
 	}
+	if limit < 3 {
+		// Preserve the first and the last bucket for better accuracy,
+		// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]`
+		limit = 3
+	}
 	tss := vmrangeBucketsToLE(args[1])
 	if len(tss) == 0 {
 		return nil, nil
```
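A consequence of this clamp is that `buckets_limit(1, m)` and `buckets_limit(2, m)` now behave like `buckets_limit(3, m)`: fewer than three buckets cannot hold both edge buckets plus at least one inner bucket. A standalone restatement of the rule, with a hypothetical helper name:

```go
// effectiveBucketsLimit mirrors the clamp in transformBucketsLimit above:
// non-positive limits yield no series, and limits below 3 are raised to 3
// so that the first bucket, the last bucket and at least one inner bucket
// can all survive the merge loop.
func effectiveBucketsLimit(limit int) int {
	if limit <= 0 {
		return 0 // the caller returns nil series in this case
	}
	if limit < 3 {
		return 3
	}
	return limit
}
```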
```diff
@@ -404,15 +409,18 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
 		}
 	}
 	for len(leGroup) > limit {
+		// Preserve the first and the last bucket for better accuracy,
+		// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]`
 		xxMinIdx := 0
-		for i, xx := range leGroup {
+		for i, xx := range leGroup[1 : len(leGroup)-1] {
 			if xx.hits < leGroup[xxMinIdx].hits {
 				xxMinIdx = i
 			}
 		}
+		xxMinIdx++
 		// Merge the leGroup[xxMinIdx] bucket with the smallest adjacent bucket in order to preserve
 		// the maximum accuracy.
-		if xxMinIdx+1 == len(leGroup) || (xxMinIdx > 0 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits) {
+		if xxMinIdx > 1 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits {
 			xxMinIdx--
 		}
 		leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
```
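To make the new strategy concrete, here is a simplified, self-contained Go sketch of the merge loop. The `bucket` type, the sample values and the slightly restructured index handling are illustrative assumptions, not the committed code, which operates on per-`le` groups as shown in the hunk above:

```go
package main

import (
	"fmt"
	"math"
)

// bucket approximates the per-`le` aggregate used by transformBucketsLimit:
// hits counts how many raw points fall into the bucket across the series.
type bucket struct {
	le   float64
	hits float64
}

// mergeBuckets repeatedly folds the inner bucket with the fewest hits into
// its smaller neighbor until at most limit buckets remain. Indexes 0 and
// len-1 are never candidates, so the `[0 ... leMin]` and `(leMax ... +Inf)`
// edge buckets always survive.
func mergeBuckets(leGroup []bucket, limit int) []bucket {
	for len(leGroup) > limit {
		xxMinIdx := 1
		for i := 2; i < len(leGroup)-1; i++ {
			if leGroup[i].hits < leGroup[xxMinIdx].hits {
				xxMinIdx = i
			}
		}
		// Merge towards the smaller adjacent bucket to lose as little
		// resolution as possible.
		if xxMinIdx > 1 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits {
			xxMinIdx--
		}
		leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
		leGroup = append(leGroup[:xxMinIdx], leGroup[xxMinIdx+1:]...)
	}
	return leGroup
}

func main() {
	buckets := []bucket{
		{10, 9}, {20, 12}, {50, 3}, {300, 74}, {math.Inf(1), 2},
	}
	// With limit=3 the sparse inner buckets get folded into their neighbors,
	// while the le=10 and le=+Inf edge buckets are kept untouched.
	fmt.Println(mergeBuckets(buckets, 3)) // [{10 9} {300 89} {+Inf 2}]
}
```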
```diff
@@ -578,7 +586,6 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
 	m := groupLeTimeseries(tss)
 
-
 	// Calculate share for les
 	share := func(i int, les []float64, xss []leTimeseries) (q, lower, upper float64) {
 		leReq := les[i]
 		if math.IsNaN(leReq) || len(xss) == 0 {
```

docs/CHANGELOG.md

```diff
@@ -15,6 +15,7 @@
 * FEATURE: vmagent: export `vm_promscrape_target_relabel_duration_seconds` metric, which can be used for monitoring the time spend on relabeling for discovered targets.
 * FEATURE: vmagent: optimize [relabeling](https://victoriametrics.github.io/vmagent.html#relabeling) performance for common cases.
 * FEATURE: add `increase_pure(m[d])` function to MetricsQL. It works the same as `increase(m[d])` except of various edge cases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/962) for details.
+* FEATURE: increase accuracy for `buckets_limit(limit, buckets)` results for small `limit` values. See [MetricsQL docs](https://victoriametrics.github.io/MetricsQL.html) for details.
 * BUGFIX: vmagent: properly perform graceful shutdown on `SIGINT` and `SIGTERM` signals. The graceful shutdown has been broken in `v1.54.0`. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1065
```