app/vmselect/promql: increase accuracy for buckets_limit() function for small limits by skipping the first and the last buckets during merge

The first and the last buckets are usually `[0 ... leMin]` and `(leMax ... +Inf)`. If they are merged with adjacent buckets, then the resulting accuracy can suffer.
2025-03-11 15:34:56 +00:00 · 2021-02-26 22:53:41 +02:00 · 2021-02-26 22:53:41 +02:00 · d86e9b49c4
commit d86e9b49c4
parent ed8441ec52
3 changed files with 37 additions and 13 deletions
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@ -3337,14 +3337,14 @@ func TestExecSuccess(t *testing.T) {
 		)))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{52, 52, 52, 52, 52, 52},
+			Values:     []float64{9, 9, 9, 9, 9, 9},
 			Timestamps: timestampsExpected,
 		}
 		r1.MetricName.MetricGroup = []byte("metric")
 		r1.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("200"),
+				Value: []byte("10"),
 			},
 			{
 				Key:   []byte("x"),
@ -3353,11 +3353,27 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{100, 100, 100, 100, 100, 100},
+			Values:     []float64{98, 98, 98, 98, 98, 98},
 			Timestamps: timestampsExpected,
 		}
 		r2.MetricName.MetricGroup = []byte("metric")
 		r2.MetricName.Tags = []storage.Tag{
+			{
+				Key:   []byte("le"),
+				Value: []byte("300"),
+			},
+			{
+				Key:   []byte("x"),
+				Value: []byte("y"),
+			},
+		}
+		r3 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{100, 100, 100, 100, 100, 100},
+			Timestamps: timestampsExpected,
+		}
+		r3.MetricName.MetricGroup = []byte("metric")
+		r3.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
 				Value: []byte("inf"),
@ -3367,7 +3383,7 @@ func TestExecSuccess(t *testing.T) {
 				Value: []byte("y"),
 			},
 		}
-		resultExpected := []netstorage.Result{r1, r2}
+		resultExpected := []netstorage.Result{r1, r2, r3}
 		f(q, resultExpected)
 	})
 	t.Run(`prometheus_buckets(missing-vmrange)`, func(t *testing.T) {
@ -4175,11 +4191,11 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`sum(histogram_over_time) by (vmrange)`, func(t *testing.T) {
 		t.Parallel()
-		q := `sort_desc(
+		q := `sort_by_label(
 			buckets_limit(
 				3,
 				sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange)
-			)
+			), "le"
 		)`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
@ -4194,24 +4210,24 @@ func TestExecSuccess(t *testing.T) {
 		}
 		r2 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{24, 22, 26, 25, 24, 24},
+			Values:     []float64{0, 0, 0, 0, 0, 0},
 			Timestamps: timestampsExpected,
 		}
 		r2.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("1.896e+00"),
+				Value: []byte("1.000e+00"),
 			},
 		}
 		r3 := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values:     []float64{11, 12, 11, 7, 11, 13},
+			Values:     []float64{40, 40, 40, 40, 40, 40},
 			Timestamps: timestampsExpected,
 		}
 		r3.MetricName.Tags = []storage.Tag{
 			{
 				Key:   []byte("le"),
-				Value: []byte("1.468e+00"),
+				Value: []byte("2.448e+00"),
 			},
 		}
 		resultExpected := []netstorage.Result{r1, r2, r3}
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@ -343,6 +343,11 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
 	if limit <= 0 {
 		return nil, nil
 	}
+	if limit < 3 {
+		// Preserve the first and the last bucket for better accuracy,
+		// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]`
+		limit = 3
+	}
 	tss := vmrangeBucketsToLE(args[1])
 	if len(tss) == 0 {
 		return nil, nil
@ -404,15 +409,18 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
 			}
 		}
 		for len(leGroup) > limit {
+			// Preserve the first and the last bucket for better accuracy,
+			// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]`
 			xxMinIdx := 0
-			for i, xx := range leGroup {
+			for i, xx := range leGroup[1 : len(leGroup)-1] {
 				if xx.hits < leGroup[xxMinIdx].hits {
 					xxMinIdx = i
 				}
 			}
+			xxMinIdx++
 			// Merge the leGroup[xxMinIdx] bucket with the smallest adjacent bucket in order to preserve
 			// the maximum accuracy.
-			if xxMinIdx+1 == len(leGroup) || (xxMinIdx > 0 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits) {
+			if xxMinIdx > 1 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits {
 				xxMinIdx--
 			}
 			leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
@ -578,7 +586,6 @@ func transformHistogramShare(tfa *transformFuncArg) ([]*timeseries, error) {
 	m := groupLeTimeseries(tss)

 	// Calculate share for les
-
 	share := func(i int, les []float64, xss []leTimeseries) (q, lower, upper float64) {
 		leReq := les[i]
 		if math.IsNaN(leReq) || len(xss) == 0 {
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -15,6 +15,7 @@
 * FEATURE: vmagent: export `vm_promscrape_target_relabel_duration_seconds` metric, which can be used for monitoring the time spend on relabeling for discovered targets.
 * FEATURE: vmagent: optimize [relabeling](https://victoriametrics.github.io/vmagent.html#relabeling) performance for common cases.
 * FEATURE: add `increase_pure(m[d])` function to MetricsQL. It works the same as `increase(m[d])` except of various edge cases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/962) for details.
+* FEATURE: increase accuracy for `buckets_limit(limit, buckets)` results for small `limit` values. See [MetricsQL docs](https://victoriametrics.github.io/MetricsQL.html) for details.


 * BUGFIX: vmagent: properly perform graceful shutdown on `SIGINT` and `SIGTERM` signals. The graceful shutdown has been broken in `v1.54.0`. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1065