app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do

This should improve the performance of queries over big number of time series with big number of output points.
2025-01-30 15:22:07 +00:00 · 2019-08-19 13:01:37 +03:00 · 2019-08-19 13:01:37 +03:00 · 6a27657d73
commit 6a27657d73
parent c23b66a1ad
2 changed files with 62 additions and 11 deletions
--- a/app/vmselect/promql/rollup.go
+++ b/app/vmselect/promql/rollup.go
@ -197,18 +197,15 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i

 	i := 0
 	j := 0
+	n := 0
 	for _, tEnd := range rc.Timestamps {
 		tStart := tEnd - window
-		n := sort.Search(len(timestamps)-i, func(n int) bool {
-			return timestamps[i+n] > tStart
-		})
+		n = seekFirstTimestampIdxAfter(timestamps[i:], tStart, n)
 		i += n
 		if j < i {
 			j = i
 		}
-		n = sort.Search(len(timestamps)-j, func(n int) bool {
-			return timestamps[j+n] > tEnd
-		})
+		n = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, n)
 		j += n

 		rfa.prevValue = nan
@ -229,6 +226,46 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
 	return dstValues
 }

+func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
+	if len(timestamps) == 0 || timestamps[0] > seekTimestamp {
+		return 0
+	}
+	startIdx := nHint - 2
+	if startIdx < 0 {
+		startIdx = 0
+	}
+	if startIdx >= len(timestamps) {
+		startIdx = len(timestamps) - 1
+	}
+	endIdx := nHint + 2
+	if endIdx > len(timestamps) {
+		endIdx = len(timestamps)
+	}
+	if startIdx > 0 && timestamps[startIdx] <= seekTimestamp {
+		timestamps = timestamps[startIdx:]
+		endIdx -= startIdx
+	} else {
+		startIdx = 0
+	}
+	if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp {
+		timestamps = timestamps[:endIdx]
+	}
+	if len(timestamps) < 16 {
+		// Fast path: the number of timestamps to search is small, so scan them all.
+		for i, timestamp := range timestamps {
+			if timestamp > seekTimestamp {
+				return startIdx + i
+			}
+		}
+		return startIdx + len(timestamps)
+	}
+	// Slow path: too big len(timestamps), so use binary search.
+	i := sort.Search(len(timestamps), func(n int) bool {
+		return n >= 0 && n < len(timestamps) && timestamps[n] > seekTimestamp
+	})
+	return startIdx + i
+}
+
 func getMaxPrevInterval(timestamps []int64) int64 {
 	if len(timestamps) < 2 {
 		return int64(maxSilenceInterval)
--- a/app/vmselect/promql/rollup_test.go
+++ b/app/vmselect/promql/rollup_test.go
@ -584,7 +584,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{10, 50, 90, 130}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("lifetime", func(t *testing.T) {
+	t.Run("lifetime_1", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupLifetime,
 			Start:  0,
@ -598,7 +598,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("lifetime", func(t *testing.T) {
+	t.Run("lifetime_2", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupLifetime,
 			Start:  0,
@ -612,7 +612,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("scrape_interval", func(t *testing.T) {
+	t.Run("scrape_interval_1", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupScrapeInterval,
 			Start:  0,
@ -626,7 +626,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("scrape_interval", func(t *testing.T) {
+	t.Run("scrape_interval_2", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupScrapeInterval,
 			Start:  0,
@ -752,7 +752,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
-	t.Run("distinct", func(t *testing.T) {
+	t.Run("distinct_over_time_1", func(t *testing.T) {
 		rc := rollupConfig{
 			Func:   rollupDistinct,
 			Start:  0,
@ -766,6 +766,20 @@ func TestRollupFuncsNoWindow(t *testing.T) {
 		timestampsExpected := []int64{0, 40, 80, 120, 160}
 		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
 	})
+	t.Run("distinct_over_time_2", func(t *testing.T) {
+		rc := rollupConfig{
+			Func:   rollupDistinct,
+			Start:  0,
+			End:    160,
+			Step:   40,
+			Window: 80,
+		}
+		rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
+		values := rc.Do(nil, testValues, testTimestamps)
+		valuesExpected := []float64{nan, 4, 7, 6, 3}
+		timestampsExpected := []int64{0, 40, 80, 120, 160}
+		testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
+	})
 }

 func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {