From 6ae4b4190fdb177a870a7957cebc4c867d5e9604 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 19 Aug 2019 13:01:37 +0300 Subject: [PATCH] app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do This should improve the performance of queries over big number of time series with big number of output points. --- app/vmselect/promql/rollup.go | 49 ++++++++++++++++++++++++++---- app/vmselect/promql/rollup_test.go | 24 ++++++++++++--- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index c1d82237e..081c425f4 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -197,18 +197,15 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i i := 0 j := 0 + n := 0 for _, tEnd := range rc.Timestamps { tStart := tEnd - window - n := sort.Search(len(timestamps)-i, func(n int) bool { - return timestamps[i+n] > tStart - }) + n = seekFirstTimestampIdxAfter(timestamps[i:], tStart, n) i += n if j < i { j = i } - n = sort.Search(len(timestamps)-j, func(n int) bool { - return timestamps[j+n] > tEnd - }) + n = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, n) j += n rfa.prevValue = nan @@ -229,6 +226,46 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i return dstValues } +func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int { + if len(timestamps) == 0 || timestamps[0] > seekTimestamp { + return 0 + } + startIdx := nHint - 2 + if startIdx < 0 { + startIdx = 0 + } + if startIdx >= len(timestamps) { + startIdx = len(timestamps) - 1 + } + endIdx := nHint + 2 + if endIdx > len(timestamps) { + endIdx = len(timestamps) + } + if startIdx > 0 && timestamps[startIdx] <= seekTimestamp { + timestamps = timestamps[startIdx:] + endIdx -= startIdx + } else { + startIdx = 0 + } + if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp { + timestamps = timestamps[:endIdx] + } + if len(timestamps) < 16 { + // Fast path: the number of timestamps to search is small, so scan them all. + for i, timestamp := range timestamps { + if timestamp > seekTimestamp { + return startIdx + i + } + } + return startIdx + len(timestamps) + } + // Slow path: too big len(timestamps), so use binary search. + i := sort.Search(len(timestamps), func(n int) bool { + return n >= 0 && n < len(timestamps) && timestamps[n] > seekTimestamp + }) + return startIdx + i +} + func getMaxPrevInterval(timestamps []int64) int64 { if len(timestamps) < 2 { return int64(maxSilenceInterval) diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index f13dc617b..275210ffe 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -584,7 +584,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{10, 50, 90, 130} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) - t.Run("lifetime", func(t *testing.T) { + t.Run("lifetime_1", func(t *testing.T) { rc := rollupConfig{ Func: rollupLifetime, Start: 0, @@ -598,7 +598,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) - t.Run("lifetime", func(t *testing.T) { + t.Run("lifetime_2", func(t *testing.T) { rc := rollupConfig{ Func: rollupLifetime, Start: 0, @@ -612,7 +612,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) - t.Run("scrape_interval", func(t *testing.T) { + t.Run("scrape_interval_1", func(t *testing.T) { rc := rollupConfig{ Func: rollupScrapeInterval, Start: 0, @@ -626,7 +626,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) - t.Run("scrape_interval", func(t *testing.T) { + t.Run("scrape_interval_2", func(t *testing.T) { rc := rollupConfig{ Func: rollupScrapeInterval, Start: 0, @@ -752,7 +752,7 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) - t.Run("distinct", func(t *testing.T) { + t.Run("distinct_over_time_1", func(t *testing.T) { rc := rollupConfig{ Func: rollupDistinct, Start: 0, @@ -766,6 +766,20 @@ func TestRollupFuncsNoWindow(t *testing.T) { timestampsExpected := []int64{0, 40, 80, 120, 160} testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) }) + t.Run("distinct_over_time_2", func(t *testing.T) { + rc := rollupConfig{ + Func: rollupDistinct, + Start: 0, + End: 160, + Step: 40, + Window: 80, + } + rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step) + values := rc.Do(nil, testValues, testTimestamps) + valuesExpected := []float64{nan, 4, 7, 6, 3} + timestampsExpected := []int64{0, 40, 80, 120, 160} + testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) + }) } func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {