app/vmselect/promql: optimize search for timestamp boundaries in rollupConfig.Do

This should improve the performance of queries over big number of time series
with big number of output points.
This commit is contained in:
Aliaksandr Valialkin 2019-08-19 13:01:37 +03:00
parent c23b66a1ad
commit 6a27657d73
2 changed files with 62 additions and 11 deletions

View file

@ -197,18 +197,15 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
i := 0 i := 0
j := 0 j := 0
n := 0
for _, tEnd := range rc.Timestamps { for _, tEnd := range rc.Timestamps {
tStart := tEnd - window tStart := tEnd - window
n := sort.Search(len(timestamps)-i, func(n int) bool { n = seekFirstTimestampIdxAfter(timestamps[i:], tStart, n)
return timestamps[i+n] > tStart
})
i += n i += n
if j < i { if j < i {
j = i j = i
} }
n = sort.Search(len(timestamps)-j, func(n int) bool { n = seekFirstTimestampIdxAfter(timestamps[j:], tEnd, n)
return timestamps[j+n] > tEnd
})
j += n j += n
rfa.prevValue = nan rfa.prevValue = nan
@ -229,6 +226,46 @@ func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []i
return dstValues return dstValues
} }
func seekFirstTimestampIdxAfter(timestamps []int64, seekTimestamp int64, nHint int) int {
if len(timestamps) == 0 || timestamps[0] > seekTimestamp {
return 0
}
startIdx := nHint - 2
if startIdx < 0 {
startIdx = 0
}
if startIdx >= len(timestamps) {
startIdx = len(timestamps) - 1
}
endIdx := nHint + 2
if endIdx > len(timestamps) {
endIdx = len(timestamps)
}
if startIdx > 0 && timestamps[startIdx] <= seekTimestamp {
timestamps = timestamps[startIdx:]
endIdx -= startIdx
} else {
startIdx = 0
}
if endIdx < len(timestamps) && timestamps[endIdx] > seekTimestamp {
timestamps = timestamps[:endIdx]
}
if len(timestamps) < 16 {
// Fast path: the number of timestamps to search is small, so scan them all.
for i, timestamp := range timestamps {
if timestamp > seekTimestamp {
return startIdx + i
}
}
return startIdx + len(timestamps)
}
// Slow path: too big len(timestamps), so use binary search.
i := sort.Search(len(timestamps), func(n int) bool {
return n >= 0 && n < len(timestamps) && timestamps[n] > seekTimestamp
})
return startIdx + i
}
func getMaxPrevInterval(timestamps []int64) int64 { func getMaxPrevInterval(timestamps []int64) int64 {
if len(timestamps) < 2 { if len(timestamps) < 2 {
return int64(maxSilenceInterval) return int64(maxSilenceInterval)

View file

@ -584,7 +584,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{10, 50, 90, 130} timestampsExpected := []int64{10, 50, 90, 130}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("lifetime", func(t *testing.T) { t.Run("lifetime_1", func(t *testing.T) {
rc := rollupConfig{ rc := rollupConfig{
Func: rollupLifetime, Func: rollupLifetime,
Start: 0, Start: 0,
@ -598,7 +598,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160} timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("lifetime", func(t *testing.T) { t.Run("lifetime_2", func(t *testing.T) {
rc := rollupConfig{ rc := rollupConfig{
Func: rollupLifetime, Func: rollupLifetime,
Start: 0, Start: 0,
@ -612,7 +612,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160} timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("scrape_interval", func(t *testing.T) { t.Run("scrape_interval_1", func(t *testing.T) {
rc := rollupConfig{ rc := rollupConfig{
Func: rollupScrapeInterval, Func: rollupScrapeInterval,
Start: 0, Start: 0,
@ -626,7 +626,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160} timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("scrape_interval", func(t *testing.T) { t.Run("scrape_interval_2", func(t *testing.T) {
rc := rollupConfig{ rc := rollupConfig{
Func: rollupScrapeInterval, Func: rollupScrapeInterval,
Start: 0, Start: 0,
@ -752,7 +752,7 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160} timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("distinct", func(t *testing.T) { t.Run("distinct_over_time_1", func(t *testing.T) {
rc := rollupConfig{ rc := rollupConfig{
Func: rollupDistinct, Func: rollupDistinct,
Start: 0, Start: 0,
@ -766,6 +766,20 @@ func TestRollupFuncsNoWindow(t *testing.T) {
timestampsExpected := []int64{0, 40, 80, 120, 160} timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected) testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
}) })
t.Run("distinct_over_time_2", func(t *testing.T) {
rc := rollupConfig{
Func: rollupDistinct,
Start: 0,
End: 160,
Step: 40,
Window: 80,
}
rc.Timestamps = getTimestamps(rc.Start, rc.End, rc.Step)
values := rc.Do(nil, testValues, testTimestamps)
valuesExpected := []float64{nan, 4, 7, 6, 3}
timestampsExpected := []int64{0, 40, 80, 120, 160}
testRowsEqual(t, values, rc.Timestamps, valuesExpected, timestampsExpected)
})
} }
func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) { func testRowsEqual(t *testing.T, values []float64, timestamps []int64, valuesExpected []float64, timestampsExpected []int64) {