lib/storage: skip missing tsids in the current block header by using binary search

This improves performance by up to 10x when big number of the requested TSIDs
are missing in the searched parts.

This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
This commit is contained in:
Aliaksandr Valialkin 2022-12-14 22:04:25 -08:00
parent 4de9d35458
commit ad8852759d
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
2 changed files with 93 additions and 8 deletions

View file

@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) {
if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
logger.Panicf("BUG: tsids must be sorted; got %+v", tsids)
}
// take ownership of of tsids.
// take ownership of tsids.
ps.tsids = tsids
}
ps.tr = tr
@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool {
return true
}
func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool {
if !ps.BlockRef.bh.TSID.Less(tsid) {
return true
}
if !ps.nextTSID() {
return false
}
if !ps.BlockRef.bh.TSID.Less(tsid) {
// Fast path: the next TSID isn't smaller than the tsid.
return true
}
// Slower path - binary search for the next TSID, which isn't smaller than the tsid.
tsids := ps.tsids[ps.tsidIdx:]
ps.tsidIdx += sort.Search(len(tsids), func(i int) bool {
return !tsids[i].Less(tsid)
})
if ps.tsidIdx >= len(ps.tsids) {
ps.tsidIdx = len(ps.tsids)
ps.err = io.EOF
return false
}
ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx]
ps.tsidIdx++
return true
}
func (ps *partSearch) nextBHS() bool {
for len(ps.metaindex) > 0 {
// Optimization: skip tsid values smaller than the minimum value
// from ps.metaindex.
for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) {
if !ps.nextTSID() {
return false
}
// Optimization: skip tsid values smaller than the minimum value from ps.metaindex.
if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) {
return false
}
// Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID
@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool {
if bh.TSID.MetricID != tsid.MetricID {
// tsid < bh.TSID: no more blocks with the given tsid.
// Proceed to the next (bigger) tsid.
if !ps.nextTSID() {
if !ps.skipTSIDsSmallerThan(&bh.TSID) {
return false
}
continue

View file

@ -0,0 +1,61 @@
package storage
import (
"fmt"
"testing"
)
func BenchmarkPartSearch(b *testing.B) {
for _, sparseness := range []int{1, 2, 10, 100} {
b.Run(fmt.Sprintf("sparseness-%d", sparseness), func(b *testing.B) {
benchmarkPartSearchWithSparseness(b, sparseness)
})
}
}
func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) {
blocksCount := 100000
rows := make([]rawRow, blocksCount)
for i := 0; i < blocksCount; i++ {
r := &rows[i]
r.PrecisionBits = defaultPrecisionBits
r.TSID.MetricID = uint64(i * sparseness)
r.Timestamp = int64(i) * 1000
r.Value = float64(i)
}
tr := TimeRange{
MinTimestamp: rows[0].Timestamp,
MaxTimestamp: rows[len(rows)-1].Timestamp,
}
p := newTestPart(rows)
for _, tsidsCount := range []int{100, 1000, 10000, 100000} {
b.Run(fmt.Sprintf("tsids-%d", tsidsCount), func(b *testing.B) {
tsids := make([]TSID, tsidsCount)
for i := 0; i < tsidsCount; i++ {
tsids[i].MetricID = uint64(i)
}
benchmarkPartSearch(b, p, tsids, tr, sparseness)
})
}
}
func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) {
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
var ps partSearch
for pb.Next() {
blocksRead := 0
ps.Init(p, tsids, tr)
for ps.NextBlock() {
blocksRead++
}
if err := ps.Error(); err != nil {
panic(fmt.Errorf("BUG: unexpected error: %s", err))
}
blocksWant := len(tsids) / sparseness
if blocksRead != blocksWant {
panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", blocksRead, blocksWant))
}
}
})
}