lib/storage: skip missing tsids in the current block header by using binary search

This improves performance by up to 10x when a large number of the requested TSIDs
are missing from the searched parts.

This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
This commit is contained in:
Aliaksandr Valialkin 2022-12-14 22:04:25 -08:00
parent 1a88fe5b1f
commit fbeebe4869
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
2 changed files with 93 additions and 8 deletions

View file

@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) {
if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) { if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
logger.Panicf("BUG: tsids must be sorted; got %+v", tsids) logger.Panicf("BUG: tsids must be sorted; got %+v", tsids)
} }
// take ownership of of tsids. // take ownership of tsids.
ps.tsids = tsids ps.tsids = tsids
} }
ps.tr = tr ps.tr = tr
@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool {
return true return true
} }
// skipTSIDsSmallerThan advances the search until ps.BlockRef.bh.TSID is not
// smaller than tsid.
//
// It returns true as soon as ps.BlockRef.bh.TSID >= tsid. It returns false
// if ps.nextTSID reports failure, or if every remaining entry in ps.tsids is
// smaller than tsid; in the latter case ps.err is set to io.EOF.
func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool {
	if !ps.BlockRef.bh.TSID.Less(tsid) {
		// Nothing to skip: the current TSID already isn't smaller than tsid.
		return true
	}
	if !ps.nextTSID() {
		return false
	}
	if !ps.BlockRef.bh.TSID.Less(tsid) {
		// Fast path: a single step forward was enough.
		return true
	}

	// Slow path: ps.tsids is sorted, so locate the first remaining entry,
	// which isn't smaller than tsid, with a binary search instead of
	// stepping through the entries one by one.
	remaining := ps.tsids[ps.tsidIdx:]
	skipped := sort.Search(len(remaining), func(n int) bool {
		return !remaining[n].Less(tsid)
	})
	ps.tsidIdx += skipped
	if ps.tsidIdx < len(ps.tsids) {
		// Make the found entry current and move the index past it.
		ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx]
		ps.tsidIdx++
		return true
	}

	// All the remaining entries are smaller than tsid.
	ps.tsidIdx = len(ps.tsids)
	ps.err = io.EOF
	return false
}
func (ps *partSearch) nextBHS() bool { func (ps *partSearch) nextBHS() bool {
for len(ps.metaindex) > 0 { for len(ps.metaindex) > 0 {
// Optimization: skip tsid values smaller than the minimum value // Optimization: skip tsid values smaller than the minimum value from ps.metaindex.
// from ps.metaindex. if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) {
for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) { return false
if !ps.nextTSID() {
return false
}
} }
// Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID // Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID
@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool {
if bh.TSID.MetricID != tsid.MetricID { if bh.TSID.MetricID != tsid.MetricID {
// tsid < bh.TSID: no more blocks with the given tsid. // tsid < bh.TSID: no more blocks with the given tsid.
// Proceed to the next (bigger) tsid. // Proceed to the next (bigger) tsid.
if !ps.nextTSID() { if !ps.skipTSIDsSmallerThan(&bh.TSID) {
return false return false
} }
continue continue

View file

@ -0,0 +1,61 @@
package storage
import (
"fmt"
"testing"
)
// BenchmarkPartSearch runs the part search benchmark as a separate
// sub-benchmark for each of the sparseness values 1, 2, 10 and 100.
func BenchmarkPartSearch(b *testing.B) {
	sparsenessValues := []int{1, 2, 10, 100}
	for idx := range sparsenessValues {
		sparseness := sparsenessValues[idx]
		name := fmt.Sprintf("sparseness-%d", sparseness)
		b.Run(name, func(b *testing.B) {
			benchmarkPartSearchWithSparseness(b, sparseness)
		})
	}
}
// benchmarkPartSearchWithSparseness builds a test part from 100000 rows and
// benchmarks searching it with 100, 1000, 10000 and 100000 requested TSIDs.
//
// Row i gets MetricID i*sparseness, while the requested TSIDs have the
// consecutive MetricIDs 0..tsidsCount-1, so only every sparseness-th
// requested TSID has a matching row in the part.
func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) {
	const blocksCount = 100000

	rows := make([]rawRow, blocksCount)
	for n := range rows {
		rows[n].PrecisionBits = defaultPrecisionBits
		rows[n].TSID.MetricID = uint64(n * sparseness)
		rows[n].Timestamp = int64(n) * 1000
		rows[n].Value = float64(n)
	}

	// The time range spans the timestamps of all the generated rows.
	first, last := &rows[0], &rows[len(rows)-1]
	tr := TimeRange{
		MinTimestamp: first.Timestamp,
		MaxTimestamp: last.Timestamp,
	}
	p := newTestPart(rows)

	for _, tsidsCount := range []int{100, 1000, 10000, 100000} {
		name := fmt.Sprintf("tsids-%d", tsidsCount)
		b.Run(name, func(b *testing.B) {
			tsids := make([]TSID, tsidsCount)
			for n := range tsids {
				tsids[n].MetricID = uint64(n)
			}
			benchmarkPartSearch(b, p, tsids, tr, sparseness)
		})
	}
}
// benchmarkPartSearch repeatedly searches p for the given tsids on the given
// time range tr from parallel goroutines, reusing a single partSearch per
// goroutine.
//
// After every search it panics if ps.Error() is non-nil or if the number of
// blocks read differs from len(tsids) / sparseness.
func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) {
	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		var ps partSearch
		for pb.Next() {
			ps.Init(p, tsids, tr)

			// Count all the blocks the search returns.
			var gotBlocks int
			for ps.NextBlock() {
				gotBlocks++
			}

			err := ps.Error()
			if err != nil {
				panic(fmt.Errorf("BUG: unexpected error: %s", err))
			}
			if wantBlocks := len(tsids) / sparseness; gotBlocks != wantBlocks {
				panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", gotBlocks, wantBlocks))
			}
		}
	})
}