mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/storage: skip missing tsids in the current block header by using binary search
This improves performance by up to 10x when big number of the requested TSIDs are missing in the searched parts. This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
This commit is contained in:
parent
1a88fe5b1f
commit
fbeebe4869
2 changed files with 93 additions and 8 deletions
|
@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) {
|
|||
if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
|
||||
logger.Panicf("BUG: tsids must be sorted; got %+v", tsids)
|
||||
}
|
||||
// take ownership of of tsids.
|
||||
// take ownership of tsids.
|
||||
ps.tsids = tsids
|
||||
}
|
||||
ps.tr = tr
|
||||
|
@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool {
|
||||
if !ps.BlockRef.bh.TSID.Less(tsid) {
|
||||
return true
|
||||
}
|
||||
if !ps.nextTSID() {
|
||||
return false
|
||||
}
|
||||
if !ps.BlockRef.bh.TSID.Less(tsid) {
|
||||
// Fast path: the next TSID isn't smaller than the tsid.
|
||||
return true
|
||||
}
|
||||
|
||||
// Slower path - binary search for the next TSID, which isn't smaller than the tsid.
|
||||
tsids := ps.tsids[ps.tsidIdx:]
|
||||
ps.tsidIdx += sort.Search(len(tsids), func(i int) bool {
|
||||
return !tsids[i].Less(tsid)
|
||||
})
|
||||
if ps.tsidIdx >= len(ps.tsids) {
|
||||
ps.tsidIdx = len(ps.tsids)
|
||||
ps.err = io.EOF
|
||||
return false
|
||||
}
|
||||
ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx]
|
||||
ps.tsidIdx++
|
||||
return true
|
||||
}
|
||||
|
||||
func (ps *partSearch) nextBHS() bool {
|
||||
for len(ps.metaindex) > 0 {
|
||||
// Optimization: skip tsid values smaller than the minimum value
|
||||
// from ps.metaindex.
|
||||
for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) {
|
||||
if !ps.nextTSID() {
|
||||
return false
|
||||
}
|
||||
// Optimization: skip tsid values smaller than the minimum value from ps.metaindex.
|
||||
if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) {
|
||||
return false
|
||||
}
|
||||
// Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID
|
||||
|
||||
|
@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool {
|
|||
if bh.TSID.MetricID != tsid.MetricID {
|
||||
// tsid < bh.TSID: no more blocks with the given tsid.
|
||||
// Proceed to the next (bigger) tsid.
|
||||
if !ps.nextTSID() {
|
||||
if !ps.skipTSIDsSmallerThan(&bh.TSID) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
|
|
61
lib/storage/part_search_timing_test.go
Normal file
61
lib/storage/part_search_timing_test.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkPartSearch(b *testing.B) {
|
||||
for _, sparseness := range []int{1, 2, 10, 100} {
|
||||
b.Run(fmt.Sprintf("sparseness-%d", sparseness), func(b *testing.B) {
|
||||
benchmarkPartSearchWithSparseness(b, sparseness)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) {
|
||||
blocksCount := 100000
|
||||
rows := make([]rawRow, blocksCount)
|
||||
for i := 0; i < blocksCount; i++ {
|
||||
r := &rows[i]
|
||||
r.PrecisionBits = defaultPrecisionBits
|
||||
r.TSID.MetricID = uint64(i * sparseness)
|
||||
r.Timestamp = int64(i) * 1000
|
||||
r.Value = float64(i)
|
||||
}
|
||||
tr := TimeRange{
|
||||
MinTimestamp: rows[0].Timestamp,
|
||||
MaxTimestamp: rows[len(rows)-1].Timestamp,
|
||||
}
|
||||
p := newTestPart(rows)
|
||||
for _, tsidsCount := range []int{100, 1000, 10000, 100000} {
|
||||
b.Run(fmt.Sprintf("tsids-%d", tsidsCount), func(b *testing.B) {
|
||||
tsids := make([]TSID, tsidsCount)
|
||||
for i := 0; i < tsidsCount; i++ {
|
||||
tsids[i].MetricID = uint64(i)
|
||||
}
|
||||
benchmarkPartSearch(b, p, tsids, tr, sparseness)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var ps partSearch
|
||||
for pb.Next() {
|
||||
blocksRead := 0
|
||||
ps.Init(p, tsids, tr)
|
||||
for ps.NextBlock() {
|
||||
blocksRead++
|
||||
}
|
||||
if err := ps.Error(); err != nil {
|
||||
panic(fmt.Errorf("BUG: unexpected error: %s", err))
|
||||
}
|
||||
blocksWant := len(tsids) / sparseness
|
||||
if blocksRead != blocksWant {
|
||||
panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", blocksRead, blocksWant))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue