mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/storage: skip missing tsids in the current block header by using binary search
This improves performance by up to 10x when a big number of the requested TSIDs is missing in the searched parts. This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
This commit is contained in:
parent
4de9d35458
commit
ad8852759d
2 changed files with 93 additions and 8 deletions
|
@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) {
|
|||
if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
|
||||
logger.Panicf("BUG: tsids must be sorted; got %+v", tsids)
|
||||
}
|
||||
// take ownership of of tsids.
|
||||
// take ownership of tsids.
|
||||
ps.tsids = tsids
|
||||
}
|
||||
ps.tr = tr
|
||||
|
@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool {
|
||||
if !ps.BlockRef.bh.TSID.Less(tsid) {
|
||||
return true
|
||||
}
|
||||
if !ps.nextTSID() {
|
||||
return false
|
||||
}
|
||||
if !ps.BlockRef.bh.TSID.Less(tsid) {
|
||||
// Fast path: the next TSID isn't smaller than the tsid.
|
||||
return true
|
||||
}
|
||||
|
||||
// Slower path - binary search for the next TSID, which isn't smaller than the tsid.
|
||||
tsids := ps.tsids[ps.tsidIdx:]
|
||||
ps.tsidIdx += sort.Search(len(tsids), func(i int) bool {
|
||||
return !tsids[i].Less(tsid)
|
||||
})
|
||||
if ps.tsidIdx >= len(ps.tsids) {
|
||||
ps.tsidIdx = len(ps.tsids)
|
||||
ps.err = io.EOF
|
||||
return false
|
||||
}
|
||||
ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx]
|
||||
ps.tsidIdx++
|
||||
return true
|
||||
}
|
||||
|
||||
func (ps *partSearch) nextBHS() bool {
|
||||
for len(ps.metaindex) > 0 {
|
||||
// Optimization: skip tsid values smaller than the minimum value
|
||||
// from ps.metaindex.
|
||||
for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) {
|
||||
if !ps.nextTSID() {
|
||||
return false
|
||||
}
|
||||
// Optimization: skip tsid values smaller than the minimum value from ps.metaindex.
|
||||
if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) {
|
||||
return false
|
||||
}
|
||||
// Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID
|
||||
|
||||
|
@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool {
|
|||
if bh.TSID.MetricID != tsid.MetricID {
|
||||
// tsid < bh.TSID: no more blocks with the given tsid.
|
||||
// Proceed to the next (bigger) tsid.
|
||||
if !ps.nextTSID() {
|
||||
if !ps.skipTSIDsSmallerThan(&bh.TSID) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
|
|
61
lib/storage/part_search_timing_test.go
Normal file
61
lib/storage/part_search_timing_test.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkPartSearch(b *testing.B) {
|
||||
for _, sparseness := range []int{1, 2, 10, 100} {
|
||||
b.Run(fmt.Sprintf("sparseness-%d", sparseness), func(b *testing.B) {
|
||||
benchmarkPartSearchWithSparseness(b, sparseness)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) {
|
||||
blocksCount := 100000
|
||||
rows := make([]rawRow, blocksCount)
|
||||
for i := 0; i < blocksCount; i++ {
|
||||
r := &rows[i]
|
||||
r.PrecisionBits = defaultPrecisionBits
|
||||
r.TSID.MetricID = uint64(i * sparseness)
|
||||
r.Timestamp = int64(i) * 1000
|
||||
r.Value = float64(i)
|
||||
}
|
||||
tr := TimeRange{
|
||||
MinTimestamp: rows[0].Timestamp,
|
||||
MaxTimestamp: rows[len(rows)-1].Timestamp,
|
||||
}
|
||||
p := newTestPart(rows)
|
||||
for _, tsidsCount := range []int{100, 1000, 10000, 100000} {
|
||||
b.Run(fmt.Sprintf("tsids-%d", tsidsCount), func(b *testing.B) {
|
||||
tsids := make([]TSID, tsidsCount)
|
||||
for i := 0; i < tsidsCount; i++ {
|
||||
tsids[i].MetricID = uint64(i)
|
||||
}
|
||||
benchmarkPartSearch(b, p, tsids, tr, sparseness)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) {
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var ps partSearch
|
||||
for pb.Next() {
|
||||
blocksRead := 0
|
||||
ps.Init(p, tsids, tr)
|
||||
for ps.NextBlock() {
|
||||
blocksRead++
|
||||
}
|
||||
if err := ps.Error(); err != nil {
|
||||
panic(fmt.Errorf("BUG: unexpected error: %s", err))
|
||||
}
|
||||
blocksWant := len(tsids) / sparseness
|
||||
if blocksRead != blocksWant {
|
||||
panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", blocksRead, blocksWant))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
Loading…
Reference in a new issue