From fbeebe4869892a9dd3c684da5fbaa0a6c511a651 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 14 Dec 2022 22:04:25 -0800 Subject: [PATCH] lib/storage: skip missing tsids in the current block header by using binary search This improves performance by up to 10x when big number of the requested TSIDs are missing in the searched parts. This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425 --- lib/storage/part_search.go | 40 +++++++++++++---- lib/storage/part_search_timing_test.go | 61 ++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 lib/storage/part_search_timing_test.go diff --git a/lib/storage/part_search.go b/lib/storage/part_search.go index 88e92c501b..572b45553b 100644 --- a/lib/storage/part_search.go +++ b/lib/storage/part_search.go @@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) { if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) { logger.Panicf("BUG: tsids must be sorted; got %+v", tsids) } - // take ownership of of tsids. + // take ownership of tsids. ps.tsids = tsids } ps.tr = tr @@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool { return true } +func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool { + if !ps.BlockRef.bh.TSID.Less(tsid) { + return true + } + if !ps.nextTSID() { + return false + } + if !ps.BlockRef.bh.TSID.Less(tsid) { + // Fast path: the next TSID isn't smaller than the tsid. + return true + } + + // Slower path - binary search for the next TSID, which isn't smaller than the tsid. + tsids := ps.tsids[ps.tsidIdx:] + ps.tsidIdx += sort.Search(len(tsids), func(i int) bool { + return !tsids[i].Less(tsid) + }) + if ps.tsidIdx >= len(ps.tsids) { + ps.tsidIdx = len(ps.tsids) + ps.err = io.EOF + return false + } + ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx] + ps.tsidIdx++ + return true +} + func (ps *partSearch) nextBHS() bool { for len(ps.metaindex) > 0 { - // Optimization: skip tsid values smaller than the minimum value - // from ps.metaindex. - for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) { - if !ps.nextTSID() { - return false - } + // Optimization: skip tsid values smaller than the minimum value from ps.metaindex. + if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) { + return false } // Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID @@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool { if bh.TSID.MetricID != tsid.MetricID { // tsid < bh.TSID: no more blocks with the given tsid. // Proceed to the next (bigger) tsid. - if !ps.nextTSID() { + if !ps.skipTSIDsSmallerThan(&bh.TSID) { return false } continue diff --git a/lib/storage/part_search_timing_test.go b/lib/storage/part_search_timing_test.go new file mode 100644 index 0000000000..cacd73bd70 --- /dev/null +++ b/lib/storage/part_search_timing_test.go @@ -0,0 +1,61 @@ +package storage + +import ( + "fmt" + "testing" +) + +func BenchmarkPartSearch(b *testing.B) { + for _, sparseness := range []int{1, 2, 10, 100} { + b.Run(fmt.Sprintf("sparseness-%d", sparseness), func(b *testing.B) { + benchmarkPartSearchWithSparseness(b, sparseness) + }) + } +} + +func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) { + blocksCount := 100000 + rows := make([]rawRow, blocksCount) + for i := 0; i < blocksCount; i++ { + r := &rows[i] + r.PrecisionBits = defaultPrecisionBits + r.TSID.MetricID = uint64(i * sparseness) + r.Timestamp = int64(i) * 1000 + r.Value = float64(i) + } + tr := TimeRange{ + MinTimestamp: rows[0].Timestamp, + MaxTimestamp: rows[len(rows)-1].Timestamp, + } + p := newTestPart(rows) + for _, tsidsCount := range []int{100, 1000, 10000, 100000} { + b.Run(fmt.Sprintf("tsids-%d", tsidsCount), func(b *testing.B) { + tsids := make([]TSID, tsidsCount) + for i := 0; i < tsidsCount; i++ { + tsids[i].MetricID = uint64(i) + } + benchmarkPartSearch(b, p, tsids, tr, sparseness) + }) + } +} + +func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + var ps partSearch + for pb.Next() { + blocksRead := 0 + ps.Init(p, tsids, tr) + for ps.NextBlock() { + blocksRead++ + } + if err := ps.Error(); err != nil { + panic(fmt.Errorf("BUG: unexpected error: %s", err)) + } + blocksWant := len(tsids) / sparseness + if blocksRead != blocksWant { + panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", blocksRead, blocksWant)) + } + } + }) +}