lib/mergeset: unconditionally cache indexdb blocks

Production workloads show that indexdb blocks must be cached unconditionally in order to reduce CPU usage. This shouldn't increase memory usage too much, since unused blocks are removed from the cache every two minutes.
2025-03-11 15:34:56 +00:00 · 2021-02-09 00:43:19 +02:00 · 2021-02-09 00:43:19 +02:00 · a4140de9e6
commit a4140de9e6
parent cb96a1865b
8 changed files with 11 additions and 46 deletions
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -9,6 +9,7 @@

 * BUGFIX: do not spam error logs when discovering Docker Swarm targets without dedicated IP. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1028 .
 * BUGFIX: properly install timezone data inside base Docker image. This should fix `-loggerTimezone` usage inside Docker containers.
+* BUGFIX: reduce CPU usage when performing queries with individual filters matching a big number of time series.


 # [v1.53.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.53.1)
--- a/lib/mergeset/part_search.go
+++ b/lib/mergeset/part_search.go
@@ -28,8 +28,6 @@ type partSearch struct {
 	// Pointer to inmemory block, which may be reused.
 	inmemoryBlockReuse *inmemoryBlock

-	shouldCacheBlock func(item []byte) bool
-
 	idxbCache *indexBlockCache
 	ibCache   *inmemoryBlockCache

@@ -54,7 +52,6 @@ func (ps *partSearch) reset() {
 		putInmemoryBlock(ps.inmemoryBlockReuse)
 		ps.inmemoryBlockReuse = nil
 	}
-	ps.shouldCacheBlock = nil
 	ps.idxbCache = nil
 	ps.ibCache = nil
 	ps.err = nil
@@ -71,13 +68,12 @@ func (ps *partSearch) reset() {
 // Init initializes ps for search in the p.
 //
 // Use Seek for search in p.
-func (ps *partSearch) Init(p *part, shouldCacheBlock func(item []byte) bool) {
+func (ps *partSearch) Init(p *part) {
 	ps.reset()

 	ps.p = p
 	ps.idxbCache = p.idxbCache
 	ps.ibCache = p.ibCache
-	ps.shouldCacheBlock = shouldCacheBlock
 }

 // Seek seeks for the first item greater or equal to k in ps.
@@ -306,16 +302,6 @@ func (ps *partSearch) readIndexBlock(mr *metaindexRow) (*indexBlock, error) {
 }

 func (ps *partSearch) getInmemoryBlock(bh *blockHeader) (*inmemoryBlock, bool, error) {
-	if ps.shouldCacheBlock != nil {
-		if !ps.shouldCacheBlock(bh.firstItem) {
-			ib, err := ps.readInmemoryBlock(bh)
-			if err != nil {
-				return nil, false, err
-			}
-			return ib, true, nil
-		}
-	}
-
 	var ibKey inmemoryBlockCacheKey
 	ibKey.Init(bh)
 	ib := ps.ibCache.Get(ibKey)
--- a/lib/mergeset/part_search_test.go
+++ b/lib/mergeset/part_search_test.go
@@ -51,7 +51,7 @@ func testPartSearchConcurrent(p *part, items []string) error {
 func testPartSearchSerial(p *part, items []string) error {
 	var ps partSearch

-	ps.Init(p, nil)
+	ps.Init(p)
 	var k []byte

 	// Search for the item smaller than the items[0]
--- a/lib/mergeset/table_search.go
+++ b/lib/mergeset/table_search.go
@@ -58,7 +58,7 @@ func (ts *TableSearch) reset() {
 // Init initializes ts for searching in the tb.
 //
 // MustClose must be called when the ts is no longer needed.
-func (ts *TableSearch) Init(tb *Table, shouldCacheBlock func(item []byte) bool) {
+func (ts *TableSearch) Init(tb *Table) {
 	if ts.needClosing {
 		logger.Panicf("BUG: missing MustClose call before the next call to Init")
 	}
@@ -76,7 +76,7 @@ func (ts *TableSearch) Init(tb *Table, shouldCacheBlock func(item []byte) bool)
 	}
 	ts.psPool = ts.psPool[:len(ts.pws)]
 	for i, pw := range ts.pws {
-		ts.psPool[i].Init(pw.p, shouldCacheBlock)
+		ts.psPool[i].Init(pw.p)
 	}
 }

--- a/lib/mergeset/table_search_test.go
+++ b/lib/mergeset/table_search_test.go
@@ -109,7 +109,7 @@ func testTableSearchConcurrent(tb *Table, items []string) error {

 func testTableSearchSerial(tb *Table, items []string) error {
 	var ts TableSearch
-	ts.Init(tb, nil)
+	ts.Init(tb)
 	for _, key := range []string{
 		"",
 		"123",
--- a/lib/mergeset/table_search_timing_test.go
+++ b/lib/mergeset/table_search_timing_test.go
@@ -81,7 +81,7 @@ func benchmarkTableSearchKeysExt(b *testing.B, tb *Table, keys [][]byte, stripSu
 	b.SetBytes(int64(searchKeysCount * rowsToScan))
 	b.RunParallel(func(pb *testing.PB) {
 		var ts TableSearch
-		ts.Init(tb, nil)
+		ts.Init(tb)
 		defer ts.MustClose()
 		for pb.Next() {
 			startIdx := rand.Intn(len(keys) - searchKeysCount)
--- a/lib/mergeset/table_test.go
+++ b/lib/mergeset/table_test.go
@@ -176,10 +176,10 @@ func TestTableCreateSnapshotAt(t *testing.T) {
 	defer tb2.MustClose()

 	var ts, ts1, ts2 TableSearch
-	ts.Init(tb, nil)
-	ts1.Init(tb1, nil)
+	ts.Init(tb)
+	ts1.Init(tb1)
 	defer ts1.MustClose()
-	ts2.Init(tb2, nil)
+	ts2.Init(tb2)
 	defer ts2.MustClose()
 	for i := 0; i < itemsCount; i++ {
 		key := []byte(fmt.Sprintf("item %d", i))
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -48,28 +48,6 @@ const (
 	nsPrefixDateTagToMetricIDs = 6
 )

-func shouldCacheBlock(item []byte) bool {
-	if len(item) == 0 {
-		return true
-	}
-	// Do not cache items starting from
-	switch item[0] {
-	case nsPrefixTagToMetricIDs, nsPrefixDateTagToMetricIDs:
-		// Do not cache blocks with tag->metricIDs and (date,tag)->metricIDs items, since:
-		// - these blocks are scanned sequentially, so the overhead
-		//   on their unmarshaling is amortized by the sequential scan.
-		// - these blocks can occupy high amounts of RAM in cache
-		//   and evict other frequently accessed blocks.
-		return false
-	case nsPrefixDeletedMetricID:
-		// Do not cache blocks with deleted metricIDs,
-		// since these metricIDs are loaded only once during app start.
-		return false
-	default:
-		return true
-	}
-}
-
 // indexDB represents an index db.
 type indexDB struct {
 	// Atomic counters must go at the top of the structure in order to properly align by 8 bytes on 32-bit archs.
@@ -549,7 +527,7 @@ func (db *indexDB) getIndexSearch(deadline uint64) *indexSearch {
 		}
 	}
 	is := v.(*indexSearch)
-	is.ts.Init(db.tb, shouldCacheBlock)
+	is.ts.Init(db.tb)
 	is.deadline = deadline
 	return is
 }