VictoriaMetrics/lib/mergeset/part.go
Zakhar Bessarab 837d0d136d
lib/mergeset: add sparse indexdb cache (#7269)
Related issue:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7182

- add a separate index cache for searches which might read through large
amounts of random entries. The primary use case is retention and
downsampling filters: when applying these filters, the background merge needs
to fetch a large number of random entries, which pollutes the index cache.
Using a different cache reduces the effect on memory usage and cache
efficiency of the main cache while still keeping a high cache hit rate. The
size of the separate cache is 5% of allowed memory.

- reduce the size of the indexdb/dataBlocks cache in order to free memory for
the new sparse cache. The size is reduced by 5%, and that share is moved to the separate cache.

- add a separate metricName search which does not cache metric names.
This is needed in order to allow disabling metric name caching when
applying downsampling/retention filters. Applying filters during
background merge accesses random entries; this fills up the cache and does
not provide an actual improvement due to the random nature of the access.


Merge performance and memory usage stats before and after the change:

- before

![image](https://github.com/user-attachments/assets/485fffbb-c225-47ae-b5c5-bc8a7c57b36e)


- after

![image](https://github.com/user-attachments/assets/f4ba3440-7c1c-4ec1-bc54-4d2ab431eef5)

---------

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
2024-10-24 15:21:17 +02:00

156 lines
3.9 KiB
Go

package mergeset
import (
"path/filepath"
"sync"
"unsafe"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/blockcache"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
)
// Package-level block caches used by this package.
//
// Each cache is created with the getter that reports its size limit.
// part.MustClose removes the blocks of the closed part from all three caches.
var (
	// idxbCache is limited by getMaxIndexBlocksCacheSize.
	idxbCache = blockcache.NewCache(getMaxIndexBlocksCacheSize)

	// ibCache is limited by getMaxInmemoryBlocksCacheSize.
	ibCache = blockcache.NewCache(getMaxInmemoryBlocksCacheSize)

	// ibSparseCache is limited by getMaxInmemoryBlocksSparseCacheSize.
	ibSparseCache = blockcache.NewCache(getMaxInmemoryBlocksSparseCacheSize)
)
// SetIndexBlocksCacheSize overrides the default size of the indexdb/indexBlocks cache.
//
// The value is stored in maxIndexBlockCacheSize, which is what
// getMaxIndexBlocksCacheSize returns. A non-positive size is replaced with the
// default only during the first call to getMaxIndexBlocksCacheSize.
//
// The write is a plain assignment without synchronization.
// NOTE(review): presumably this is expected to be called once at startup,
// before the cache is used - confirm against callers.
func SetIndexBlocksCacheSize(size int) {
	maxIndexBlockCacheSize = size
}
func getMaxIndexBlocksCacheSize() int {
maxIndexBlockCacheSizeOnce.Do(func() {
if maxIndexBlockCacheSize <= 0 {
maxIndexBlockCacheSize = int(0.10 * float64(memory.Allowed()))
}
})
return maxIndexBlockCacheSize
}
var (
	// maxIndexBlockCacheSize is the size limit returned by getMaxIndexBlocksCacheSize.
	// It is written by SetIndexBlocksCacheSize; a non-positive value is replaced
	// with the default on the first call to getMaxIndexBlocksCacheSize.
	maxIndexBlockCacheSize int

	// maxIndexBlockCacheSizeOnce makes the default for maxIndexBlockCacheSize
	// be applied at most once.
	maxIndexBlockCacheSizeOnce sync.Once
)
// SetDataBlocksCacheSize overrides the default size of the indexdb/dataBlocks cache.
//
// The value is stored in maxInmemoryBlockCacheSize, which is what
// getMaxInmemoryBlocksCacheSize returns. A non-positive size is replaced with
// the default only during the first call to getMaxInmemoryBlocksCacheSize.
//
// The write is a plain assignment without synchronization.
// NOTE(review): presumably this is expected to be called once at startup,
// before the cache is used - confirm against callers.
func SetDataBlocksCacheSize(size int) {
	maxInmemoryBlockCacheSize = size
}
func getMaxInmemoryBlocksCacheSize() int {
maxInmemoryBlockCacheSizeOnce.Do(func() {
if maxInmemoryBlockCacheSize <= 0 {
maxInmemoryBlockCacheSize = int(0.20 * float64(memory.Allowed()))
}
})
return maxInmemoryBlockCacheSize
}
// SetDataBlocksSparseCacheSize overrides the default size of the indexdb/dataBlocksSparse cache.
//
// The value is stored in maxInmemorySparseMergeCacheSize, which is what
// getMaxInmemoryBlocksSparseCacheSize returns. A non-positive size is replaced
// with the default only during the first call to
// getMaxInmemoryBlocksSparseCacheSize.
//
// The write is a plain assignment without synchronization.
// NOTE(review): presumably this is expected to be called once at startup,
// before the cache is used - confirm against callers.
func SetDataBlocksSparseCacheSize(size int) {
	maxInmemorySparseMergeCacheSize = size
}
func getMaxInmemoryBlocksSparseCacheSize() int {
maxInmemoryBlockSparseCacheSizeOnce.Do(func() {
if maxInmemorySparseMergeCacheSize <= 0 {
maxInmemorySparseMergeCacheSize = int(0.05 * float64(memory.Allowed()))
}
})
return maxInmemorySparseMergeCacheSize
}
var (
	// maxInmemoryBlockCacheSize is the size limit returned by
	// getMaxInmemoryBlocksCacheSize. It is written by SetDataBlocksCacheSize.
	maxInmemoryBlockCacheSize int
	// maxInmemoryBlockCacheSizeOnce makes the default for
	// maxInmemoryBlockCacheSize be applied at most once.
	maxInmemoryBlockCacheSizeOnce sync.Once

	// maxInmemorySparseMergeCacheSize is the size limit returned by
	// getMaxInmemoryBlocksSparseCacheSize. It is written by
	// SetDataBlocksSparseCacheSize.
	maxInmemorySparseMergeCacheSize int
	// maxInmemoryBlockSparseCacheSizeOnce makes the default for
	// maxInmemorySparseMergeCacheSize be applied at most once.
	maxInmemoryBlockSparseCacheSizeOnce sync.Once
)
// part describes a single part together with the readers for its data.
//
// Instances are created by newPart and must be released with MustClose.
type part struct {
	// ph is the part header; newPart fills it via CopyFrom from the header it is given.
	ph partHeader

	// path is the path passed to newPart. mustOpenFilePart passes the part directory here.
	path string

	// size is the size passed to newPart. For parts opened by mustOpenFilePart
	// it is the sum of the metaindex, index, items and lens file sizes.
	size uint64

	// mrs holds the metaindex rows unmarshaled by newPart.
	mrs []metaindexRow

	// indexFile, itemsFile and lensFile are the readers for the corresponding
	// part data. All of them are closed by MustClose.
	indexFile fs.MustReadAtCloser
	itemsFile fs.MustReadAtCloser
	lensFile  fs.MustReadAtCloser
}
// mustOpenFilePart opens the part stored in the directory at path.
//
// It reads the part metadata, opens the metaindex file as a stream and the
// index, items and lens files as random-access readers, and passes them to
// newPart together with the total size of these four files.
func mustOpenFilePart(path string) *part {
	var header partHeader
	header.MustReadMetadata(path)

	// totalSize accumulates the sizes of all the opened files.
	// Each file is opened before its size is requested.
	metaindexPath := filepath.Join(path, metaindexFilename)
	metaindexFile := filestream.MustOpen(metaindexPath, true)
	totalSize := fs.MustFileSize(metaindexPath)

	indexPath := filepath.Join(path, indexFilename)
	indexFile := fs.MustOpenReaderAt(indexPath)
	totalSize += fs.MustFileSize(indexPath)

	itemsPath := filepath.Join(path, itemsFilename)
	itemsFile := fs.MustOpenReaderAt(itemsPath)
	totalSize += fs.MustFileSize(itemsPath)

	lensPath := filepath.Join(path, lensFilename)
	lensFile := fs.MustOpenReaderAt(lensPath)
	totalSize += fs.MustFileSize(lensPath)

	return newPart(&header, path, totalSize, metaindexFile, indexFile, itemsFile, lensFile)
}
// newPart creates a part from the given header, path, size and readers.
//
// The metaindex rows are unmarshaled from metaindexReader, which is closed
// before newPart returns. If the rows cannot be unmarshaled, the error is
// reported via logger.Panicf.
//
// ph is copied into the returned part via CopyFrom, while indexFile,
// itemsFile and lensFile are stored as is.
func newPart(ph *partHeader, path string, size uint64, metaindexReader filestream.ReadCloser, indexFile, itemsFile, lensFile fs.MustReadAtCloser) *part {
	rows, err := unmarshalMetaindexRows(nil, metaindexReader)
	if err != nil {
		logger.Panicf("FATAL: cannot unmarshal metaindexRows from %q: %s", path, err)
	}
	metaindexReader.MustClose()

	p := &part{
		path:      path,
		size:      size,
		mrs:       rows,
		indexFile: indexFile,
		itemsFile: itemsFile,
		lensFile:  lensFile,
	}
	p.ph.CopyFrom(ph)
	return p
}
// MustClose closes the index, items and lens files of p (in this order) and
// then removes the blocks belonging to p from idxbCache, ibCache and
// ibSparseCache.
func (p *part) MustClose() {
	files := [...]fs.MustReadAtCloser{p.indexFile, p.itemsFile, p.lensFile}
	for _, f := range files {
		f.MustClose()
	}

	// The caches are cleaned only after the files are closed.
	idxbCache.RemoveBlocksForPart(p)
	ibCache.RemoveBlocksForPart(p)
	ibSparseCache.RemoveBlocksForPart(p)
}
// indexBlock groups a list of block headers with the buffer backing their data.
type indexBlock struct {
	// bhs is the list of block headers.
	bhs []blockHeader

	// buf is the buffer holding the data referred to by bhs.
	buf []byte
}
// SizeBytes returns the size of idxb.
//
// The result is the size of the indexBlock struct itself plus SizeBytes() of
// every blockHeader within the capacity of idxb.bhs, i.e. including the
// entries beyond its length. idxb.buf is not added separately.
// NOTE(review): presumably blockHeader.SizeBytes already accounts for the
// data stored in buf - confirm.
func (idxb *indexBlock) SizeBytes() int {
	total := int(unsafe.Sizeof(*idxb))

	// Extend the slice to its full capacity, so the spare entries are counted too.
	headers := idxb.bhs[:cap(idxb.bhs)]
	for i := 0; i < len(headers); i++ {
		total += headers[i].SizeBytes()
	}
	return total
}