lib/storage: restore ability to put empty metric ID list into tagFiltersToMetricIDsCache (#7064)

### Describe Your Changes

Currently it the metricID list is empty it won't be mashalled and as the
result won't be put into the tagFiltersToMetricIDsCache which causes the
cache misses for the corresponding tagFilters. In some setups this
causes severe search speed detradation (see #7009).

The empty metric IDs was covered before but then was accidentally
removed in 6c21439.

This PR restores the coverage of this case.

A new unit test can be used as a proof that empty metricID lists are not
added to the cache (just remove the fix in index_db.go and run the test
to see the result)

Also a benchmark has been added to see the implications of the
compression.

```
user@laptop:~/p/github.com/rtm0/VictoriaMetrics/01/src$ go test ./lib/storage/ -run=NONE -bench BenchmarkMarshalUnmarshalMetricIDs --loggerLevel=ERROR
goos: linux
goarch: amd64
pkg: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage
cpu: 13th Gen Intel(R) Core(TM) i7-1355U
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-0-12             3237240               363.5 ns/op               0 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-1-12             2831049               451.8 ns/op               0.4706 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-10-12            1152764              1009 ns/op                 1.667 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-100-12            297055              3998 ns/op                 5.755 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-1000-12            31172             34566 ns/op                 8.484 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-10000-12            4900            289659 ns/op                 9.416 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-100000-12            447           2341173 ns/op                 9.456 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-1000000-12            42          24926928 ns/op                 9.468 compression-rate
BenchmarkMarshalUnmarshalMetricIDs/numMetricIDs-10000000-12            5         204098872 ns/op                 9.467 compression-rate
PASS
ok      github.com/VictoriaMetrics/VictoriaMetrics/lib/storage  15.018s
```

### Checklist

The following checks are **mandatory**:

- [x] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: Artem Fetishev <wwctrsrx@gmail.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Artem Fetishev 2024-09-20 17:21:53 +02:00 committed by GitHub
parent a2ecba4154
commit 55febc0920
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 98 additions and 4 deletions

View file

@ -436,15 +436,34 @@ func invalidateTagFiltersCache() {
var tagFiltersKeyGen atomic.Uint64 var tagFiltersKeyGen atomic.Uint64
func marshalMetricIDs(dst []byte, metricIDs []uint64) []byte { func marshalMetricIDs(dst []byte, metricIDs []uint64) []byte {
if len(metricIDs) == 0 {
// Add one zero byte to indicate an empty metricID list and skip
// compression to save CPU cycles.
//
// An empty slice passed to ztsd won't be compressed and therefore
// nothing will be added to dst and if dst is empty the record won't be
// added to the cache. As the result, the search for a given filter will
// be performed again and again. This may lead to cases like this:
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7009
return append(dst, 0)
}
// Compress metricIDs, so they occupy less space in the cache. // Compress metricIDs, so they occupy less space in the cache.
// //
// The srcBuf is a []byte cast of metricIDs. // The srcBuf is a []byte cast of metricIDs.
srcBuf := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(metricIDs))), 8*len(metricIDs)) srcBuf := unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(metricIDs))), 8*len(metricIDs))
dst = encoding.CompressZSTDLevel(dst, srcBuf, 1) dst = encoding.CompressZSTDLevel(dst, srcBuf, 1)
return dst return dst
} }
func mustUnmarshalMetricIDs(dst []uint64, src []byte) []uint64 { func mustUnmarshalMetricIDs(dst []uint64, src []byte) []uint64 {
if len(src) == 1 && src[0] == 0 {
// One zero byte indicates an empty metricID list.
// See marshalMetricIDs().
return dst
}
// Decompress src into dstBuf. // Decompress src into dstBuf.
// //
// dstBuf is a []byte cast of dst. // dstBuf is a []byte cast of dst.
@ -456,10 +475,6 @@ func mustUnmarshalMetricIDs(dst []uint64, src []byte) []uint64 {
if err != nil { if err != nil {
logger.Panicf("FATAL: cannot decompress metricIDs: %s", err) logger.Panicf("FATAL: cannot decompress metricIDs: %s", err)
} }
if len(dstBuf) == dstBufLen {
// Zero metricIDs
return dst
}
if (len(dstBuf)-dstBufLen)%8 != 0 { if (len(dstBuf)-dstBufLen)%8 != 0 {
logger.Panicf("FATAL: cannot unmarshal metricIDs from buffer of %d bytes; the buffer length must divide by 8", len(dstBuf)-dstBufLen) logger.Panicf("FATAL: cannot unmarshal metricIDs from buffer of %d bytes; the buffer length must divide by 8", len(dstBuf)-dstBufLen)
} }

View file

@ -64,6 +64,54 @@ func TestMarshalUnmarshalMetricIDs(t *testing.T) {
f([]uint64{1, 2, 3, 4, 5, 6, 8989898, 823849234, 1<<64 - 1, 1<<32 - 1, 0}) f([]uint64{1, 2, 3, 4, 5, 6, 8989898, 823849234, 1<<64 - 1, 1<<32 - 1, 0})
} }
func TestTagFiltersToMetricIDsCache(t *testing.T) {
f := func(want []uint64) {
t.Helper()
path := t.Name()
defer fs.MustRemoveAll(path)
s := MustOpenStorage(path, 0, 0, 0)
defer s.MustClose()
idb := s.idb()
key := []byte("key")
idb.putMetricIDsToTagFiltersCache(nil, want, key)
got, ok := idb.getMetricIDsFromTagFiltersCache(nil, key)
if !ok {
t.Fatalf("expected metricIDs to be found in cache but they weren't: %v", want)
}
if !reflect.DeepEqual(got, want) {
t.Fatalf("unexpected metricIDs in cache: got %v, want %v", got, want)
}
}
f([]uint64{0})
f([]uint64{1})
f([]uint64{1234, 678932943, 843289893843})
f([]uint64{1, 2, 3, 4, 5, 6, 8989898, 823849234, 1<<64 - 1, 1<<32 - 1, 0})
}
func TestTagFiltersToMetricIDsCache_EmptyMetricIDList(t *testing.T) {
path := t.Name()
defer fs.MustRemoveAll(path)
s := MustOpenStorage(path, 0, 0, 0)
defer s.MustClose()
idb := s.idb()
key := []byte("key")
emptyMetricIDs := []uint64(nil)
idb.putMetricIDsToTagFiltersCache(nil, emptyMetricIDs, key)
got, ok := idb.getMetricIDsFromTagFiltersCache(nil, key)
if !ok {
t.Fatalf("expected empty metricID list to be found in cache but it wasn't")
}
if len(got) > 0 {
t.Fatalf("unexpected found metricID list to be empty but got %v", got)
}
}
func TestMergeSortedMetricIDs(t *testing.T) { func TestMergeSortedMetricIDs(t *testing.T) {
f := func(a, b []uint64) { f := func(a, b []uint64) {
t.Helper() t.Helper()

View file

@ -2,6 +2,7 @@ package storage
import ( import (
"fmt" "fmt"
"math/rand"
"regexp" "regexp"
"strconv" "strconv"
"testing" "testing"
@ -314,3 +315,33 @@ func BenchmarkIndexDBGetTSIDs(b *testing.B) {
s.MustClose() s.MustClose()
fs.MustRemoveAll(path) fs.MustRemoveAll(path)
} }
func BenchmarkMarshalUnmarshalMetricIDs(b *testing.B) {
rng := rand.New(rand.NewSource(1))
f := func(b *testing.B, numMetricIDs int) {
metricIDs := make([]uint64, numMetricIDs)
// metric IDs need to be sorted.
ts := uint64(time.Now().UnixNano())
for i := range numMetricIDs {
metricIDs[i] = ts + uint64(rng.Intn(100))
}
var marshalledLen int
b.ResetTimer()
for range b.N {
marshalled := marshalMetricIDs(nil, metricIDs)
marshalledLen = len(marshalled)
_ = mustUnmarshalMetricIDs(nil, marshalled)
}
b.StopTimer()
compressionRate := float64(numMetricIDs*8) / float64(marshalledLen)
b.ReportMetric(compressionRate, "compression-rate")
}
for _, n := range []int{0, 1, 10, 100, 1e3, 1e4, 1e5, 1e6, 1e7} {
b.Run(fmt.Sprintf("numMetricIDs-%d", n), func(b *testing.B) {
f(b, n)
})
}
}