From b0131c79b6338647945bf9004101bcd1349901c8 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Wed, 27 May 2020 21:35:58 +0300
Subject: [PATCH] lib/storage: improve search speed for time series matching
 Graphite wildcards such as `foo.*.bar.baz`

Add an index for reversed Graphite-like metric names (i.e. metric names with dots).
Use this index during search for filters like `__name__=~"foo\\.[^.]*\\.bar\\.baz"`,
which end with a non-empty literal suffix containing dots, i.e. `.bar.baz` in this case.

This change may "hide" historical time series during queries, since time series
indexed before the upgrade have no entries in the reverse index. The workaround
is to add `[.]*` to the end of the regexp label filter, i.e. "foo\\.[^.]*\\.bar\\.baz"
should be substituted with "foo\\.[^.]*\\.bar\\.baz[.]*".
---
An illustrative sketch of the reversed-name lookup follows the patch.

 lib/storage/index_db.go         | 34 ++++++++++++++
 lib/storage/index_db_test.go    | 16 ++++++-
 lib/storage/tag_filters.go      | 79 +++++++++++++++++++++------------
 lib/storage/tag_filters_test.go | 74 +++++++++++++++---------------
 4 files changed, 139 insertions(+), 64 deletions(-)

diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go
index aa2f0a6b54..2b4e18bd8a 100644
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -687,6 +687,7 @@ func (db *indexDB) createIndexes(tsid *TSID, mn *MetricName) error {
 	items.B = marshalTagValue(items.B, mn.MetricGroup)
 	items.B = encoding.MarshalUint64(items.B, tsid.MetricID)
 	items.Next()
+	addReverseMetricGroupIfNeeded(items, commonPrefix.B, mn, tsid.MetricID)
 
 	// For each tag create tag -> MetricID index.
 	for i := range mn.Tags {
@@ -2609,6 +2610,7 @@ func (is *indexSearch) storeDateMetricID(date, metricID uint64, accountID, proje
 		items.B = marshalTagValue(items.B, mn.MetricGroup)
 		items.B = encoding.MarshalUint64(items.B, metricID)
 		items.Next()
+		addReverseMetricGroupIfNeeded(items, kb.B, mn, metricID)
 		for i := range mn.Tags {
 			tag := &mn.Tags[i]
 			items.B = append(items.B, kb.B...)
@@ -2622,6 +2624,38 @@ func (is *indexSearch) storeDateMetricID(date, metricID uint64, accountID, proje
 	return nil
 }
 
+func addReverseMetricGroupIfNeeded(items *indexItems, prefix []byte, mn *MetricName, metricID uint64) {
+	if bytes.IndexByte(mn.MetricGroup, '.') < 0 {
+		// The reverse metric group is needed only for Graphite-like metrics with dots.
+		return
+	}
+	// This is most likely a Graphite metric like 'foo.bar.baz'.
+	// Store the reverse metric name 'zab.rab.oof' in order to speed up search for '*.bar.baz'
+	// when the Graphite wildcard has a suffix matching a small number of time series.
+	items.B = append(items.B, prefix...)
+	items.B = marshalTagValue(items.B, graphiteReverseTagKey)
+	revBuf := kbPool.Get()
+	revBuf.B = reverseBytes(revBuf.B[:0], mn.MetricGroup)
+	items.B = marshalTagValue(items.B, revBuf.B)
+	kbPool.Put(revBuf)
+	items.B = encoding.MarshalUint64(items.B, metricID)
+	items.Next()
+}
+
+// The tag key for the reverse metric name, used for speeding up the search
+// for Graphite wildcards with a suffix matching a small number of time series,
+// i.e. '*.bar.baz'.
+//
+// It is expected that the given key isn't used by users.
+var graphiteReverseTagKey = []byte("\xff")
+
+func reverseBytes(dst, src []byte) []byte {
+	for i := len(src) - 1; i >= 0; i-- {
+		dst = append(dst, src[i])
+	}
+	return dst
+}
+
 func (is *indexSearch) hasDateMetricID(date, metricID uint64, accountID, projectID uint32) (bool, error) {
 	ts := &is.ts
 	kb := &is.kb
diff --git a/lib/storage/index_db_test.go b/lib/storage/index_db_test.go
index 2783df6f39..02e87eaea7 100644
--- a/lib/storage/index_db_test.go
+++ b/lib/storage/index_db_test.go
@@ -17,6 +17,20 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
 )
 
+func TestReverseBytes(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		result := reverseBytes(nil, []byte(s))
+		if string(result) != resultExpected {
+			t.Fatalf("unexpected result for reverseBytes(%q); got %q; want %q", s, result, resultExpected)
+		}
+	}
+	f("", "")
+	f("a", "a")
+	f("av", "va")
+	f("foo.bar", "rab.oof")
+}
+
 func TestMergeTagToMetricIDsRows(t *testing.T) {
 	f := func(items []string, expectedItems []string) {
 		t.Helper()
@@ -679,7 +693,7 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
 		mn.ProjectID = uint32((i + 1) % projectsCount)
 
 		// Init MetricGroup.
-		mn.MetricGroup = []byte(fmt.Sprintf("metricGroup_%d\x00\x01\x02", i%metricGroups))
+		mn.MetricGroup = []byte(fmt.Sprintf("metricGroup.%d\x00\x01\x02", i%metricGroups))
 
 		// Init other tags.
 		tagsCount := rand.Intn(10) + 1
diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go
index 60e62cddd4..56126437ff 100644
--- a/lib/storage/tag_filters.go
+++ b/lib/storage/tag_filters.go
@@ -61,17 +61,26 @@ func (tfs *TagFilters) Add(key, value []byte, isNegative, isRegexp bool) error {
 		// since it must filter out all the time series with the given key.
 	}
 
+	tf := tfs.addTagFilter()
+	if err := tf.Init(tfs.commonPrefix, key, value, isNegative, isRegexp); err != nil {
+		return fmt.Errorf("cannot initialize tagFilter: %s", err)
+	}
+	if len(tf.graphiteReverseSuffix) > 0 {
+		tfNew := tfs.addTagFilter()
+		if err := tfNew.Init(tfs.commonPrefix, graphiteReverseTagKey, []byte(regexp.QuoteMeta(string(tf.graphiteReverseSuffix))+".*"), false, true); err != nil {
+			return fmt.Errorf("cannot initialize reverse tag filter for Graphite wildcard: %s", err)
+		}
+	}
+	return nil
+}
+
+func (tfs *TagFilters) addTagFilter() *tagFilter {
 	if cap(tfs.tfs) > len(tfs.tfs) {
 		tfs.tfs = tfs.tfs[:len(tfs.tfs)+1]
 	} else {
 		tfs.tfs = append(tfs.tfs, tagFilter{})
 	}
-	tf := &tfs.tfs[len(tfs.tfs)-1]
-	err := tf.Init(tfs.commonPrefix, key, value, isNegative, isRegexp)
-	if err != nil {
-		return fmt.Errorf("cannot initialize tagFilter: %s", err)
-	}
-	return nil
+	return &tfs.tfs[len(tfs.tfs)-1]
 }
 
 // Finalize finalizes tfs and may return complementary TagFilters,
@@ -162,6 +171,10 @@ type tagFilter struct {
 	//
 	// Such a filter must be applied directly to metricNames.
 	matchesEmptyValue bool
+
+	// Contains the reverse suffix for a Graphite wildcard.
+	// I.e. for `{__name__=~"foo\\.[^.]*\\.bar\\.baz"}` the value will be `zab.rab.`
+	graphiteReverseSuffix []byte
 }
 
 func (tf *tagFilter) Less(other *tagFilter) bool {
@@ -243,6 +256,7 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
 	tf.orSuffixes = tf.orSuffixes[:0]
 	tf.reSuffixMatch = nil
 	tf.matchesEmptyValue = false
+	tf.graphiteReverseSuffix = tf.graphiteReverseSuffix[:0]
 
 	tf.prefix = append(tf.prefix, commonPrefix...)
 	tf.prefix = marshalTagValue(tf.prefix, key)
@@ -272,6 +286,10 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
 	if len(prefix) == 0 && !tf.isNegative && tf.reSuffixMatch(nil) {
 		tf.matchesEmptyValue = true
 	}
+	if !tf.isNegative && len(key) == 0 && strings.IndexByte(rcv.literalSuffix, '.') >= 0 {
+		// The reverse suffix is needed only for non-negative regexp filters on __name__ whose literal suffix contains dots.
+		tf.graphiteReverseSuffix = reverseBytes(tf.graphiteReverseSuffix[:0], []byte(rcv.literalSuffix))
+	}
 	return nil
 }
 
@@ -331,6 +349,7 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
 	sExpr := string(expr)
 	orValues := getOrValues(sExpr)
 	var reMatch func(b []byte) bool
+	var literalSuffix string
 	if len(orValues) > 0 {
 		if len(orValues) == 1 {
 			v := orValues[0]
@@ -348,12 +367,13 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
 			}
 		}
 	} else {
-		reMatch = getOptimizedReMatchFunc(re.Match, sExpr)
+		reMatch, literalSuffix = getOptimizedReMatchFunc(re.Match, sExpr)
 	}
 
 	// Put the reMatch in the cache.
 	rcv.orValues = orValues
 	rcv.reMatch = reMatch
+	rcv.literalSuffix = literalSuffix
 
 	regexpCacheLock.Lock()
 	if overflow := len(regexpCacheMap) - getMaxRegexpCacheSize(); overflow > 0 {
@@ -385,31 +405,33 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
 //   '.+literal.+'
 //
 // It returns reMatch if it cannot find optimized function.
-func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) func(b []byte) bool {
+//
+// It also returns the literal suffix from the expr.
+func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b []byte) bool, string) {
 	sre, err := syntax.Parse(expr, syntax.Perl)
 	if err != nil {
 		logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
 	}
-	if matchFunc := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
+	if matchFunc, literalSuffix := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
 		// Found optimized function for matching the expr.
-		return matchFunc
+		return matchFunc, literalSuffix
 	}
 	// Fall back to un-optimized reMatch.
- return reMatch + return reMatch, "" } -func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) func(b []byte) bool { +func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) (func(b []byte) bool, string) { if isDotStar(sre) { // '.*' return func(b []byte) bool { return true - } + }, "" } if isDotPlus(sre) { // '.+' return func(b []byte) bool { return len(b) > 0 - } + }, "" } switch sre.Op { case syntax.OpCapture: @@ -417,13 +439,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) return getOptimizedReMatchFuncExt(reMatch, sre.Sub[0]) case syntax.OpLiteral: if !isLiteral(sre) { - return nil + return nil, "" } s := string(sre.Rune) // Literal match return func(b []byte) bool { return string(b) == s - } + }, s case syntax.OpConcat: if len(sre.Sub) == 2 { if isLiteral(sre.Sub[0]) { @@ -432,13 +454,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // 'prefix.*' return func(b []byte) bool { return bytes.HasPrefix(b, prefix) - } + }, "" } if isDotPlus(sre.Sub[1]) { // 'prefix.+' return func(b []byte) bool { return len(b) > len(prefix) && bytes.HasPrefix(b, prefix) - } + }, "" } } if isLiteral(sre.Sub[1]) { @@ -447,13 +469,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.*suffix' return func(b []byte) bool { return bytes.HasSuffix(b, suffix) - } + }, string(suffix) } if isDotPlus(sre.Sub[0]) { // '.+suffix' return func(b []byte) bool { return len(b) > len(suffix) && bytes.HasSuffix(b[1:], suffix) - } + }, string(suffix) } } } @@ -464,13 +486,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.*middle.*' return func(b []byte) bool { return bytes.Contains(b, middle) - } + }, "" } if isDotPlus(sre.Sub[2]) { // '.*middle.+' return func(b []byte) bool { return len(b) > len(middle) && bytes.Contains(b[:len(b)-1], middle) - } + }, "" } } if isDotPlus(sre.Sub[0]) { @@ -478,13 +500,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.+middle.*' return func(b []byte) bool { return len(b) > len(middle) && bytes.Contains(b[1:], middle) - } + }, "" } if isDotPlus(sre.Sub[2]) { // '.+middle.+' return func(b []byte) bool { return len(b) > len(middle)+1 && bytes.Contains(b[1:len(b)-1], middle) - } + }, "" } } } @@ -518,9 +540,9 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) } // Fall back to slow path. 
return reMatch(bOrig) - } + }, string(suffix) default: - return nil + return nil, "" } } @@ -696,8 +718,9 @@ var ( ) type regexpCacheValue struct { - orValues []string - reMatch func(b []byte) bool + orValues []string + reMatch func(b []byte) bool + literalSuffix string } func getRegexpPrefix(b []byte) ([]byte, []byte) { diff --git a/lib/storage/tag_filters_test.go b/lib/storage/tag_filters_test.go index 8ff9345814..94fb7e8c8c 100644 --- a/lib/storage/tag_filters_test.go +++ b/lib/storage/tag_filters_test.go @@ -21,7 +21,7 @@ func TestExtractRegexpPrefix(t *testing.T) { } func TestGetRegexpFromCache(t *testing.T) { - f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) { + f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string, suffixExpected string) { t.Helper() for i := 0; i < 3; i++ { @@ -32,6 +32,9 @@ func TestGetRegexpFromCache(t *testing.T) { if !reflect.DeepEqual(rcv.orValues, orValuesExpected) { t.Fatalf("unexpected orValues for s=%q; got %q; want %q", s, rcv.orValues, orValuesExpected) } + if rcv.literalSuffix != suffixExpected { + t.Fatalf("unexpected literal suffix for s=%q; got %q; want %q", s, rcv.literalSuffix, suffixExpected) + } for _, expectedMatch := range expectedMatches { if !rcv.reMatch([]byte(expectedMatch)) { t.Fatalf("s=%q must match %q", s, expectedMatch) @@ -45,44 +48,45 @@ func TestGetRegexpFromCache(t *testing.T) { } } - f("", []string{""}, []string{""}, []string{"foo", "x"}) - f("foo", []string{"foo"}, []string{"foo"}, []string{"", "bar"}) - f("(?s)(foo)?", nil, []string{"foo", ""}, []string{"s", "bar"}) - f("foo.*", nil, []string{"foo", "foobar"}, []string{"xfoo", "xfoobar", "", "a"}) - f("foo(a|b)?", nil, []string{"fooa", "foob", "foo"}, []string{"xfoo", "xfoobar", "", "fooc", "fooba"}) - f(".*foo", nil, []string{"foo", "xfoo"}, []string{"foox", "xfoobar", "", "a"}) - f("(a|b)?foo", nil, []string{"foo", "afoo", "bfoo"}, []string{"foox", "xfoobar", "", "a"}) - f(".*foo.*", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}) - f(".*foo.+", nil, []string{"foo1", "xfoodff", "foox", "xfoobar"}, []string{"", "bar", "foo", "fox"}) - f(".+foo.+", nil, []string{"xfoo1", "xfoodff", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "foox", "xfoo"}) - f(".+foo.*", nil, []string{"xfoo", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "fox"}) - f(".+foo(a|b)?", nil, []string{"xfoo", "xfooa", "xafoob"}, []string{"", "bar", "foo", "foob"}) - f(".*foo(a|b)?", nil, []string{"foo", "foob", "xafoo", "xfooa"}, []string{"", "bar", "fooba"}) - f("(a|b)?foo(a|b)?", nil, []string{"foo", "foob", "afoo", "afooa"}, []string{"", "bar", "fooba", "xfoo"}) - f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}) - f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""}) - f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}) - f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}) - f("(a|b)foo(c|d)", []string{"afooc", "afood", "bfooc", "bfood"}, []string{"afooc", "bfood"}, []string{"foo", "", "afoo", "fooc", "xfood"}) - f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""}) - f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"}) - f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"}) - f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", 
"fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"}) - f("foo.*bar.*baz", nil, []string{"foobarbaz", "fooxbarxbaz", "foobarxbaz", "fooxbarbaz"}, []string{"", "afoobarx", "aaa", "afoobar", "foobarzaz"}) - f(".+foo.+(b|c).+", nil, []string{"xfooxbar", "xfooxca"}, []string{"", "foo", "foob", "xfooc", "xfoodc"}) + f("", []string{""}, []string{""}, []string{"foo", "x"}, "") + f("foo", []string{"foo"}, []string{"foo"}, []string{"", "bar"}, "") + f("(?s)(foo)?", nil, []string{"foo", ""}, []string{"s", "bar"}, "") + f("foo.*", nil, []string{"foo", "foobar"}, []string{"xfoo", "xfoobar", "", "a"}, "") + f("foo(a|b)?", nil, []string{"fooa", "foob", "foo"}, []string{"xfoo", "xfoobar", "", "fooc", "fooba"}, "") + f(".*foo", nil, []string{"foo", "xfoo"}, []string{"foox", "xfoobar", "", "a"}, "foo") + f("(a|b)?foo", nil, []string{"foo", "afoo", "bfoo"}, []string{"foox", "xfoobar", "", "a"}, "foo") + f(".*foo.*", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}, "") + f(".*foo.+", nil, []string{"foo1", "xfoodff", "foox", "xfoobar"}, []string{"", "bar", "foo", "fox"}, "") + f(".+foo.+", nil, []string{"xfoo1", "xfoodff", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "foox", "xfoo"}, "") + f(".+foo.*", nil, []string{"xfoo", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "fox"}, "") + f(".+foo(a|b)?", nil, []string{"xfoo", "xfooa", "xafoob"}, []string{"", "bar", "foo", "foob"}, "") + f(".*foo(a|b)?", nil, []string{"foo", "foob", "xafoo", "xfooa"}, []string{"", "bar", "fooba"}, "") + f("(a|b)?foo(a|b)?", nil, []string{"foo", "foob", "afoo", "afooa"}, []string{"", "bar", "fooba", "xfoo"}, "") + f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}, "") + f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""}, "foo") + f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}, "") + f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}, "") + f("(a|b)foo(c|d)", []string{"afooc", "afood", "bfooc", "bfood"}, []string{"afooc", "bfood"}, []string{"foo", "", "afoo", "fooc", "xfood"}, "") + f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""}, "") + f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"}, "bar") + f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"}, "bar") + f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", "fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"}, "") + f("foo.*bar.*baz", nil, []string{"foobarbaz", "fooxbarxbaz", "foobarxbaz", "fooxbarbaz"}, []string{"", "afoobarx", "aaa", "afoobar", "foobarzaz"}, "baz") + f(".+foo.+(b|c).+", nil, []string{"xfooxbar", "xfooxca"}, []string{"", "foo", "foob", "xfooc", "xfoodc"}, "") - f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"}) - f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"}) - f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"}) - f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"}) + f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"}, "") + f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"}, "") + f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"}, "") + 
f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"}, "") - f(".*", nil, []string{"", "a", "foo", "foobar"}, nil) - f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil) - f(".+", nil, []string{"a", "foo"}, []string{""}) - f("(.+)*(foo)?", nil, []string{"a", "foo", ""}, nil) + f(".*", nil, []string{"", "a", "foo", "foobar"}, nil, "") + f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil, "") + f(".+", nil, []string{"a", "foo"}, []string{""}, "") + f("(.+)*(foo)?", nil, []string{"a", "foo", ""}, nil, "") // Graphite-like regexps - f(`foo\.[^.]*\.bar\.ba(xx|zz)[^.]*\.a`, nil, []string{"foo.ss.bar.baxx.a", "foo.s.bar.bazzasd.a"}, []string{"", "foo", "foo.ss.xar.baxx.a"}) + f(`foo\.[^.]*\.bar\.ba(xx|zz)[^.]*\.a`, nil, []string{"foo.ss.bar.baxx.a", "foo.s.bar.bazzasd.a"}, []string{"", "foo", "foo.ss.xar.baxx.a"}, ".a") + f(`foo\.[^.]*?\.bar\.baz\.aaa`, nil, []string{"foo.aa.bar.baz.aaa"}, []string{"", "foo"}, ".bar.baz.aaa") } func TestTagFilterMatchSuffix(t *testing.T) {