lib/storage: improve search speed for time series matching Graphite wildcards such as foo.*.bar.baz

Add an index for reversed Graphite-like metric names, i.e. names containing dots. Use this index during search for filters
like `__name__=~"foo\\.[^.]*\\.bar\\.baz"` that end with a non-empty literal suffix containing dots, i.e. `.bar.baz` in this case.
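To make the idea concrete, here is a minimal, self-contained Go sketch (not the code from this change; the metric names and the `reverse` helper are illustrative, and a sorted string slice stands in for the on-disk index): reversing the names turns the dotted literal suffix into a prefix, which an ordered index can serve with a binary search plus a short scan.

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// reverse returns s with its bytes in reverse order, e.g. "foo.bar.baz" -> "zab.rab.oof".
func reverse(s string) string {
	b := []byte(s)
	for i, j := 0, len(b)-1; i < j; i, j = i+1, j-1 {
		b[i], b[j] = b[j], b[i]
	}
	return string(b)
}

func main() {
	// An ordered "index" of reversed metric names (the real index rows also
	// carry a common prefix, a reserved tag key and the MetricID).
	names := []string{"foo.a.bar.baz", "foo.b.bar.baz", "foo.a.qux.baz", "bar.baz.other"}
	var rows []string
	for _, n := range names {
		rows = append(rows, reverse(n))
	}
	sort.Strings(rows)

	// The filter `foo\.[^.]*\.bar\.baz` ends with the literal suffix ".bar.baz".
	// On reversed names this suffix becomes the prefix "zab.rab.", so candidate
	// series are located with a binary search plus a short prefix scan.
	prefix := reverse(".bar.baz")
	for i := sort.SearchStrings(rows, prefix); i < len(rows) && strings.HasPrefix(rows[i], prefix); i++ {
		fmt.Println(reverse(rows[i])) // candidates still go through the full regexp check
	}
}
```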

This change may "hide" historical time series during queries, since index entries created before this change have no reverse-name rows. The workaround is to add `[.]*` to the end of the regexp label filter,
i.e. `foo\\.[^.]*\\.bar\\.baz` should be substituted with `foo\\.[^.]*\\.bar\\.baz[.]*`.
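A small sketch of why the workaround is safe, under the assumption that real metric names never end with a dot (the names below are made up): `[.]*` matches the empty string, so the rewritten filter selects the same series, while its regexp no longer ends with a plain literal suffix and therefore falls back to the old search path, which also covers series indexed before this change.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Original filter and the workaround with a trailing `[.]*`.
	// Label filter regexps are fully anchored, hence the ^ and $ here.
	orig := regexp.MustCompile(`^foo\.[^.]*\.bar\.baz$`)
	fixed := regexp.MustCompile(`^foo\.[^.]*\.bar\.baz[.]*$`)

	// For names that don't end with a dot both filters accept exactly the
	// same set of metric names.
	for _, name := range []string{"foo.a.bar.baz", "foo.bar.baz", "foo.a.bar.qux"} {
		fmt.Printf("%-15s orig=%v fixed=%v\n", name, orig.MatchString(name), fixed.MatchString(name))
	}
}
```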
Aliaksandr Valialkin 2020-05-27 21:35:58 +03:00
parent fc32881105
commit b0131c79b6
4 changed files with 139 additions and 64 deletions


@@ -687,6 +687,7 @@ func (db *indexDB) createIndexes(tsid *TSID, mn *MetricName) error {
items.B = marshalTagValue(items.B, mn.MetricGroup)
items.B = encoding.MarshalUint64(items.B, tsid.MetricID)
items.Next()
addReverseMetricGroupIfNeeded(items, commonPrefix.B, mn, tsid.MetricID)
// For each tag create tag -> MetricID index.
for i := range mn.Tags {
@@ -2609,6 +2610,7 @@ func (is *indexSearch) storeDateMetricID(date, metricID uint64, accountID, proje
items.B = marshalTagValue(items.B, mn.MetricGroup)
items.B = encoding.MarshalUint64(items.B, metricID)
items.Next()
addReverseMetricGroupIfNeeded(items, kb.B, mn, metricID)
for i := range mn.Tags {
tag := &mn.Tags[i]
items.B = append(items.B, kb.B...)
@@ -2622,6 +2624,38 @@ func (is *indexSearch) storeDateMetricID(date, metricID uint64, accountID, proje
return nil
}
func addReverseMetricGroupIfNeeded(items *indexItems, prefix []byte, mn *MetricName, metricID uint64) {
if bytes.IndexByte(mn.MetricGroup, '.') < 0 {
// The reverse metric group is needed only for Graphite-like metrics with dots.
return
}
// This is most likely a Graphite metric like 'foo.bar.baz'.
// Store reverse metric name 'zab.rab.oof' in order to speed up search for '*.bar.baz'
// when the Graphite wildcard has a suffix matching a small number of time series.
items.B = append(items.B, prefix...)
items.B = marshalTagValue(items.B, graphiteReverseTagKey)
revBuf := kbPool.Get()
revBuf.B = reverseBytes(revBuf.B[:0], mn.MetricGroup)
items.B = marshalTagValue(items.B, revBuf.B)
kbPool.Put(revBuf)
items.B = encoding.MarshalUint64(items.B, metricID)
items.Next()
}
// The tag key for the reversed metric name, used for speeding up searching
// for Graphite wildcards with a suffix matching a small number of time series,
// e.g. '*.bar.baz'.
//
// It is expected that the given key isn't used by users.
var graphiteReverseTagKey = []byte("\xff")
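// reverseBytes appends the bytes of src to dst in reverse order and returns the result.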
func reverseBytes(dst, src []byte) []byte {
for i := len(src) - 1; i >= 0; i-- {
dst = append(dst, src[i])
}
return dst
}
func (is *indexSearch) hasDateMetricID(date, metricID uint64, accountID, projectID uint32) (bool, error) {
ts := &is.ts
kb := &is.kb


@@ -17,6 +17,20 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
)
func TestReverseBytes(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := reverseBytes(nil, []byte(s))
if string(result) != resultExpected {
t.Fatalf("unexpected result for reverseBytes(%q); got %q; want %q", s, result, resultExpected)
}
}
f("", "")
f("a", "a")
f("av", "va")
f("foo.bar", "rab.oof")
}
func TestMergeTagToMetricIDsRows(t *testing.T) {
f := func(items []string, expectedItems []string) {
t.Helper()
@@ -679,7 +693,7 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
mn.ProjectID = uint32((i + 1) % projectsCount)
// Init MetricGroup.
mn.MetricGroup = []byte(fmt.Sprintf("metricGroup_%d\x00\x01\x02", i%metricGroups))
mn.MetricGroup = []byte(fmt.Sprintf("metricGroup.%d\x00\x01\x02", i%metricGroups))
// Init other tags.
tagsCount := rand.Intn(10) + 1


@@ -61,17 +61,26 @@ func (tfs *TagFilters) Add(key, value []byte, isNegative, isRegexp bool) error {
// since it must filter out all the time series with the given key.
}
tf := tfs.addTagFilter()
if err := tf.Init(tfs.commonPrefix, key, value, isNegative, isRegexp); err != nil {
return fmt.Errorf("cannot initialize tagFilter: %s", err)
}
if len(tf.graphiteReverseSuffix) > 0 {
tf = tfs.addTagFilter()
if err := tf.Init(tfs.commonPrefix, graphiteReverseTagKey, tf.graphiteReverseSuffix, false, false); err != nil {
return fmt.Errorf("cannot initialize reverse tag filter for Graphite wildcard: %s", err)
}
}
return nil
}
func (tfs *TagFilters) addTagFilter() *tagFilter {
if cap(tfs.tfs) > len(tfs.tfs) {
tfs.tfs = tfs.tfs[:len(tfs.tfs)+1]
} else {
tfs.tfs = append(tfs.tfs, tagFilter{})
}
tf := &tfs.tfs[len(tfs.tfs)-1]
err := tf.Init(tfs.commonPrefix, key, value, isNegative, isRegexp)
if err != nil {
return fmt.Errorf("cannot initialize tagFilter: %s", err)
}
return nil
return &tfs.tfs[len(tfs.tfs)-1]
}
// Finalize finalizes tfs and may return complementary TagFilters,
@@ -162,6 +171,10 @@ type tagFilter struct {
//
// Such a filter must be applied directly to metricNames.
matchesEmptyValue bool
// Contains reverse suffix for Graphite wildcard.
// E.g. for `{__name__=~"foo\\.[^.]*\\.bar\\.baz"}` the value will be `zab.rab.`
graphiteReverseSuffix []byte
}
func (tf *tagFilter) Less(other *tagFilter) bool {
@@ -243,6 +256,7 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
tf.orSuffixes = tf.orSuffixes[:0]
tf.reSuffixMatch = nil
tf.matchesEmptyValue = false
tf.graphiteReverseSuffix = tf.graphiteReverseSuffix[:0]
tf.prefix = append(tf.prefix, commonPrefix...)
tf.prefix = marshalTagValue(tf.prefix, key)
@@ -272,6 +286,10 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
if len(prefix) == 0 && !tf.isNegative && tf.reSuffixMatch(nil) {
tf.matchesEmptyValue = true
}
if !tf.isNegative && len(key) == 0 && strings.IndexByte(rcv.literalSuffix, '.') >= 0 {
// The reverse suffix is needed only for non-negative regexp filters on __name__ whose literal suffix contains dots.
tf.graphiteReverseSuffix = reverseBytes(tf.graphiteReverseSuffix[:0], []byte(rcv.literalSuffix))
}
return nil
}
@@ -331,6 +349,7 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
sExpr := string(expr)
orValues := getOrValues(sExpr)
var reMatch func(b []byte) bool
var literalSuffix string
if len(orValues) > 0 {
if len(orValues) == 1 {
v := orValues[0]
@@ -348,12 +367,13 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
}
}
} else {
reMatch = getOptimizedReMatchFunc(re.Match, sExpr)
reMatch, literalSuffix = getOptimizedReMatchFunc(re.Match, sExpr)
}
// Put the reMatch in the cache.
rcv.orValues = orValues
rcv.reMatch = reMatch
rcv.literalSuffix = literalSuffix
regexpCacheLock.Lock()
if overflow := len(regexpCacheMap) - getMaxRegexpCacheSize(); overflow > 0 {
@@ -385,31 +405,33 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
// '.+literal.+'
//
// It returns reMatch if it cannot find an optimized function.
func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) func(b []byte) bool {
//
// It also returns the literal suffix from the expr.
func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b []byte) bool, string) {
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
}
if matchFunc := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
if matchFunc, literalSuffix := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
// Found optimized function for matching the expr.
return matchFunc
return matchFunc, literalSuffix
}
// Fall back to un-optimized reMatch.
return reMatch
return reMatch, ""
}
func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) func(b []byte) bool {
func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) (func(b []byte) bool, string) {
if isDotStar(sre) {
// '.*'
return func(b []byte) bool {
return true
}
}, ""
}
if isDotPlus(sre) {
// '.+'
return func(b []byte) bool {
return len(b) > 0
}
}, ""
}
switch sre.Op {
case syntax.OpCapture:
@@ -417,13 +439,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
return getOptimizedReMatchFuncExt(reMatch, sre.Sub[0])
case syntax.OpLiteral:
if !isLiteral(sre) {
return nil
return nil, ""
}
s := string(sre.Rune)
// Literal match
return func(b []byte) bool {
return string(b) == s
}
}, s
case syntax.OpConcat:
if len(sre.Sub) == 2 {
if isLiteral(sre.Sub[0]) {
@@ -432,13 +454,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
// 'prefix.*'
return func(b []byte) bool {
return bytes.HasPrefix(b, prefix)
}
}, ""
}
if isDotPlus(sre.Sub[1]) {
// 'prefix.+'
return func(b []byte) bool {
return len(b) > len(prefix) && bytes.HasPrefix(b, prefix)
}
}, ""
}
}
if isLiteral(sre.Sub[1]) {
@@ -447,13 +469,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
// '.*suffix'
return func(b []byte) bool {
return bytes.HasSuffix(b, suffix)
}
}, string(suffix)
}
if isDotPlus(sre.Sub[0]) {
// '.+suffix'
return func(b []byte) bool {
return len(b) > len(suffix) && bytes.HasSuffix(b[1:], suffix)
}
}, string(suffix)
}
}
}
@@ -464,13 +486,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
// '.*middle.*'
return func(b []byte) bool {
return bytes.Contains(b, middle)
}
}, ""
}
if isDotPlus(sre.Sub[2]) {
// '.*middle.+'
return func(b []byte) bool {
return len(b) > len(middle) && bytes.Contains(b[:len(b)-1], middle)
}
}, ""
}
}
if isDotPlus(sre.Sub[0]) {
@@ -478,13 +500,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
// '.+middle.*'
return func(b []byte) bool {
return len(b) > len(middle) && bytes.Contains(b[1:], middle)
}
}, ""
}
if isDotPlus(sre.Sub[2]) {
// '.+middle.+'
return func(b []byte) bool {
return len(b) > len(middle)+1 && bytes.Contains(b[1:len(b)-1], middle)
}
}, ""
}
}
}
@@ -518,9 +540,9 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
}
// Fall back to slow path.
return reMatch(bOrig)
}
}, string(suffix)
default:
return nil
return nil, ""
}
}
@@ -696,8 +718,9 @@ var (
)
type regexpCacheValue struct {
orValues []string
reMatch func(b []byte) bool
orValues []string
reMatch func(b []byte) bool
literalSuffix string
}
func getRegexpPrefix(b []byte) ([]byte, []byte) {

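The tag_filters.go changes above thread a `literalSuffix` value from the regexp optimizer into `regexpCacheValue`. As a rough, self-contained illustration (a simplified stand-in, not the function above; it ignores the or-values and case-insensitive paths), a trailing literal can be pulled out of a parsed regexp with `regexp/syntax`:

```go
package main

import (
	"fmt"
	"regexp/syntax"
)

// literalSuffix returns the trailing literal of expr, if any.
func literalSuffix(expr string) string {
	sre, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return ""
	}
	for {
		switch sre.Op {
		case syntax.OpCapture:
			// Unwrap capturing groups such as `(...)`.
			sre = sre.Sub[0]
		case syntax.OpConcat:
			// The trailing literal, if any, is the last element of a concatenation.
			sre = sre.Sub[len(sre.Sub)-1]
		case syntax.OpLiteral:
			return string(sre.Rune)
		default:
			return ""
		}
	}
}

func main() {
	fmt.Println(literalSuffix(`foo\.[^.]*\.bar\.baz`)) // ".bar.baz"
	fmt.Println(literalSuffix(`foo.*bar`))             // "bar"
	fmt.Println(literalSuffix(`foo.*`))                // "" (no literal tail)
}
```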

@@ -21,7 +21,7 @@ func TestExtractRegexpPrefix(t *testing.T) {
}
func TestGetRegexpFromCache(t *testing.T) {
f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) {
f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string, suffixExpected string) {
t.Helper()
for i := 0; i < 3; i++ {
@@ -32,6 +32,9 @@ func TestGetRegexpFromCache(t *testing.T) {
if !reflect.DeepEqual(rcv.orValues, orValuesExpected) {
t.Fatalf("unexpected orValues for s=%q; got %q; want %q", s, rcv.orValues, orValuesExpected)
}
if rcv.literalSuffix != suffixExpected {
t.Fatalf("unexpected literal suffix for s=%q; got %q; want %q", s, rcv.literalSuffix, suffixExpected)
}
for _, expectedMatch := range expectedMatches {
if !rcv.reMatch([]byte(expectedMatch)) {
t.Fatalf("s=%q must match %q", s, expectedMatch)
@@ -45,44 +48,45 @@ }
}
}
f("", []string{""}, []string{""}, []string{"foo", "x"})
f("foo", []string{"foo"}, []string{"foo"}, []string{"", "bar"})
f("(?s)(foo)?", nil, []string{"foo", ""}, []string{"s", "bar"})
f("foo.*", nil, []string{"foo", "foobar"}, []string{"xfoo", "xfoobar", "", "a"})
f("foo(a|b)?", nil, []string{"fooa", "foob", "foo"}, []string{"xfoo", "xfoobar", "", "fooc", "fooba"})
f(".*foo", nil, []string{"foo", "xfoo"}, []string{"foox", "xfoobar", "", "a"})
f("(a|b)?foo", nil, []string{"foo", "afoo", "bfoo"}, []string{"foox", "xfoobar", "", "a"})
f(".*foo.*", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"})
f(".*foo.+", nil, []string{"foo1", "xfoodff", "foox", "xfoobar"}, []string{"", "bar", "foo", "fox"})
f(".+foo.+", nil, []string{"xfoo1", "xfoodff", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "foox", "xfoo"})
f(".+foo.*", nil, []string{"xfoo", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "fox"})
f(".+foo(a|b)?", nil, []string{"xfoo", "xfooa", "xafoob"}, []string{"", "bar", "foo", "foob"})
f(".*foo(a|b)?", nil, []string{"foo", "foob", "xafoo", "xfooa"}, []string{"", "bar", "fooba"})
f("(a|b)?foo(a|b)?", nil, []string{"foo", "foob", "afoo", "afooa"}, []string{"", "bar", "fooba", "xfoo"})
f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"})
f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""})
f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""})
f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""})
f("(a|b)foo(c|d)", []string{"afooc", "afood", "bfooc", "bfood"}, []string{"afooc", "bfood"}, []string{"foo", "", "afoo", "fooc", "xfood"})
f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""})
f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"})
f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"})
f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", "fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"})
f("foo.*bar.*baz", nil, []string{"foobarbaz", "fooxbarxbaz", "foobarxbaz", "fooxbarbaz"}, []string{"", "afoobarx", "aaa", "afoobar", "foobarzaz"})
f(".+foo.+(b|c).+", nil, []string{"xfooxbar", "xfooxca"}, []string{"", "foo", "foob", "xfooc", "xfoodc"})
f("", []string{""}, []string{""}, []string{"foo", "x"}, "")
f("foo", []string{"foo"}, []string{"foo"}, []string{"", "bar"}, "")
f("(?s)(foo)?", nil, []string{"foo", ""}, []string{"s", "bar"}, "")
f("foo.*", nil, []string{"foo", "foobar"}, []string{"xfoo", "xfoobar", "", "a"}, "")
f("foo(a|b)?", nil, []string{"fooa", "foob", "foo"}, []string{"xfoo", "xfoobar", "", "fooc", "fooba"}, "")
f(".*foo", nil, []string{"foo", "xfoo"}, []string{"foox", "xfoobar", "", "a"}, "foo")
f("(a|b)?foo", nil, []string{"foo", "afoo", "bfoo"}, []string{"foox", "xfoobar", "", "a"}, "foo")
f(".*foo.*", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}, "")
f(".*foo.+", nil, []string{"foo1", "xfoodff", "foox", "xfoobar"}, []string{"", "bar", "foo", "fox"}, "")
f(".+foo.+", nil, []string{"xfoo1", "xfoodff", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "foox", "xfoo"}, "")
f(".+foo.*", nil, []string{"xfoo", "xfoox", "xfoobar"}, []string{"", "bar", "foo", "fox"}, "")
f(".+foo(a|b)?", nil, []string{"xfoo", "xfooa", "xafoob"}, []string{"", "bar", "foo", "foob"}, "")
f(".*foo(a|b)?", nil, []string{"foo", "foob", "xafoo", "xfooa"}, []string{"", "bar", "fooba"}, "")
f("(a|b)?foo(a|b)?", nil, []string{"foo", "foob", "afoo", "afooa"}, []string{"", "bar", "fooba", "xfoo"}, "")
f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}, "")
f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""}, "foo")
f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}, "")
f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}, "")
f("(a|b)foo(c|d)", []string{"afooc", "afood", "bfooc", "bfood"}, []string{"afooc", "bfood"}, []string{"foo", "", "afoo", "fooc", "xfood"}, "")
f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""}, "")
f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"}, "bar")
f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"}, "bar")
f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", "fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"}, "")
f("foo.*bar.*baz", nil, []string{"foobarbaz", "fooxbarxbaz", "foobarxbaz", "fooxbarbaz"}, []string{"", "afoobarx", "aaa", "afoobar", "foobarzaz"}, "baz")
f(".+foo.+(b|c).+", nil, []string{"xfooxbar", "xfooxca"}, []string{"", "foo", "foob", "xfooc", "xfoodc"}, "")
f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"})
f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"})
f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"})
f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"})
f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"}, "")
f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"}, "")
f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"}, "")
f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"}, "")
f(".*", nil, []string{"", "a", "foo", "foobar"}, nil)
f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil)
f(".+", nil, []string{"a", "foo"}, []string{""})
f("(.+)*(foo)?", nil, []string{"a", "foo", ""}, nil)
f(".*", nil, []string{"", "a", "foo", "foobar"}, nil, "")
f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil, "")
f(".+", nil, []string{"a", "foo"}, []string{""}, "")
f("(.+)*(foo)?", nil, []string{"a", "foo", ""}, nil, "")
// Graphite-like regexps
f(`foo\.[^.]*\.bar\.ba(xx|zz)[^.]*\.a`, nil, []string{"foo.ss.bar.baxx.a", "foo.s.bar.bazzasd.a"}, []string{"", "foo", "foo.ss.xar.baxx.a"})
f(`foo\.[^.]*\.bar\.ba(xx|zz)[^.]*\.a`, nil, []string{"foo.ss.bar.baxx.a", "foo.s.bar.bazzasd.a"}, []string{"", "foo", "foo.ss.xar.baxx.a"}, ".a")
f(`foo\.[^.]*?\.bar\.baz\.aaa`, nil, []string{"foo.aa.bar.baz.aaa"}, []string{"", "foo"}, ".bar.baz.aaa")
}
func TestTagFilterMatchSuffix(t *testing.T) {