From efb1989193c48c54876198778321666f6d8de450 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 17 Oct 2020 01:11:02 +0300 Subject: [PATCH] lib/storage: small code adjustments after d2960a20e0c5c490c77f86f6555c4a8236f547dd Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/781 --- CHANGELOG.md | 2 ++ lib/storage/tag_filters.go | 43 +++++++++++++------------- lib/storage/tag_filters_timing_test.go | 10 +++--- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eee4217bc..99fd65efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ * `rollup_func(foo{filters}[d]) op bar` -> `rollup_func(foo{filters}[d]) op bar{filters}` * `transform_func(foo{filters}) op bar` -> `transform_func(foo{filters}) op bar{filters}` * `num_or_scalar op foo{filters} op bar` -> `num_or_scalar op foo{filters} op bar{filters}` +* FEATURE: improve time series search for queries with multiple label filters. I.e. `foo{label1="value", label2=~"regexp"}`. + See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/781 * BUGFIX: vmagent: properly handle OpenStack endpoint ending with `v3.0` such as `https://ostack.example.com:5000/v3.0` in the same way as Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/728#issuecomment-709914803 diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go index 9bc67d1d3..f0a9fe485 100644 --- a/lib/storage/tag_filters.go +++ b/lib/storage/tag_filters.go @@ -286,7 +286,7 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp // during the search for matching metricIDs. 
tf.orSuffixes = append(tf.orSuffixes[:0], "") tf.isEmptyMatch = len(prefix) == 0 - tf.matchCost = defaultCost + tf.matchCost = fullMatchCost return nil } rcv, err := getRegexpFromCache(expr) @@ -368,7 +368,6 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) { reMatch = func(b []byte) bool { return string(b) == v } - reCost = defaultLiteralCost } else { reMatch = func(b []byte) bool { for _, v := range orValues { @@ -378,8 +377,8 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) { } return false } - reCost = uint64(len(orValues)) * defaultLiteralCost } + reCost = uint64(len(orValues)) * literalMatchCost } else { reMatch, literalSuffix, reCost = getOptimizedReMatchFunc(re.Match, sExpr) } @@ -433,15 +432,17 @@ func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b [ return matchFunc, suffixUnescaped, reCost } // Fall back to un-optimized reMatch. - return reMatch, "", defaultReCost + return reMatch, "", reMatchCost } // The following & default cost values are returned from BenchmarkOptimizedReMatchCost - -var ( - defaultCost uint64 = 1 - defaultLiteralCost uint64 = 3 - defaultReCost uint64 = 140 +const ( + fullMatchCost = 1 + prefixMatchCost = 2 + literalMatchCost = 3 + suffixMatchCost = 4 + middleMatchCost = 6 + reMatchCost = 100 ) func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) (func(b []byte) bool, string, uint64) { @@ -449,13 +450,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.*' return func(b []byte) bool { return true - }, "", 1 + }, "", fullMatchCost } if isDotPlus(sre) { // '.+' return func(b []byte) bool { return len(b) > 0 - }, "", 1 + }, "", fullMatchCost } switch sre.Op { case syntax.OpCapture: @@ -469,7 +470,7 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // Literal match return func(b []byte) bool { return string(b) == s - }, s, defaultLiteralCost + }, s, literalMatchCost case 
syntax.OpConcat: if len(sre.Sub) == 2 { if isLiteral(sre.Sub[0]) { @@ -478,13 +479,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // 'prefix.*' return func(b []byte) bool { return bytes.HasPrefix(b, prefix) - }, "", 2 + }, "", prefixMatchCost } if isDotPlus(sre.Sub[1]) { // 'prefix.+' return func(b []byte) bool { return len(b) > len(prefix) && bytes.HasPrefix(b, prefix) - }, "", 2 + }, "", prefixMatchCost } } if isLiteral(sre.Sub[1]) { @@ -493,13 +494,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.*suffix' return func(b []byte) bool { return bytes.HasSuffix(b, suffix) - }, string(suffix), 3 + }, string(suffix), suffixMatchCost } if isDotPlus(sre.Sub[0]) { // '.+suffix' return func(b []byte) bool { return len(b) > len(suffix) && bytes.HasSuffix(b[1:], suffix) - }, string(suffix), 3 + }, string(suffix), suffixMatchCost } } } @@ -510,13 +511,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.*middle.*' return func(b []byte) bool { return bytes.Contains(b, middle) - }, "", 5 + }, "", middleMatchCost } if isDotPlus(sre.Sub[2]) { // '.*middle.+' return func(b []byte) bool { return len(b) > len(middle) && bytes.Contains(b[:len(b)-1], middle) - }, "", 5 + }, "", middleMatchCost } } if isDotPlus(sre.Sub[0]) { @@ -524,13 +525,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) // '.+middle.*' return func(b []byte) bool { return len(b) > len(middle) && bytes.Contains(b[1:], middle) - }, "", 5 + }, "", middleMatchCost } if isDotPlus(sre.Sub[2]) { // '.+middle.+' return func(b []byte) bool { return len(b) > len(middle)+1 && bytes.Contains(b[1:len(b)-1], middle) - }, "", 5 + }, "", middleMatchCost } } } @@ -564,7 +565,7 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) } // Fall back to slow path. 
return reMatch(bOrig) - }, string(suffix), defaultReCost + }, string(suffix), reMatchCost default: return nil, "", 0 } diff --git a/lib/storage/tag_filters_timing_test.go b/lib/storage/tag_filters_timing_test.go index f68c765c7..af119fbbe 100644 --- a/lib/storage/tag_filters_timing_test.go +++ b/lib/storage/tag_filters_timing_test.go @@ -312,13 +312,13 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) { // Run the following command to get the execution cost of all matches // -// go test -run=none -bench=BenchmarkOptimizedReMatchCost -count 20 | tee cost.txt +// go test -run=none -bench=BenchmarkOptimizedReMatchCost -count 20 github.com/VictoriaMetrics/VictoriaMetrics/lib/storage | tee cost.txt // benchstat ./cost.txt // // Calculate the multiplier of default for each match overhead. func BenchmarkOptimizedReMatchCost(b *testing.B) { - b.Run("default", func(b *testing.B) { + b.Run("fullMatchCost", func(b *testing.B) { reMatch := func(b []byte) bool { return len(b) == 0 } @@ -331,7 +331,7 @@ func BenchmarkOptimizedReMatchCost(b *testing.B) { } }) }) - b.Run("literal match", func(b *testing.B) { + b.Run("literalMatchCost", func(b *testing.B) { s := "foo1.bar.baz.sss.ddd" reMatch := func(b []byte) bool { return string(b) == s @@ -345,7 +345,7 @@ func BenchmarkOptimizedReMatchCost(b *testing.B) { } }) }) - b.Run("foo|bar|baz", func(b *testing.B) { + b.Run("threeLiteralsMatchCost", func(b *testing.B) { s := []string{"foo", "bar", "baz"} reMatch := func(b []byte) bool { for _, v := range s { @@ -502,7 +502,7 @@ func BenchmarkOptimizedReMatchCost(b *testing.B) { } }) }) - b.Run("default", func(b *testing.B) { + b.Run("reMatchCost", func(b *testing.B) { re := regexp.MustCompile(`foo[^.]*?\.bar\.baz\.[^.]*?\.ddd`) reMatch := func(b []byte) bool { return re.Match(b)