From c49751adf8b59ae49faeb86f49e4cad38ec68d75 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 26 Aug 2022 11:57:12 +0300 Subject: [PATCH] lib/regexutil: add Simplify() function for simplifying the regular expression --- lib/promrelabel/config.go | 21 +++- lib/promrelabel/config_test.go | 18 +++ lib/regexutil/regexutil.go | 149 +++++++++++++++++++++++- lib/regexutil/regexutil_test.go | 72 +++++++++++- lib/storage/metric_name.go | 4 +- lib/storage/tag_filters.go | 153 ++++--------------------- lib/storage/tag_filters_test.go | 117 +++++++++---------- lib/storage/tag_filters_timing_test.go | 46 ++++++++ 8 files changed, 368 insertions(+), 212 deletions(-) diff --git a/lib/promrelabel/config.go b/lib/promrelabel/config.go index d3f2ee6205..56e7236d44 100644 --- a/lib/promrelabel/config.go +++ b/lib/promrelabel/config.go @@ -197,14 +197,19 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) { if rc.Separator != nil { separator = *rc.Separator } + action := strings.ToLower(rc.Action) + if action == "" { + action = "replace" + } targetLabel := rc.TargetLabel regexCompiled := defaultRegexForRelabelConfig regexOriginalCompiled := defaultOriginalRegexForRelabelConfig var regexOrValues []string - if rc.Regex != nil { - regex := regexutil.RemoveStartEndAnchors(rc.Regex.S) + if rc.Regex != nil && !isDefaultRegex(rc.Regex.S) { + regex := rc.Regex.S regexOrig := regex if rc.Action != "replace_all" && rc.Action != "labelmap_all" { + regex = regexutil.RemoveStartEndAnchors(regex) regex = "^(?:" + regex + ")$" } re, err := regexp.Compile(regex) @@ -232,10 +237,6 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) { if rc.Labels != nil { graphiteLabelRules = newGraphiteLabelRules(rc.Labels) } - action := rc.Action - if action == "" { - action = "replace" - } switch action { case "graphite": if graphiteMatchTemplate == nil { @@ -354,3 +355,11 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) { 
hasLabelReferenceInReplacement: strings.Contains(replacement, "{{"), }, nil } + +func isDefaultRegex(expr string) bool { + prefix, suffix := regexutil.Simplify(expr) + if prefix != "" { + return false + } + return suffix == ".*" +} diff --git a/lib/promrelabel/config_test.go b/lib/promrelabel/config_test.go index 7cbbf7484e..d567064921 100644 --- a/lib/promrelabel/config_test.go +++ b/lib/promrelabel/config_test.go @@ -455,3 +455,21 @@ func TestParseRelabelConfigsFailure(t *testing.T) { }) }) } + +func TestIsDefaultRegex(t *testing.T) { + f := func(s string, resultExpected bool) { + t.Helper() + result := isDefaultRegex(s) + if result != resultExpected { + t.Fatalf("unexpected result for isDefaultRegex(%q); got %v; want %v", s, result, resultExpected) + } + } + f("", false) + f("foo", false) + f(".+", false) + f("a.*", false) + f(".*", true) + f("(.*)", true) + f("^.*$", true) + f("(?:.*)", true) +} diff --git a/lib/regexutil/regexutil.go b/lib/regexutil/regexutil.go index 44bf40e743..ca218098f9 100644 --- a/lib/regexutil/regexutil.go +++ b/lib/regexutil/regexutil.go @@ -1,11 +1,10 @@ package regexutil import ( + "fmt" "regexp/syntax" "sort" "strings" - - "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) // RemoveStartEndAnchors removes '^' at the start of expr and '$' at the end of the expr. @@ -28,15 +27,26 @@ func RemoveStartEndAnchors(expr string) string { // It returns an empty list if it is impossible to extract "or" values from the regexp. 
func GetOrValues(expr string) []string { expr = RemoveStartEndAnchors(expr) - sre, err := syntax.Parse(expr, syntax.Perl) + prefix, tailExpr := Simplify(expr) + if tailExpr == "" { + return []string{prefix} + } + sre, err := syntax.Parse(tailExpr, syntax.Perl) if err != nil { - logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err) + panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err)) } orValues := getOrValuesExt(sre) // Sort orValues for faster index seek later sort.Strings(orValues) + if len(prefix) > 0 { + // Add prefix to orValues + for i, orValue := range orValues { + orValues[i] = prefix + orValue + } + } + return orValues } @@ -51,8 +61,6 @@ func getOrValuesExt(sre *syntax.Regexp) []string { return []string{string(sre.Rune)} case syntax.OpEmptyMatch: return []string{""} - case syntax.OpBeginText, syntax.OpEndText: - return []string{""} case syntax.OpAlternate: a := make([]string, 0, len(sre.Sub)) for _, reSub := range sre.Sub { @@ -90,6 +98,9 @@ func getOrValuesExt(sre *syntax.Regexp) []string { if len(prefixes) == 0 { return nil } + if len(sre.Sub) == 1 { + return prefixes + } sre.Sub = sre.Sub[1:] suffixes := getOrValuesExt(sre) if len(suffixes) == 0 { @@ -120,3 +131,129 @@ func isLiteral(sre *syntax.Regexp) bool { } const maxOrValues = 100 + +// Simplify simplifies the given expr. +// +// It returns plaintext prefix and the remaining regular expression +// with dropped '^' and '$' anchors at the beginning and the end +// of the regular expression. +// +// The function removes capturing parens from the expr, +// so it cannot be used when capturing parens are necessary. +func Simplify(expr string) (string, string) { + sre, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + // Cannot parse the regexp. Return it all as prefix. 
+ return expr, "" + } + sre = simplifyRegexp(sre, false) + if sre == emptyRegexp { + return "", "" + } + if isLiteral(sre) { + return string(sre.Rune), "" + } + var prefix string + if sre.Op == syntax.OpConcat { + sub0 := sre.Sub[0] + if isLiteral(sub0) { + prefix = string(sub0.Rune) + sre.Sub = sre.Sub[1:] + if len(sre.Sub) == 0 { + return prefix, "" + } + sre = simplifyRegexp(sre, true) + } + } + if _, err := syntax.Compile(sre); err != nil { + // Cannot compile the regexp. Return it all as prefix. + return expr, "" + } + s := sre.String() + s = strings.ReplaceAll(s, "(?:)", "") + s = strings.ReplaceAll(s, "(?-s:.)", ".") + s = strings.ReplaceAll(s, "(?-m:$)", "$") + return prefix, s +} + +func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp { + s := sre.String() + for { + sre = simplifyRegexpExt(sre, hasPrefix, false) + sre = sre.Simplify() + if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText { + sre = emptyRegexp + } + sNew := sre.String() + if sNew == s { + return sre + } + var err error + sre, err = syntax.Parse(sNew, syntax.Perl) + if err != nil { + panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err)) + } + s = sNew + } +} + +func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp { + switch sre.Op { + case syntax.OpCapture: + // Substitute all the capture regexps with non-capture regexps. + sre.Op = syntax.OpAlternate + sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix) + if sre.Sub[0] == emptyRegexp { + return emptyRegexp + } + return sre + case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat: + sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix) + if sre.Sub[0] == emptyRegexp { + return emptyRegexp + } + return sre + case syntax.OpAlternate: + // Do not remove empty captures from OpAlternate, since this may break regexp. 
+ for i, sub := range sre.Sub { + sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix) + } + return sre + case syntax.OpConcat: + subs := sre.Sub[:0] + for i, sub := range sre.Sub { + sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub)) + if sub != emptyRegexp { + subs = append(subs, sub) + } + } + sre.Sub = subs + // Remove anchors from the beginning and the end of regexp, since they + // will be added later. + if !hasPrefix { + for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText { + sre.Sub = sre.Sub[1:] + } + } + if !hasSuffix { + for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText { + sre.Sub = sre.Sub[:len(sre.Sub)-1] + } + } + if len(sre.Sub) == 0 { + return emptyRegexp + } + if len(sre.Sub) == 1 { + return sre.Sub[0] + } + return sre + case syntax.OpEmptyMatch: + return emptyRegexp + default: + return sre + } +} + +var emptyRegexp = &syntax.Regexp{ + Op: syntax.OpEmptyMatch, +} diff --git a/lib/regexutil/regexutil_test.go b/lib/regexutil/regexutil_test.go index 99a91acb33..8755ccc7fe 100644 --- a/lib/regexutil/regexutil_test.go +++ b/lib/regexutil/regexutil_test.go @@ -23,6 +23,7 @@ func TestGetOrValues(t *testing.T) { f("foo.*", nil) f(".*", nil) f("foo|.*", nil) + f("(fo((o)))|(bar)", []string{"bar", "foo"}) f("foobar", []string{"foobar"}) f("z|x|c", []string{"c", "x", "z"}) f("foo|bar", []string{"bar", "foo"}) @@ -41,8 +42,71 @@ func TestGetOrValues(t *testing.T) { f("^foo|bar$", []string{"bar", "foo"}) f("^(foo|bar)$", []string{"bar", "foo"}) f("^a(foo|b(?:a|r))$", []string{"aba", "abr", "afoo"}) - // This is incorrect conversion, because the regexp matches nothing. - // It is OK for now, since such regexps are uncommon in practice. - // TODO: properly handle this case. 
- f("^a(^foo|bar$)z$", []string{"abarz", "afooz"}) + f("^a(foo$|b(?:a$|r))$", []string{"aba", "abr", "afoo"}) + f("^a(^foo|bar$)z$", nil) +} + +func TestSimplify(t *testing.T) { + f := func(s, expectedPrefix, expectedSuffix string) { + t.Helper() + prefix, suffix := Simplify(s) + if prefix != expectedPrefix { + t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix) + } + if suffix != expectedSuffix { + t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix) + } + } + + f("", "", "") + f("^", "", "") + f("$", "", "") + f("^()$", "", "") + f("^(?:)$", "", "") + f("^foo|^bar$|baz", "", "foo|ba[rz]") + f("^(foo$|^bar)$", "", "foo|bar") + f("^a(foo$|bar)$", "a", "foo|bar") + f("^a(^foo|bar$)z$", "a", "(?:\\Afoo|bar$)z") + f("foobar", "foobar", "") + f("foo$|^foobar", "foo", "|bar") + f("^(foo$|^foobar)$", "foo", "|bar") + f("foobar|foobaz", "fooba", "[rz]") + f("(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x") + f("(тестЧЧ|тест)", "тест", "ЧЧ|") + f("foo(bar|baz|bana)", "fooba", "[rz]|na") + f("^foobar|foobaz", "fooba", "[rz]") + f("^foobar|^foobaz$", "fooba", "[rz]") + f("foobar|foobaz", "fooba", "[rz]") + f("(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa.*") + f("foo[bar]+", "foo", "[a-br]+") + f("foo[a-z]+", "foo", "[a-z]+") + f("foo[bar]*", "foo", "[a-br]*") + f("foo[a-z]*", "foo", "[a-z]*") + f("foo[x]+", "foo", "x+") + f("foo[^x]+", "foo", "[^x]+") + f("foo[x]*", "foo", "x*") + f("foo[^x]*", "foo", "[^x]*") + f("foo[x]*bar", "foo", "x*bar") + f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?") + f("foo.+bar", "foo", ".+bar") + f("a(b|c.*).+", "a", "(?:b|c.*).+") + f("ab|ac", "a", "[b-c]") + f("(?i)xyz", "", "(?i:XYZ)") + f("(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)") + f("(?i)up.+x", "", "(?i:UP).+(?i:X)") + f("(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)") + + // test invalid regexps + f("a(", "a(", "") + f("a[", "a[", "") + f("a[]", "a[]", "") + f("a{", "a{", "") + f("a{}", "a{}", "") + f("invalid(regexp", "invalid(regexp", "") + + // 
The transformed regexp mustn't match aba + f("a?(^ba|c)", "", "a?(?:\\Aba|c)") + + // The transformed regexp mustn't match barx + f("(foo|bar$)x*", "", "(?:foo|bar$)x*") } diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go index fedcfa3a69..db14d72506 100644 --- a/lib/storage/metric_name.go +++ b/lib/storage/metric_name.go @@ -66,8 +66,8 @@ func (tag *Tag) copyFrom(src *Tag) { tag.Value = append(tag.Value[:0], src.Value...) } -func marshalTagValueNoTrailingTagSeparator(dst, src []byte) []byte { - dst = marshalTagValue(dst, src) +func marshalTagValueNoTrailingTagSeparator(dst []byte, src string) []byte { + dst = marshalTagValue(dst, bytesutil.ToUnsafeBytes(src)) // Remove trailing tagSeparatorChar return dst[:len(dst)-1] } diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go index ccd515c1f6..0577d958c0 100644 --- a/lib/storage/tag_filters.go +++ b/lib/storage/tag_filters.go @@ -377,7 +377,7 @@ func (tf *tagFilter) InitFromGraphiteQuery(commonPrefix, query []byte, paths []s tf.regexpPrefix = prefix tf.prefix = append(tf.prefix[:0], commonPrefix...) tf.prefix = marshalTagValue(tf.prefix, nil) - tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, []byte(prefix)) + tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, prefix) tf.orSuffixes = append(tf.orSuffixes[:0], orSuffixes...) tf.reSuffixMatch, tf.matchCost = newMatchFuncForOrSuffixes(orSuffixes) } @@ -433,15 +433,15 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp tf.prefix = append(tf.prefix, commonPrefix...) tf.prefix = marshalTagValue(tf.prefix, key) - var expr []byte - prefix := tf.value + var expr string + prefix := bytesutil.ToUnsafeString(tf.value) if tf.isRegexp { - prefix, expr = getRegexpPrefix(tf.value) + prefix, expr = simplifyRegexp(prefix) if len(expr) == 0 { tf.value = append(tf.value[:0], prefix...) 
tf.isRegexp = false } else { - tf.regexpPrefix = string(prefix) + tf.regexpPrefix = prefix } } tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, prefix) @@ -522,22 +522,22 @@ func RegexpCacheMisses() uint64 { return regexpCache.Misses() } -func getRegexpFromCache(expr []byte) (*regexpCacheValue, error) { - if rcv := regexpCache.GetEntry(bytesutil.ToUnsafeString(expr)); rcv != nil { +func getRegexpFromCache(expr string) (*regexpCacheValue, error) { + if rcv := regexpCache.GetEntry(expr); rcv != nil { // Fast path - the regexp found in the cache. return rcv.(*regexpCacheValue), nil } // Slow path - build the regexp. - exprOrig := string(expr) + exprOrig := expr - expr = []byte(tagCharsRegexpEscaper.Replace(exprOrig)) + expr = tagCharsRegexpEscaper.Replace(exprOrig) exprStr := fmt.Sprintf("^(%s)$", expr) re, err := regexp.Compile(exprStr) if err != nil { return nil, fmt.Errorf("invalid regexp %q: %w", exprStr, err) } - sExpr := string(expr) + sExpr := expr orValues := regexutil.GetOrValues(sExpr) var reMatch func(b []byte) bool var reCost uint64 @@ -849,22 +849,28 @@ func (rcv *regexpCacheValue) SizeBytes() int { return rcv.sizeBytes } -func getRegexpPrefix(b []byte) ([]byte, []byte) { - // Fast path - search the prefix in the cache. - if ps := prefixesCache.GetEntry(bytesutil.ToUnsafeString(b)); ps != nil { +func simplifyRegexp(expr string) (string, string) { + // It is safe to pass the expr constructed via bytesutil.ToUnsafeString() + // to GetEntry() here. + if ps := prefixesCache.GetEntry(expr); ps != nil { + // Fast path - the simplified expr is found in the cache. ps := ps.(*prefixSuffix) return ps.prefix, ps.suffix } - // Slow path - extract the regexp prefix from b. - prefix, suffix := extractRegexpPrefix(b) + // Slow path - simplify the expr. 
+ + // Make a copy of expr before using it, + // since it may be constructed via bytesutil.ToUnsafeString() + expr = string(append([]byte{}, expr...)) + prefix, suffix := regexutil.Simplify(expr) // Put the prefix and the suffix to the cache. ps := &prefixSuffix{ prefix: prefix, suffix: suffix, } - prefixesCache.PutEntry(string(b), ps) + prefixesCache.PutEntry(expr, ps) return prefix, suffix } @@ -911,120 +917,11 @@ func RegexpPrefixesCacheMisses() uint64 { } type prefixSuffix struct { - prefix []byte - suffix []byte + prefix string + suffix string } // SizeBytes implements lrucache.Entry interface func (ps *prefixSuffix) SizeBytes() int { - return cap(ps.prefix) + cap(ps.suffix) + int(unsafe.Sizeof(*ps)) -} - -func extractRegexpPrefix(b []byte) ([]byte, []byte) { - sre, err := syntax.Parse(string(b), syntax.Perl) - if err != nil { - // Cannot parse the regexp. Return it all as prefix. - return b, nil - } - sre = simplifyRegexp(sre) - if sre == emptyRegexp { - return nil, nil - } - if isLiteral(sre) { - return []byte(string(sre.Rune)), nil - } - var prefix []byte - if sre.Op == syntax.OpConcat { - sub0 := sre.Sub[0] - if isLiteral(sub0) { - prefix = []byte(string(sub0.Rune)) - sre.Sub = sre.Sub[1:] - if len(sre.Sub) == 0 { - return nil, nil - } - } - } - if _, err := syntax.Compile(sre); err != nil { - // Cannot compile the regexp. Return it all as prefix. 
- return b, nil - } - return prefix, []byte(sre.String()) -} - -func simplifyRegexp(sre *syntax.Regexp) *syntax.Regexp { - s := sre.String() - for { - sre = simplifyRegexpExt(sre, false, false) - sre = sre.Simplify() - if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText { - sre = emptyRegexp - } - sNew := sre.String() - if sNew == s { - return sre - } - var err error - sre, err = syntax.Parse(sNew, syntax.Perl) - if err != nil { - logger.Panicf("BUG: cannot parse simplified regexp %q: %s", sNew, err) - } - s = sNew - } -} - -func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp { - switch sre.Op { - case syntax.OpCapture: - // Substitute all the capture regexps with non-capture regexps. - sre.Op = syntax.OpAlternate - sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix) - if sre.Sub[0] == emptyRegexp { - return emptyRegexp - } - return sre - case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat: - sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix) - if sre.Sub[0] == emptyRegexp { - return emptyRegexp - } - return sre - case syntax.OpAlternate: - // Do not remove empty captures from OpAlternate, since this may break regexp. - for i, sub := range sre.Sub { - sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix) - } - return sre - case syntax.OpConcat: - subs := sre.Sub[:0] - for i, sub := range sre.Sub { - if sub = simplifyRegexpExt(sub, i > 0, i+1 < len(sre.Sub)); sub != emptyRegexp { - subs = append(subs, sub) - } - } - sre.Sub = subs - // Remove anchros from the beginning and the end of regexp, since they - // will be added later. 
- if !hasPrefix { - for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText { - sre.Sub = sre.Sub[1:] - } - } - if !hasSuffix { - for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText { - sre.Sub = sre.Sub[:len(sre.Sub)-1] - } - } - if len(sre.Sub) == 0 { - return emptyRegexp - } - return sre - case syntax.OpEmptyMatch: - return emptyRegexp - default: - return sre - } -} - -var emptyRegexp = &syntax.Regexp{ - Op: syntax.OpEmptyMatch, + return len(ps.prefix) + len(ps.suffix) + int(unsafe.Sizeof(*ps)) } diff --git a/lib/storage/tag_filters_test.go b/lib/storage/tag_filters_test.go index e6121de16b..a04d1ce358 100644 --- a/lib/storage/tag_filters_test.go +++ b/lib/storage/tag_filters_test.go @@ -683,26 +683,11 @@ func TestGetCommonPrefix(t *testing.T) { f([]string{"foo1", "foo2", "foo34"}, "foo") } -func TestExtractRegexpPrefix(t *testing.T) { - f := func(s string, expectedPrefix, expectedSuffix string) { - t.Helper() - prefix, suffix := extractRegexpPrefix([]byte(s)) - if string(prefix) != expectedPrefix { - t.Fatalf("unexpected prefix for %q; got %q; want %q", s, prefix, expectedPrefix) - } - if string(suffix) != expectedSuffix { - t.Fatalf("unexpected suffix for %q; got %q; want %q", s, suffix, expectedSuffix) - } - } - f("", "", "") - f("foobar", "foobar", "") -} - func TestGetRegexpFromCache(t *testing.T) { f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string, suffixExpected string) { t.Helper() for i := 0; i < 3; i++ { - rcv, err := getRegexpFromCache([]byte(s)) + rcv, err := getRegexpFromCache(s) if err != nil { t.Fatalf("unexpected error for s=%q: %s", s, err) } @@ -772,7 +757,7 @@ func TestTagFilterMatchSuffix(t *testing.T) { var tf tagFilter tvNoTrailingTagSeparator := func(s string) string { - return string(marshalTagValueNoTrailingTagSeparator(nil, []byte(s))) + return string(marshalTagValueNoTrailingTagSeparator(nil, s)) } init := func(value string, isNegative, isRegexp bool, expectedPrefix string) { 
t.Helper() @@ -1153,75 +1138,75 @@ func TestTagFilterMatchSuffix(t *testing.T) { }) } -func TestGetRegexpPrefix(t *testing.T) { - f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) { +func TestSimplifyRegexp(t *testing.T) { + f := func(s, expectedPrefix, expectedSuffix string) { t.Helper() - prefix, suffix := getRegexpPrefix([]byte(s)) - if string(prefix) != expectedPrefix { + prefix, suffix := simplifyRegexp(s) + if prefix != expectedPrefix { t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix) } - if string(suffix) != expectedSuffix { + if suffix != expectedSuffix { t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix) } // Get the prefix from cache. - prefix, suffix = getRegexpPrefix([]byte(s)) - if string(prefix) != expectedPrefix { + prefix, suffix = simplifyRegexp(s) + if prefix != expectedPrefix { t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix) } - if string(suffix) != expectedSuffix { + if suffix != expectedSuffix { t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix) } } - f(t, "", "", "") - f(t, "^", "", "") - f(t, "$", "", "") - f(t, "^()$", "", "") - f(t, "^(?:)$", "", "") - f(t, "foobar", "foobar", "") - f(t, "foo$|^foobar", "foo", "(?:(?:)|bar)") - f(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)") - f(t, "foobar|foobaz", "fooba", "[rz]") - f(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x") - f(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))") - f(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)") - f(t, "^foobar|foobaz", "fooba", "[rz]") - f(t, "^foobar|^foobaz$", "fooba", "[rz]") - f(t, "foobar|foobaz", "fooba", "[rz]") - f(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*") - f(t, "foo[bar]+", "foo", "[a-br]+") - f(t, "foo[a-z]+", "foo", "[a-z]+") - f(t, "foo[bar]*", "foo", "[a-br]*") - f(t, "foo[a-z]*", "foo", "[a-z]*") - f(t, "foo[x]+", "foo", "x+") - f(t, "foo[^x]+", "foo", "[^x]+") - f(t, "foo[x]*", "foo", "x*") - f(t, 
"foo[^x]*", "foo", "[^x]*") - f(t, "foo[x]*bar", "foo", "x*bar") - f(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?") - f(t, "foo.+bar", "foo", "(?-s:.)+bar") - f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+") - f(t, "ab|ac", "a", "[b-c]") - f(t, "(?i)xyz", "", "(?i:XYZ)") - f(t, "(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)") - f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)") - f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)") + f("", "", "") + f("^", "", "") + f("$", "", "") + f("^()$", "", "") + f("^(?:)$", "", "") + f("foobar", "foobar", "") + f("foo$|^foobar", "foo", "|bar") + f("^(foo$|^foobar)$", "foo", "|bar") + f("foobar|foobaz", "fooba", "[rz]") + f("(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x") + f("(тестЧЧ|тест)", "тест", "ЧЧ|") + f("foo(bar|baz|bana)", "fooba", "[rz]|na") + f("^foobar|foobaz", "fooba", "[rz]") + f("^foobar|^foobaz$", "fooba", "[rz]") + f("foobar|foobaz", "fooba", "[rz]") + f("(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa.*") + f("foo[bar]+", "foo", "[a-br]+") + f("foo[a-z]+", "foo", "[a-z]+") + f("foo[bar]*", "foo", "[a-br]*") + f("foo[a-z]*", "foo", "[a-z]*") + f("foo[x]+", "foo", "x+") + f("foo[^x]+", "foo", "[^x]+") + f("foo[x]*", "foo", "x*") + f("foo[^x]*", "foo", "[^x]*") + f("foo[x]*bar", "foo", "x*bar") + f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?") + f("foo.+bar", "foo", ".+bar") + f("a(b|c.*).+", "a", "(?:b|c.*).+") + f("ab|ac", "a", "[b-c]") + f("(?i)xyz", "", "(?i:XYZ)") + f("(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)") + f("(?i)up.+x", "", "(?i:UP).+(?i:X)") + f("(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)") // test invalid regexps - f(t, "a(", "a(", "") - f(t, "a[", "a[", "") - f(t, "a[]", "a[]", "") - f(t, "a{", "a{", "") - f(t, "a{}", "a{}", "") - f(t, "invalid(regexp", "invalid(regexp", "") + f("a(", "a(", "") + f("a[", "a[", "") + f("a[]", "a[]", "") + f("a{", "a{", "") + f("a{}", "a{}", "") + f("invalid(regexp", "invalid(regexp", "") // The transformed regexp mustn't match aba - f(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)") + f("a?(^ba|c)", "", 
"a?(?:\\Aba|c)") // The transformed regexp mustn't match barx - f(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*") + f("(foo|bar$)x*", "", "(?:foo|bar$)x*") } func TestTagFiltersString(t *testing.T) { diff --git a/lib/storage/tag_filters_timing_test.go b/lib/storage/tag_filters_timing_test.go index af119fbbe7..3789560fef 100644 --- a/lib/storage/tag_filters_timing_test.go +++ b/lib/storage/tag_filters_timing_test.go @@ -32,6 +32,29 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) { } }) }) + b.Run("regexp-any-suffix-match-anchored", func(b *testing.B) { + key := []byte("^foo.*$") + isNegative := false + isRegexp := true + suffix := marshalTagValue(nil, []byte("ojksdfds")) + b.ReportAllocs() + b.SetBytes(int64(1)) + b.RunParallel(func(pb *testing.PB) { + var tf tagFilter + if err := tf.Init(nil, nil, key, isNegative, isRegexp); err != nil { + logger.Panicf("BUG: unexpected error: %s", err) + } + for pb.Next() { + ok, err := tf.matchSuffix(suffix) + if err != nil { + logger.Panicf("BUG: unexpected error: %s", err) + } + if !ok { + logger.Panicf("BUG: unexpected suffix mismatch") + } + } + }) + }) b.Run("regexp-any-nonzero-suffix-match", func(b *testing.B) { key := []byte("foo.+") isNegative := false @@ -55,6 +78,29 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) { } }) }) + b.Run("regexp-any-nonzero-suffix-match-anchored", func(b *testing.B) { + key := []byte("^foo.+$") + isNegative := false + isRegexp := true + suffix := marshalTagValue(nil, []byte("ojksdfds")) + b.ReportAllocs() + b.SetBytes(int64(1)) + b.RunParallel(func(pb *testing.PB) { + var tf tagFilter + if err := tf.Init(nil, nil, key, isNegative, isRegexp); err != nil { + logger.Panicf("BUG: unexpected error: %s", err) + } + for pb.Next() { + ok, err := tf.matchSuffix(suffix) + if err != nil { + logger.Panicf("BUG: unexpected error: %s", err) + } + if !ok { + logger.Panicf("BUG: unexpected suffix mismatch") + } + } + }) + }) b.Run("regexp-any-nonzero-suffix-mismatch", func(b *testing.B) { key := 
[]byte("foo.+") isNegative := false