From 59d52cec67d2db222ca0e770a4ad321da5d355df Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 23 May 2024 21:47:21 +0200 Subject: [PATCH] wip --- lib/logstorage/filter_regexp.go | 22 +++++----- lib/logstorage/filter_regexp_test.go | 60 ++++++++++++++++----------- lib/logstorage/parser.go | 6 +-- lib/logstorage/storage_search_test.go | 3 +- lib/promrelabel/config.go | 2 +- lib/regexutil/promregex.go | 9 ++++ lib/regexutil/promregex_test.go | 7 ++++ lib/regexutil/regex.go | 33 +++++++++------ lib/regexutil/regex_test.go | 20 +++++---- lib/storage/tag_filters_test.go | 2 +- 10 files changed, 101 insertions(+), 63 deletions(-) diff --git a/lib/logstorage/filter_regexp.go b/lib/logstorage/filter_regexp.go index adfb49337..28aa4aa40 100644 --- a/lib/logstorage/filter_regexp.go +++ b/lib/logstorage/filter_regexp.go @@ -2,9 +2,9 @@ package logstorage import ( "fmt" - "regexp" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil" ) // filterRegexp matches the given regexp @@ -12,7 +12,7 @@ import ( // Example LogsQL: `fieldName:re("regexp")` type filterRegexp struct { fieldName string - re *regexp.Regexp + re *regexutil.Regex } func (fr *filterRegexp) String() string { @@ -77,7 +77,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toTimestampISO8601String(bs, bb, v) @@ -86,7 +86,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() 
visitValues(bs, ch, bm, func(v string) bool { s := toIPv4String(bs, bb, v) @@ -95,7 +95,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp bbPool.Put(bb) } -func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toFloat64String(bs, bb, v) @@ -104,7 +104,7 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg bbPool.Put(bb) } -func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() for _, v := range ch.valuesDict.values { c := byte(0) @@ -117,13 +117,13 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re * bbPool.Put(bb) } -func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { visitValues(bs, ch, bm, func(v string) bool { return re.MatchString(v) }) } -func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toUint8String(bs, bb, v) @@ -132,7 +132,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex bbPool.Put(bb) } -func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toUint16String(bs, bb, v) @@ -141,7 +141,7 @@ func matchUint16ByRegexp(bs 
*blockSearch, ch *columnHeader, bm *bitmap, re *rege bbPool.Put(bb) } -func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toUint32String(bs, bb, v) @@ -150,7 +150,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege bbPool.Put(bb) } -func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { +func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { s := toUint64String(bs, bb, v) diff --git a/lib/logstorage/filter_regexp_test.go b/lib/logstorage/filter_regexp_test.go index 8ff80fdd1..30173290f 100644 --- a/lib/logstorage/filter_regexp_test.go +++ b/lib/logstorage/filter_regexp_test.go @@ -1,8 +1,10 @@ package logstorage import ( - "regexp" + "fmt" "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil" ) func TestFilterRegexp(t *testing.T) { @@ -21,32 +23,32 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("0.0"), + re: mustCompileRegex("0.0"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile(`^127\.0\.0\.1$`), + re: mustCompileRegex(`^127\.0\.0\.1$`), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) fr = &filterRegexp{ fieldName: "non-existing-column", - re: regexp.MustCompile("foo.+bar|"), + re: mustCompileRegex("foo.+bar|"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo.+bar"), + re: mustCompileRegex("foo.+bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) fr = &filterRegexp{ fieldName: 
"non-existing-column", - re: regexp.MustCompile("foo.+bar"), + re: mustCompileRegex("foo.+bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -71,20 +73,20 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar|^$"), + re: mustCompileRegex("foo|bar|^$"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6}) fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("27.0"), + re: mustCompileRegex("27.0"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("bar.+foo"), + re: mustCompileRegex("bar.+foo"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -111,14 +113,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("(?i)foo|йцу"), + re: mustCompileRegex("(?i)foo|йцу"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("qwe.+rty|^$"), + re: mustCompileRegex("qwe.+rty|^$"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -146,14 +148,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), + re: mustCompileRegex("[32][23]?"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar"), + re: mustCompileRegex("foo|bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -181,14 +183,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), + re: mustCompileRegex("[32][23]?"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar"), + 
re: mustCompileRegex("foo|bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -216,14 +218,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), + re: mustCompileRegex("[32][23]?"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar"), + re: mustCompileRegex("foo|bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -251,14 +253,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), + re: mustCompileRegex("[32][23]?"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar"), + re: mustCompileRegex("foo|bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -286,14 +288,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), + re: mustCompileRegex("[32][23]?"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar"), + re: mustCompileRegex("foo|bar"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -322,14 +324,14 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("127.0.[40].(1|2)"), + re: mustCompileRegex("127.0.[40].(1|2)"), } testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7}) // mismatch fr = &filterRegexp{ fieldName: "foo", - re: regexp.MustCompile("foo|bar|834"), + re: mustCompileRegex("foo|bar|834"), } testFilterMatchForColumns(t, columns, fr, "foo", nil) }) @@ -355,15 +357,23 @@ func TestFilterRegexp(t *testing.T) { // match fr := &filterRegexp{ fieldName: "_msg", - re: 
regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"), + re: mustCompileRegex("2006-[0-9]{2}-.+?(2|5)Z"), } testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4}) // mismatch fr = &filterRegexp{ fieldName: "_msg", - re: regexp.MustCompile("^01|04$"), + re: mustCompileRegex("^01|04$"), } testFilterMatchForColumns(t, columns, fr, "_msg", nil) }) } + +func mustCompileRegex(expr string) *regexutil.Regex { + re, err := regexutil.NewRegex(expr) + if err != nil { + panic(fmt.Errorf("BUG: cannot compile %q: %w", expr, err)) + } + return re +} diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 4a62f1889..67d36ca80 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -3,7 +3,6 @@ package logstorage import ( "fmt" "math" - "regexp" "strconv" "strings" "time" @@ -12,6 +11,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil" ) type lexer struct { @@ -1011,7 +1011,7 @@ func parseFilterExact(lex *lexer, fieldName string) (filter, error) { func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) { funcName := lex.token return parseFuncArg(lex, fieldName, func(arg string) (filter, error) { - re, err := regexp.Compile(arg) + re, err := regexutil.NewRegex(arg) if err != nil { return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err) } @@ -1026,7 +1026,7 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) { func parseFilterTilda(lex *lexer, fieldName string) (filter, error) { lex.nextToken() arg := getCompoundFuncArg(lex) - re, err := regexp.Compile(arg) + re, err := regexutil.NewRegex(arg) if err != nil { return nil, fmt.Errorf("invalid regexp %q: %w", arg, err) } diff --git a/lib/logstorage/storage_search_test.go b/lib/logstorage/storage_search_test.go index fe1cb5392..e80409d08 100644 --- a/lib/logstorage/storage_search_test.go +++ 
b/lib/logstorage/storage_search_test.go @@ -3,7 +3,6 @@ package logstorage import ( "context" "fmt" - "regexp" "sync/atomic" "testing" "time" @@ -582,7 +581,7 @@ func TestStorageSearch(t *testing.T) { f, &filterRegexp{ fieldName: "_msg", - re: regexp.MustCompile("message [02] at "), + re: mustCompileRegex("message [02] at "), }, }, } diff --git a/lib/promrelabel/config.go b/lib/promrelabel/config.go index 4dbf903b8..8969da7e5 100644 --- a/lib/promrelabel/config.go +++ b/lib/promrelabel/config.go @@ -439,5 +439,5 @@ func isDefaultRegex(expr string) bool { if prefix != "" { return false } - return suffix == "(?-s:.*)" + return suffix == "(?s:.*)" } diff --git a/lib/regexutil/promregex.go b/lib/regexutil/promregex.go index ebf82f309..259491c51 100644 --- a/lib/regexutil/promregex.go +++ b/lib/regexutil/promregex.go @@ -19,6 +19,9 @@ import ( // // The rest of regexps are also optimized by returning cached match results for the same input strings. type PromRegex struct { + // exprStr is the original expression. + exprStr string + // prefix contains literal prefix for regex. // For example, prefix="foo" for regex="foo(a|b)" prefix string @@ -65,6 +68,7 @@ func NewPromRegex(expr string) (*PromRegex, error) { reSuffix := regexp.MustCompile(suffixExpr) reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString) pr := &PromRegex{ + exprStr: expr, prefix: prefix, isOnlyPrefix: isOnlyPrefix, isSuffixDotStar: isSuffixDotStar, @@ -125,3 +129,8 @@ func (pr *PromRegex) MatchString(s string) bool { // Fall back to slow path by matching the original regexp. return pr.reSuffixMatcher.Match(s) } + +// String returns string representation of pr. 
+func (pr *PromRegex) String() string { + return pr.exprStr +} diff --git a/lib/regexutil/promregex_test.go b/lib/regexutil/promregex_test.go index 24315a187..54bcf6be5 100644 --- a/lib/regexutil/promregex_test.go +++ b/lib/regexutil/promregex_test.go @@ -8,6 +8,7 @@ import ( func TestPromRegexParseFailure(t *testing.T) { f := func(expr string) { t.Helper() + pr, err := NewPromRegex(expr) if err == nil { t.Fatalf("expecting non-nil error for expr=%s", expr) @@ -23,10 +24,15 @@ func TestPromRegexParseFailure(t *testing.T) { func TestPromRegex(t *testing.T) { f := func(expr, s string, resultExpected bool) { t.Helper() + pr, err := NewPromRegex(expr) if err != nil { t.Fatalf("unexpected error: %s", err) } + exprResult := pr.String() + if exprResult != expr { + t.Fatalf("unexpected string representation for %q: %q", expr, exprResult) + } result := pr.MatchString(s) if result != resultExpected { t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected) @@ -40,6 +46,7 @@ func TestPromRegex(t *testing.T) { t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) } } + f("", "", true) f("", "foo", false) f("foo", "", false) diff --git a/lib/regexutil/regex.go b/lib/regexutil/regex.go index 63483041f..93963de35 100644 --- a/lib/regexutil/regex.go +++ b/lib/regexutil/regex.go @@ -15,6 +15,9 @@ import ( // - prefix match such as "foo.*" or "foo.+" // - substring match such as ".*foo.*" or ".+bar.+" type Regex struct { + // exprStr is the original expression. + exprStr string + // prefix contains literal prefix for regex. // For example, prefix="foo" for regex="foo(a|b)" prefix string @@ -38,8 +41,8 @@ type Regex struct { // For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz" orValues []string - // re is the original regexp. 
- re *regexp.Regexp + // suffixRe is the regexp for suffix + suffixRe *regexp.Regexp } // NewRegex returns Regex for the given expr. @@ -57,16 +60,16 @@ func NewRegex(expr string) (*Regex, error) { substrDotStar := getSubstringLiteral(sre, syntax.OpStar) substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus) - var re *regexp.Regexp suffixAnchored := suffix if len(prefix) > 0 { suffixAnchored = "^(?:" + suffix + ")" } // The suffixAnchored must be properly compiled, since it has been already checked above. // Otherwise it is a bug, which must be fixed. - re = regexp.MustCompile(suffixAnchored) + suffixRe := regexp.MustCompile(suffixAnchored) r := &Regex{ + exprStr: expr, prefix: prefix, isOnlyPrefix: isOnlyPrefix, isSuffixDotStar: isSuffixDotStar, @@ -74,22 +77,28 @@ func NewRegex(expr string) (*Regex, error) { substrDotStar: substrDotStar, substrDotPlus: substrDotPlus, orValues: orValues, - re: re, + suffixRe: suffixRe, } return r, nil } -// MatchString returns true if s matches pr. +// MatchString returns true if s matches r. func (r *Regex) MatchString(s string) bool { if r.isOnlyPrefix { return strings.Contains(s, r.prefix) } + if len(r.prefix) == 0 { return r.matchStringNoPrefix(s) } return r.matchStringWithPrefix(s) } +// String returns string representation for r +func (r *Regex) String() string { + return r.exprStr +} + func (r *Regex) matchStringNoPrefix(s string) bool { if r.isSuffixDotStar { return true @@ -108,11 +117,11 @@ func (r *Regex) matchStringNoPrefix(s string) bool { } if len(r.orValues) == 0 { - // Fall back to slow path by matching the original regexp. 
+ return r.suffixRe.MatchString(s) } - // Fast path - compare s to pr.orValues + // Fast path - compare s to r.orValues for _, v := range r.orValues { if strings.Contains(s, v) { return true @@ -148,12 +157,12 @@ func (r *Regex) matchStringWithPrefix(s string) bool { for { if len(r.orValues) == 0 { - // Fall back to slow path by matching the original regexp. - if r.re.MatchString(s) { + // Fall back to slow path by matching the suffix regexp. + if r.suffixRe.MatchString(s) { return true } } else { - // Fast path - compare s to pr.orValues + // Fast path - compare s to r.orValues for _, v := range r.orValues { if strings.HasPrefix(s, v) { return true diff --git a/lib/regexutil/regex_test.go b/lib/regexutil/regex_test.go index 72ab66dac..d0e3a0380 100644 --- a/lib/regexutil/regex_test.go +++ b/lib/regexutil/regex_test.go @@ -5,12 +5,12 @@ import ( ) func TestNewRegexFailure(t *testing.T) { - f := func(regex string) { + f := func(expr string) { t.Helper() - re, err := NewRegex(regex) + r, err := NewRegex(expr) if err == nil { - t.Fatalf("expecting non-nil error when parsing %q; got %q", regex, re.re) + t.Fatalf("expecting non-nil error when parsing %q; got %q", expr, r) } } @@ -19,16 +19,20 @@ func TestNewRegexFailure(t *testing.T) { } func TestRegexMatchString(t *testing.T) { - f := func(regex, s string, resultExpected bool) { + f := func(expr, s string, resultExpected bool) { t.Helper() - re, err := NewRegex(regex) + r, err := NewRegex(expr) if err != nil { - t.Fatalf("cannot parse %q: %s", regex, err) + t.Fatalf("cannot parse %q: %s", expr, err) } - result := re.MatchString(s) + exprResult := r.String() + if exprResult != expr { + t.Fatalf("unexpected string representation for %q: %q", expr, exprResult) + } + result := r.MatchString(s) if result != resultExpected { - t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, regex, result, resultExpected) + t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, 
expr, result, resultExpected) } } diff --git a/lib/storage/tag_filters_test.go b/lib/storage/tag_filters_test.go index 1eedeb454..448774669 100644 --- a/lib/storage/tag_filters_test.go +++ b/lib/storage/tag_filters_test.go @@ -1183,7 +1183,7 @@ func TestSimplifyRegexp(t *testing.T) { f("ab|ad", "a", "[bd]") f("(?i)xyz", "", "(?i:XYZ)") f("(?i)foo|bar", "", "(?i:FOO|BAR)") - f("(?i)up.+x", "", "(?i-s:UP.+X)") + f("(?i)up.+x", "", "(?is:UP.+X)") f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)") // test invalid regexps