From 2b3235414b217d2da6593476ef62156cffe68f6f Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 13 Nov 2023 18:23:36 +0100 Subject: [PATCH] lib/regexutil: properly handle alternate regexps surrounded by .+ or .* Previously the following regexps were improperly handled: .+foo|bar.+ .*foo|bar.* This could lead to unexpected regexp match results. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 Thanks to @Haleygo for the initial attempt to fix the issue at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5308 --- docs/CHANGELOG.md | 1 + lib/regexutil/promregex.go | 16 ++++++++++++++++ lib/regexutil/promregex_test.go | 33 +++++++++++++++++++++++++++++++-- lib/regexutil/regexutil_test.go | 8 ++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 554736e949..5bff1da290 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## v1.87.x long-time support release (LTS) +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) with `regex`, which start and end with `.+` or `.*` and which contain alternate sub-regexps. For example, `.+;|;.+` or `.*foo|bar|baz.*`. Previously such regexps were improperly parsed, which could result in undexpected relabeling results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297). * BUGFIX: [vmstorage](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): log warning about switching to ReadOnly mode only on state change. Before, vmstorage would log this warning every 1s. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5159) for details. ## [v1.87.10](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.10) diff --git a/lib/regexutil/promregex.go b/lib/regexutil/promregex.go index cf196b8cd1..786dfd9b3e 100644 --- a/lib/regexutil/promregex.go +++ b/lib/regexutil/promregex.go @@ -2,6 +2,7 @@ package regexutil import ( "regexp" + "regexp/syntax" "strings" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" @@ -105,7 +106,22 @@ func (pr *PromRegex) MatchString(s string) bool { return pr.reSuffixMatcher.Match(s) } +// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix. +// +// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo". +// +// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix +// or if the regex part surrounded by prefixSuffix contains alternate regexps. func getSubstringLiteral(expr, prefixSuffix string) string { + // Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix. + sre, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return "" + } + if sre.Op == syntax.OpAlternate { + return "" + } + if !strings.HasPrefix(expr, prefixSuffix) { return "" } diff --git a/lib/regexutil/promregex_test.go b/lib/regexutil/promregex_test.go index 20a04312ad..83cea682fa 100644 --- a/lib/regexutil/promregex_test.go +++ b/lib/regexutil/promregex_test.go @@ -29,7 +29,7 @@ func TestPromRegex(t *testing.T) { } result := pr.MatchString(s) if result != resultExpected { - t.Fatalf("unexpected result when matching %s against %s; got %v; want %v", expr, s, result, resultExpected) + t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected) } // Make sure the result is the same for regular regexp @@ -37,7 +37,7 @@ func TestPromRegex(t *testing.T) { re := regexp.MustCompile(exprAnchored) result = re.MatchString(s) if result != resultExpected { - t.Fatalf("unexpected result when matching %s against %s during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) + t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) } } f("", "", true) @@ -89,4 +89,33 @@ func TestPromRegex(t *testing.T) { f(".*(a|b).*", "xzy", false) f("^(?:true)$", "true", true) f("^(?:true)$", "false", false) + + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 + f(".+;|;.+", ";", false) + f(".+;|;.+", "foo", false) + f(".+;|;.+", "foo;bar", false) + f(".+;|;.+", "foo;", true) + f(".+;|;.+", ";foo", true) + f(".+foo|bar|baz.+", "foo", false) + f(".+foo|bar|baz.+", "afoo", true) + f(".+foo|bar|baz.+", "fooa", false) + f(".+foo|bar|baz.+", "afooa", false) + f(".+foo|bar|baz.+", "bar", true) + f(".+foo|bar|baz.+", "abar", false) + f(".+foo|bar|baz.+", "abara", false) + f(".+foo|bar|baz.+", "bara", false) + f(".+foo|bar|baz.+", "baz", false) + f(".+foo|bar|baz.+", "baza", true) + f(".+foo|bar|baz.+", "abaz", false) + f(".+foo|bar|baz.+", "abaza", false) + f(".+foo|bar|baz.+", "afoo|bar|baza", false) + f(".+(foo|bar|baz).+", "abara", true) + f(".+(foo|bar|baz).+", "afooa", true) + f(".+(foo|bar|baz).+", "abaza", true) + + f(".*;|;.*", ";", true) + f(".*;|;.*", "foo", false) + f(".*;|;.*", "foo;bar", false) + f(".*;|;.*", "foo;", true) + f(".*;|;.*", ";foo", true) } diff --git a/lib/regexutil/regexutil_test.go b/lib/regexutil/regexutil_test.go index e45781d25e..7357e115f4 100644 --- a/lib/regexutil/regexutil_test.go +++ b/lib/regexutil/regexutil_test.go @@ -109,6 +109,14 @@ func TestSimplify(t *testing.T) { // The transformed regexp mustn't match barx f("(foo|bar$)x*", "", "(?:foo|bar$)x*") + + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 + f(".+;|;.+", "", ".+;|;.+") + f("^(.+);|;(.+)$", "", ".+;|;.+") + f("^(.+);$|^;(.+)$", "", ".+;|;.+") + f(".*;|;.*", "", ".*;|;.*") + f("^(.*);|;(.*)$", "", ".*;|;.*") + f("^(.*);$|^;(.*)$", "", ".*;|;.*") } func TestRemoveStartEndAnchors(t *testing.T) {