lib/regexutil: properly handle alternate regexps surrounded by .+ or .*

Previously the following regexps were improperly handled:

  .+foo|bar.+
  .*foo|bar.*

This could lead to unexpected regexp match results.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297

Thanks to @Haleygo for the initial attempt to fix the issue at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5308
This commit is contained in:
Aliaksandr Valialkin 2023-11-13 18:23:36 +01:00
parent 2f23445d40
commit 2b3235414b
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 56 additions and 2 deletions

View file

@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
## v1.87.x long-time support release (LTS)
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) with `regex`, which start and end with `.+` or `.*` and which contain alternate sub-regexps. For example, `.+;|;.+` or `.*foo|bar|baz.*`. Previously such regexps were improperly parsed, which could result in undexpected relabeling results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297).
* BUGFIX: [vmstorage](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): log warning about switching to ReadOnly mode only on state change. Before, vmstorage would log this warning every 1s. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5159) for details.
## [v1.87.10](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.10)

View file

@ -2,6 +2,7 @@ package regexutil
import (
"regexp"
"regexp/syntax"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -105,7 +106,22 @@ func (pr *PromRegex) MatchString(s string) bool {
return pr.reSuffixMatcher.Match(s)
}
// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
//
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
//
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
func getSubstringLiteral(expr, prefixSuffix string) string {
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return ""
}
if sre.Op == syntax.OpAlternate {
return ""
}
if !strings.HasPrefix(expr, prefixSuffix) {
return ""
}

View file

@ -29,7 +29,7 @@ func TestPromRegex(t *testing.T) {
}
result := pr.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %s against %s; got %v; want %v", expr, s, result, resultExpected)
t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected)
}
// Make sure the result is the same for regular regexp
@ -37,7 +37,7 @@ func TestPromRegex(t *testing.T) {
re := regexp.MustCompile(exprAnchored)
result = re.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %s against %s during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
}
}
f("", "", true)
@ -89,4 +89,33 @@ func TestPromRegex(t *testing.T) {
f(".*(a|b).*", "xzy", false)
f("^(?:true)$", "true", true)
f("^(?:true)$", "false", false)
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", ";", false)
f(".+;|;.+", "foo", false)
f(".+;|;.+", "foo;bar", false)
f(".+;|;.+", "foo;", true)
f(".+;|;.+", ";foo", true)
f(".+foo|bar|baz.+", "foo", false)
f(".+foo|bar|baz.+", "afoo", true)
f(".+foo|bar|baz.+", "fooa", false)
f(".+foo|bar|baz.+", "afooa", false)
f(".+foo|bar|baz.+", "bar", true)
f(".+foo|bar|baz.+", "abar", false)
f(".+foo|bar|baz.+", "abara", false)
f(".+foo|bar|baz.+", "bara", false)
f(".+foo|bar|baz.+", "baz", false)
f(".+foo|bar|baz.+", "baza", true)
f(".+foo|bar|baz.+", "abaz", false)
f(".+foo|bar|baz.+", "abaza", false)
f(".+foo|bar|baz.+", "afoo|bar|baza", false)
f(".+(foo|bar|baz).+", "abara", true)
f(".+(foo|bar|baz).+", "afooa", true)
f(".+(foo|bar|baz).+", "abaza", true)
f(".*;|;.*", ";", true)
f(".*;|;.*", "foo", false)
f(".*;|;.*", "foo;bar", false)
f(".*;|;.*", "foo;", true)
f(".*;|;.*", ";foo", true)
}

View file

@ -109,6 +109,14 @@ func TestSimplify(t *testing.T) {
// The transformed regexp mustn't match barx
f("(foo|bar$)x*", "", "(?:foo|bar$)x*")
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", "", ".+;|;.+")
f("^(.+);|;(.+)$", "", ".+;|;.+")
f("^(.+);$|^;(.+)$", "", ".+;|;.+")
f(".*;|;.*", "", ".*;|;.*")
f("^(.*);|;(.*)$", "", ".*;|;.*")
f("^(.*);$|^;(.*)$", "", ".*;|;.*")
}
func TestRemoveStartEndAnchors(t *testing.T) {