This commit is contained in:
Aliaksandr Valialkin 2024-05-23 17:32:42 +02:00
parent ceae8a7e08
commit 91b006f0a7
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 429 additions and 91 deletions

View file

@ -23,9 +23,11 @@ type PromRegex struct {
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
// Suffix contains regex suffix left after removing the prefix.
// For example, suffix="a|b" for regex="foo(a|b)"
suffix string
// isSuffixDotStar is set to true if suffix is ".*"
isSuffixDotStar bool
// isSuffixDotPlus is set to true if suffix is ".+"
isSuffixDotPlus bool
// substrDotStar contains literal string for regex suffix=".*string.*"
substrDotStar string
@ -48,8 +50,10 @@ func NewPromRegex(expr string) (*PromRegex, error) {
}
prefix, suffix := SimplifyPromRegex(expr)
orValues := GetOrValuesPromRegex(suffix)
substrDotStar := getSubstringLiteral(suffix, ".*")
substrDotPlus := getSubstringLiteral(suffix, ".+")
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
// Anchor suffix to the beginning and the end of the matching string.
suffixExpr := "^(?:" + suffix + ")$"
@ -57,7 +61,8 @@ func NewPromRegex(expr string) (*PromRegex, error) {
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
pr := &PromRegex{
prefix: prefix,
suffix: suffix,
isSuffixDotStar: isSuffixDotStar,
isSuffixDotPlus: isSuffixDotPlus,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
@ -71,19 +76,21 @@ func NewPromRegex(expr string) (*PromRegex, error) {
// The pr is automatically anchored to the beginning and to the end
// of the matching string with '^' and '$'.
func (pr *PromRegex) MatchString(s string) bool {
if !strings.HasPrefix(s, pr.prefix) {
// Fast path - s has another prefix than pr.
return false
}
s = s[len(pr.prefix):]
if len(pr.orValues) > 0 {
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
for _, v := range pr.orValues {
if s == v {
return true
}
if len(pr.prefix) > 0 {
if !strings.HasPrefix(s, pr.prefix) {
// Fast path - s has another prefix than pr.
return false
}
return false
s = s[len(pr.prefix):]
}
if pr.isSuffixDotStar {
// Fast path - the pr contains "prefix.*"
return true
}
if pr.isSuffixDotPlus {
// Fast path - the pr contains "prefix.+"
return len(s) > 0
}
if pr.substrDotStar != "" {
// Fast path - pr contains ".*someText.*"
@ -94,45 +101,17 @@ func (pr *PromRegex) MatchString(s string) bool {
n := strings.Index(s, pr.substrDotPlus)
return n > 0 && n+len(pr.substrDotPlus) < len(s)
}
switch pr.suffix {
case ".*":
// Fast path - the pr contains "prefix.*"
return true
case ".+":
// Fast path - the pr contains "prefix.+"
return len(s) > 0
if len(pr.orValues) > 0 {
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
for _, v := range pr.orValues {
if s == v {
return true
}
}
return false
}
// Fall back to slow path by matching the original regexp.
return pr.reSuffixMatcher.Match(s)
}
// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
//
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
//
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
func getSubstringLiteral(expr, prefixSuffix string) string {
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return ""
}
if sre.Op == syntax.OpAlternate {
return ""
}
if !strings.HasPrefix(expr, prefixSuffix) {
return ""
}
expr = expr[len(prefixSuffix):]
if !strings.HasSuffix(expr, prefixSuffix) {
return ""
}
expr = expr[:len(expr)-len(prefixSuffix)]
prefix, suffix := SimplifyPromRegex(expr)
if suffix != "" {
return ""
}
return prefix
}

165
lib/regexutil/regex.go Normal file
View file

@ -0,0 +1,165 @@
package regexutil
import (
"regexp"
"regexp/syntax"
"strings"
)
// Regex implements an optimized string matching for Go regex.
//
// NewRegex splits the expression into a literal prefix and the remaining suffix.
// The fields below describe which fast path, if any, applies to that suffix.
//
// The following regexs are optimized:
//
// - plain string such as "foobar"
// - alternate strings such as "foo|bar|baz"
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
type Regex struct {
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
// isSuffixDotStar is set to true if suffix is ".*"
isSuffixDotStar bool
// isSuffixDotPlus is set to true if suffix is ".+"
isSuffixDotPlus bool
// substrDotStar contains literal string for regex suffix=".*string.*"
// It is empty if the suffix doesn't have this form.
substrDotStar string
// substrDotPlus contains literal string for regex suffix=".+string.+"
// It is empty if the suffix doesn't have this form.
substrDotPlus string
// orValues contains or values for the suffix regex.
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
orValues []string
// re is the compiled regexp for the suffix left after removing the prefix.
// It is anchored to the beginning with '^' when prefix is non-empty.
//
// NewRegex leaves re nil when orValues, substrDotStar or substrDotPlus
// already describe the suffix, so it must be used only as the last fallback.
re *regexp.Regexp
}
// NewRegex returns Regex for the given expr.
//
// An error is returned if expr cannot be compiled by regexp.Compile.
//
// The expr is split into a literal prefix and the remaining suffix via SimplifyRegex.
// The suffix is then probed for the shapes, which can be matched without
// running the regexp engine: ".*", ".+", ".*literal.*", ".+literal.+"
// and a list of alternate literal strings.
func NewRegex(expr string) (*Regex, error) {
	if _, err := regexp.Compile(expr); err != nil {
		return nil, err
	}

	prefix, suffix := SimplifyRegex(expr)
	orValues := GetOrValuesRegex(suffix)
	isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
	isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
	substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
	substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)

	// The compiled regexp is consulted by MatchString only when none of the fast paths apply.
	//
	// Rely on the parsed isSuffixDotStar / isSuffixDotPlus flags here instead of comparing
	// the suffix text to ".*" and ".+": the simplified suffix may be rendered with a flag group
	// (for instance "(?s:...)"), in which case the textual comparison never fires
	// and an unused regexp gets compiled.
	needRe := len(orValues) == 0 &&
		substrDotStar == "" && substrDotPlus == "" &&
		!isSuffixDotStar && !isSuffixDotPlus

	var re *regexp.Regexp
	if needRe {
		suffixAnchored := suffix
		if len(prefix) > 0 {
			// The suffix must match right after the literal prefix.
			suffixAnchored = "^(?:" + suffix + ")"
		}
		// The suffixAnchored must be properly compiled, since expr has been already checked above.
		// Otherwise it is a bug, which must be fixed.
		re = regexp.MustCompile(suffixAnchored)
	}

	r := &Regex{
		prefix:          prefix,
		isSuffixDotStar: isSuffixDotStar,
		isSuffixDotPlus: isSuffixDotPlus,
		substrDotStar:   substrDotStar,
		substrDotPlus:   substrDotPlus,
		orValues:        orValues,
		re:              re,
	}
	return r, nil
}
// MatchString returns true if s matches r.
//
// The work is delegated to matchStringWithPrefix when r has a non-empty
// literal prefix and to matchStringNoPrefix otherwise.
func (r *Regex) MatchString(s string) bool {
	if r.prefix != "" {
		return r.matchStringWithPrefix(s)
	}
	return r.matchStringNoPrefix(s)
}
// matchStringNoPrefix returns true if s matches r, which has no literal prefix.
//
// Fast paths are tried in the following order: ".*", ".+", ".*literal.*",
// ".+literal.+" and the list of alternate literal strings.
// The compiled r.re is used only if none of them applies.
func (r *Regex) matchStringNoPrefix(s string) bool {
	switch {
	case r.isSuffixDotStar:
		// Fast path - r is ".*", which matches any string.
		return true
	case r.isSuffixDotPlus:
		// Fast path - r is ".+", which matches any non-empty string.
		return len(s) > 0
	case r.substrDotStar != "":
		// Fast path - r contains ".*someText.*"
		return strings.Contains(s, r.substrDotStar)
	case r.substrDotPlus != "":
		// Fast path - r contains ".+someText.+"
		//
		// someText must be preceded and followed by at least one char.
		// Search for it starting from the second byte instead of checking
		// the very first occurrence only. Otherwise strings such as "fooafoob"
		// are improperly rejected for ".+foo.+", since the first "foo" is located at offset 0.
		if len(s) == 0 {
			return false
		}
		n := strings.Index(s[1:], r.substrDotPlus)
		// The earliest occurrence at offset >= 1 is the one with the longest tail,
		// so it is enough to verify that its tail is non-empty.
		return n >= 0 && 1+n+len(r.substrDotPlus) < len(s)
	}

	if len(r.orValues) == 0 {
		// Fall back to slow path by matching the compiled regexp.
		return r.re.MatchString(s)
	}

	// Fast path - s matches if it contains any of r.orValues.
	for _, v := range r.orValues {
		if strings.Contains(s, v) {
			return true
		}
	}
	return false
}
// matchStringWithPrefix returns true if s matches r, which has non-empty literal prefix.
//
// The ".*", ".+", ".*literal.*" and ".+literal.+" suffixes are verified against
// the first occurrence of r.prefix in s only: any later occurrence is followed by
// a shorter tail, so it cannot match if the first one doesn't.
//
// For other suffixes every occurrence of r.prefix in s is tried in turn.
func (r *Regex) matchStringWithPrefix(s string) bool {
	n := strings.Index(s, r.prefix)
	if n < 0 {
		// Fast path - s doesn't contain the needed prefix
		return false
	}
	tail := s[n+len(r.prefix):]

	switch {
	case r.isSuffixDotStar:
		// Fast path - r contains "prefix.*"
		return true
	case r.isSuffixDotPlus:
		// Fast path - r contains "prefix.+"
		return len(tail) > 0
	case r.substrDotStar != "":
		// Fast path - r contains "prefix.*someText.*"
		return strings.Contains(tail, r.substrDotStar)
	case r.substrDotPlus != "":
		// Fast path - r contains "prefix.+someText.+"
		//
		// someText must be preceded and followed by at least one char in the tail.
		// Search for it starting from the second byte of the tail instead of checking
		// the very first occurrence only. Otherwise strings such as "foobarxbary"
		// are improperly rejected for "foo.+bar.+", since the first "bar" goes right after "foo".
		if len(tail) == 0 {
			return false
		}
		i := strings.Index(tail[1:], r.substrDotPlus)
		// The earliest occurrence at offset >= 1 has the longest remainder,
		// so it is enough to verify that the remainder is non-empty.
		return i >= 0 && 1+i+len(r.substrDotPlus) < len(tail)
	}

	for {
		if len(r.orValues) == 0 {
			// Fall back to slow path by matching the compiled regexp,
			// which is anchored to the beginning of the tail.
			if r.re.MatchString(tail) {
				return true
			}
		} else {
			// Fast path - the tail must start with one of r.orValues
			for _, v := range r.orValues {
				if strings.HasPrefix(tail, v) {
					return true
				}
			}
		}

		// Mismatch. Try again starting from the byte after the beginning
		// of the current prefix occurrence, so overlapping occurrences aren't missed.
		s = s[n+1:]
		n = strings.Index(s, r.prefix)
		if n < 0 {
			// Fast path - s doesn't contain the needed prefix
			return false
		}
		tail = s[n+len(r.prefix):]
	}
}

125
lib/regexutil/regex_test.go Normal file
View file

@ -0,0 +1,125 @@
package regexutil
import (
"testing"
)
// TestRegexMatchString verifies Regex.MatchString results for the regexps
// covered by the fast paths and for the ones falling back to the regexp engine.
func TestRegexMatchString(t *testing.T) {
	// check builds Regex from expr and compares the result of matching input against want.
	check := func(expr, input string, want bool) {
		t.Helper()
		re, err := NewRegex(expr)
		if err != nil {
			t.Fatalf("cannot parse %q: %s", expr, err)
		}
		if got := re.MatchString(input); got != want {
			t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", input, expr, got, want)
		}
	}

	// empty regex and plain string
	check("", "", true)
	check("", "foo", true)
	check("foo", "", false)

	// ".*" and ".+"
	check(".*", "", true)
	check(".*", "foo", true)
	check(".+", "", false)
	check(".+", "foo", true)

	// literal prefix followed by ".*" or ".+"
	check("foo.*", "bar", false)
	check("foo.*", "foo", true)
	check("foo.*", "a foo", true)
	check("foo.*", "a foo a", true)
	check("foo.*", "foobar", true)
	check("foo.*", "a foobar", true)
	check("foo.+", "bar", false)
	check("foo.+", "foo", false)
	check("foo.+", "a foo", false)
	check("foo.+", "foobar", true)
	check("foo.+", "a foobar", true)

	// alternate strings
	check("foo|bar", "", false)
	check("foo|bar", "a", false)
	check("foo|bar", "foo", true)
	check("foo|bar", "a foo", true)
	check("foo|bar", "foo a", true)
	check("foo|bar", "a foo a", true)
	check("foo|bar", "bar", true)
	check("foo|bar", "foobar", true)

	// literal prefix followed by alternate strings
	check("foo(bar|baz)", "a", false)
	check("foo(bar|baz)", "foobar", true)
	check("foo(bar|baz)", "foobaz", true)
	check("foo(bar|baz)", "foobaza", true)
	check("foo(bar|baz)", "a foobaz a", true)
	check("foo(bar|baz)", "foobal", false)

	// anchors inside alternate regexps
	check("^foo|b(ar)$", "foo", true)
	check("^foo|b(ar)$", "foo a", true)
	check("^foo|b(ar)$", "a foo", false)
	check("^foo|b(ar)$", "bar", true)
	check("^foo|b(ar)$", "a bar", true)
	check("^foo|b(ar)$", "barz", false)
	check("^foo|b(ar)$", "ar", false)

	// substring surrounded by ".*"
	check(".*foo.*", "foo", true)
	check(".*foo.*", "afoobar", true)
	check(".*foo.*", "abc", false)
	check("foo.*bar.*", "foobar", true)
	check("foo.*bar.*", "foo_bar_", true)
	check("foo.*bar.*", "a foo bar baz", true)
	check("foo.*bar.*", "foobaz", false)
	check("foo.*bar.*", "baz foo", false)

	// substring surrounded by ".+"
	check(".+foo.+", "foo", false)
	check(".+foo.+", "afoobar", true)
	check(".+foo.+", "afoo", false)
	check(".+foo.+", "abc", false)
	check("foo.+bar.+", "foobar", false)
	check("foo.+bar.+", "foo_bar_", true)
	check("foo.+bar.+", "a foo_bar_", true)
	check("foo.+bar.+", "foobaz", false)
	check("foo.+bar.+", "abc", false)

	// mixed ".+" and ".*"
	check(".+foo.*", "foo", false)
	check(".+foo.*", "afoo", true)
	check(".+foo.*", "afoobar", true)

	// alternate strings surrounded by ".*"
	check(".*(a|b).*", "a", true)
	check(".*(a|b).*", "ax", true)
	check(".*(a|b).*", "xa", true)
	check(".*(a|b).*", "xay", true)
	check(".*(a|b).*", "xzy", false)

	// fully anchored plain string
	check("^(?:true)$", "true", true)
	check("^(?:true)$", "false", false)

	// alternate regexps containing ".+"
	check(".+;|;.+", ";", false)
	check(".+;|;.+", "foo", false)
	check(".+;|;.+", "foo;bar", true)
	check(".+;|;.+", "foo;", true)
	check(".+;|;.+", ";foo", true)
	check(".+foo|bar|baz.+", "foo", false)
	check(".+foo|bar|baz.+", "afoo", true)
	check(".+foo|bar|baz.+", "fooa", false)
	check(".+foo|bar|baz.+", "afooa", true)
	check(".+foo|bar|baz.+", "bar", true)
	check(".+foo|bar|baz.+", "abar", true)
	check(".+foo|bar|baz.+", "abara", true)
	check(".+foo|bar|baz.+", "bara", true)
	check(".+foo|bar|baz.+", "baz", false)
	check(".+foo|bar|baz.+", "baza", true)
	check(".+foo|bar|baz.+", "abaz", false)
	check(".+foo|bar|baz.+", "abaza", true)
	check(".+foo|bar|baz.+", "afoo|bar|baza", true)
	check(".+(foo|bar|baz).+", "bar", false)
	check(".+(foo|bar|baz).+", "bara", false)
	check(".+(foo|bar|baz).+", "abar", false)
	check(".+(foo|bar|baz).+", "abara", true)
	check(".+(foo|bar|baz).+", "afooa", true)
	check(".+(foo|bar|baz).+", "abaza", true)

	// alternate regexps containing ".*"
	check(".*;|;.*", ";", true)
	check(".*;|;.*", "foo", false)
	check(".*;|;.*", "foo;bar", true)
	check(".*;|;.*", "foo;", true)
	check(".*;|;.*", ";foo", true)

	// start and end anchors
	check("^bar", "foobarbaz", false)
	check("^foo", "foobarbaz", true)
	check("bar$", "foobarbaz", false)
	check("baz$", "foobarbaz", true)
	check("(bar$|^foo)", "foobarbaz", true)
	check("(bar$^boo)", "foobarbaz", false)
}

View file

@ -18,6 +18,16 @@ func RemoveStartEndAnchors(expr string) string {
return expr
}
// GetOrValuesRegex returns "or" values from the given regexp expr.
//
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
//
// The expr is passed to getOrValuesRegex with keepAnchors=true,
// while GetOrValuesPromRegex passes keepAnchors=false.
func GetOrValuesRegex(expr string) []string {
return getOrValuesRegex(expr, true)
}
// GetOrValuesPromRegex returns "or" values from the given Prometheus-like regexp expr.
//
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
@ -27,15 +37,19 @@ func RemoveStartEndAnchors(expr string) string {
// It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValuesPromRegex(expr string) []string {
expr = RemoveStartEndAnchors(expr)
prefix, tailExpr := SimplifyPromRegex(expr)
return getOrValuesRegex(expr, false)
}
func getOrValuesRegex(expr string, keepAnchors bool) []string {
prefix, tailExpr := simplifyRegex(expr, keepAnchors)
if tailExpr == "" {
return []string{prefix}
}
sre, err := syntax.Parse(tailExpr, syntax.Perl)
sre, err := syntax.Parse(tailExpr, regexParseFlags)
if err != nil {
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
}
orValues := getOrValuesExt(sre)
orValues := getOrValues(sre)
// Sort orValues for faster index seek later
sort.Strings(orValues)
@ -50,10 +64,10 @@ func GetOrValuesPromRegex(expr string) []string {
return orValues
}
func getOrValuesExt(sre *syntax.Regexp) []string {
func getOrValues(sre *syntax.Regexp) []string {
switch sre.Op {
case syntax.OpCapture:
return getOrValuesExt(sre.Sub[0])
return getOrValues(sre.Sub[0])
case syntax.OpLiteral:
if !isLiteral(sre) {
return nil
@ -64,7 +78,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
case syntax.OpAlternate:
a := make([]string, 0, len(sre.Sub))
for _, reSub := range sre.Sub {
ca := getOrValuesExt(reSub)
ca := getOrValues(reSub)
if len(ca) == 0 {
return nil
}
@ -94,7 +108,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
if len(sre.Sub) < 1 {
return []string{""}
}
prefixes := getOrValuesExt(sre.Sub[0])
prefixes := getOrValues(sre.Sub[0])
if len(prefixes) == 0 {
return nil
}
@ -102,7 +116,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
return prefixes
}
sre.Sub = sre.Sub[1:]
suffixes := getOrValuesExt(sre)
suffixes := getOrValues(sre)
if len(suffixes) == 0 {
return nil
}
@ -132,21 +146,33 @@ func isLiteral(sre *syntax.Regexp) bool {
const maxOrValues = 100
// SimplifyRegex simplifies the given regexp expr.
//
// It returns plaintext prefix and the remaining regular expression
// without capturing parens.
//
// The expr is passed to simplifyRegex with keepAnchors=true,
// while SimplifyPromRegex passes keepAnchors=false.
func SimplifyRegex(expr string) (string, string) {
return simplifyRegex(expr, true)
}
// SimplifyPromRegex simplifies the given Prometheus-like expr.
//
// It returns plaintext prefix and the remaining regular expression
// with dropped '^' and '$' anchors at the beginning and the end
// with dropped '^' and '$' anchors at the beginning and at the end
// of the regular expression.
//
// The function removes capturing parens from the expr,
// so it cannot be used when capturing parens are necessary.
func SimplifyPromRegex(expr string) (string, string) {
sre, err := syntax.Parse(expr, syntax.Perl)
return simplifyRegex(expr, false)
}
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
sre, err := syntax.Parse(expr, regexParseFlags)
if err != nil {
// Cannot parse the regexp. Return it all as prefix.
return expr, ""
}
sre = simplifyRegexp(sre, false)
sre = simplifyRegexp(sre, keepAnchors, keepAnchors)
if sre == emptyRegexp {
return "", ""
}
@ -162,7 +188,7 @@ func SimplifyPromRegex(expr string) (string, string) {
if len(sre.Sub) == 0 {
return prefix, ""
}
sre = simplifyRegexp(sre, true)
sre = simplifyRegexp(sre, true, keepAnchors)
}
}
if _, err := syntax.Compile(sre); err != nil {
@ -171,17 +197,19 @@ func SimplifyPromRegex(expr string) (string, string) {
}
s := sre.String()
s = strings.ReplaceAll(s, "(?:)", "")
s = strings.ReplaceAll(s, "(?-s:.)", ".")
s = strings.ReplaceAll(s, "(?-m:$)", "$")
s = strings.ReplaceAll(s, "(?s:.)", ".")
s = strings.ReplaceAll(s, "(?m:$)", "$")
return prefix, s
}
func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
func simplifyRegexp(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
s := sre.String()
for {
sre = simplifyRegexpExt(sre, hasPrefix, false)
sre = simplifyRegexpExt(sre, keepBeginOp, keepEndOp)
sre = sre.Simplify()
if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText {
if !keepBeginOp && sre.Op == syntax.OpBeginText {
sre = emptyRegexp
} else if !keepEndOp && sre.Op == syntax.OpEndText {
sre = emptyRegexp
}
sNew := sre.String()
@ -189,7 +217,7 @@ func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
return sre
}
var err error
sre, err = syntax.Parse(sNew, syntax.Perl)
sre, err = syntax.Parse(sNew, regexParseFlags)
if err != nil {
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
}
@ -197,18 +225,18 @@ func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
}
}
func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
func simplifyRegexpExt(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
switch sre.Op {
case syntax.OpCapture:
// Substitute all the capture regexps with non-capture regexps.
sre.Op = syntax.OpAlternate
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
return sre
case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
@ -216,13 +244,13 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
case syntax.OpAlternate:
// Do not remove empty captures from OpAlternate, since this may break regexp.
for i, sub := range sre.Sub {
sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
sre.Sub[i] = simplifyRegexpExt(sub, keepBeginOp, keepEndOp)
}
return sre
case syntax.OpConcat:
subs := sre.Sub[:0]
for i, sub := range sre.Sub {
sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
sub = simplifyRegexpExt(sub, keepBeginOp || len(subs) > 0, keepEndOp || i+1 < len(sre.Sub))
if sub != emptyRegexp {
subs = append(subs, sub)
}
@ -230,12 +258,12 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
sre.Sub = subs
// Remove anchors from the beginning and the end of regexp, since they
// will be added later.
if !hasPrefix {
if !keepBeginOp {
for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
sre.Sub = sre.Sub[1:]
}
}
if !hasSuffix {
if !keepEndOp {
for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
sre.Sub = sre.Sub[:len(sre.Sub)-1]
}
@ -254,6 +282,47 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
}
}
// getSubstringLiteral returns the literal from expr surrounded by .+ or .* depending on the prefixSuffixOp.
//
// For example, if expr=".+foo.+" and prefixSuffixOp=syntax.OpPlus, then the function returns "foo".
//
// An empty string is returned if expr cannot be parsed or if it isn't a concatenation
// of exactly three parts: the dot op, a literal and the same dot op again.
func getSubstringLiteral(expr string, prefixSuffixOp syntax.Op) string {
	sre, err := syntax.Parse(expr, regexParseFlags)
	if err != nil || sre.Op != syntax.OpConcat || len(sre.Sub) != 3 {
		return ""
	}

	head, middle, tail := sre.Sub[0], sre.Sub[1], sre.Sub[2]
	if isDotOp(head, prefixSuffixOp) && isDotOp(tail, prefixSuffixOp) && isLiteral(middle) {
		return string(middle.Rune)
	}
	return ""
}
// isDotOpRegexp returns true if expr is parsed into the given op applied to "any char".
//
// For example, it returns true for expr=".*" and op=syntax.OpStar.
// It returns false if expr cannot be parsed.
func isDotOpRegexp(expr string, op syntax.Op) bool {
	if sre, err := syntax.Parse(expr, regexParseFlags); err == nil {
		return isDotOp(sre, op)
	}
	return false
}
// isDotOp returns true if sre is the given op, whose first sub-expression is syntax.OpAnyChar.
//
// The sub-expression is inspected only if sre.Op equals op.
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
	return sre.Op == op && sre.Sub[0].Op == syntax.OpAnyChar
}
// emptyRegexp is the shared syntax.OpEmptyMatch value returned by the simplification
// code when a regexp part is dropped. It is detected via pointer comparison
// such as `sre == emptyRegexp`.
var emptyRegexp = &syntax.Regexp{
Op: syntax.OpEmptyMatch,
}
// regexParseFlags contains flags passed to syntax.Parse in this file:
// Perl syntax plus syntax.DotNL.
const regexParseFlags = syntax.Perl | syntax.DotNL

View file

@ -77,7 +77,7 @@ func TestSimplifyPromRegex(t *testing.T) {
f("^foobar|foobaz", "fooba", "[rz]")
f("^foobar|^foobaz$", "fooba", "[rz]")
f("foobar|foobaz", "fooba", "[rz]")
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?-s:[rz]aa.*)")
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?s:[rz]aa.*)")
f("foo[bar]+", "foo", "[abr]+")
f("foo[a-z]+", "foo", "[a-z]+")
f("foo[bar]*", "foo", "[abr]*")
@ -88,12 +88,12 @@ func TestSimplifyPromRegex(t *testing.T) {
f("foo[^x]*", "foo", "[^x]*")
f("foo[x]*bar", "foo", "x*bar")
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f("foo.+bar", "foo", "(?-s:.+bar)")
f("a(b|c.*).+", "a", "(?-s:(?:b|c.*).+)")
f("foo.+bar", "foo", "(?s:.+bar)")
f("a(b|c.*).+", "a", "(?s:(?:b|c.*).+)")
f("ab|ac", "a", "[bc]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?i-s:UP.+X)")
f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps
@ -111,12 +111,12 @@ func TestSimplifyPromRegex(t *testing.T) {
f("(foo|bar$)x*", "", "(?-m:(?:foo|bar$)x*)")
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", "", "(?-s:.+;|;.+)")
f("^(.+);|;(.+)$", "", "(?-s:.+;|;.+)")
f("^(.+);$|^;(.+)$", "", "(?-s:.+;|;.+)")
f(".*;|;.*", "", "(?-s:.*;|;.*)")
f("^(.*);|;(.*)$", "", "(?-s:.*;|;.*)")
f("^(.*);$|^;(.*)$", "", "(?-s:.*;|;.*)")
f(".+;|;.+", "", "(?s:.+;|;.+)")
f("^(.+);|;(.+)$", "", "(?s:.+;|;.+)")
f("^(.+);$|^;(.+)$", "", "(?s:.+;|;.+)")
f(".*;|;.*", "", "(?s:.*;|;.*)")
f("^(.*);|;(.*)$", "", "(?s:.*;|;.*)")
f("^(.*);$|^;(.*)$", "", "(?s:.*;|;.*)")
}
func TestRemoveStartEndAnchors(t *testing.T) {