mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
ceae8a7e08
commit
91b006f0a7
5 changed files with 429 additions and 91 deletions
|
@ -23,9 +23,11 @@ type PromRegex struct {
|
||||||
// For example, prefix="foo" for regex="foo(a|b)"
|
// For example, prefix="foo" for regex="foo(a|b)"
|
||||||
prefix string
|
prefix string
|
||||||
|
|
||||||
// Suffix contains regex suffix left after removing the prefix.
|
// isSuffixDotStar is set to true if suffix is ".*"
|
||||||
// For example, suffix="a|b" for regex="foo(a|b)"
|
isSuffixDotStar bool
|
||||||
suffix string
|
|
||||||
|
// isSuffixDotPlus is set to true if suffix is ".+"
|
||||||
|
isSuffixDotPlus bool
|
||||||
|
|
||||||
// substrDotStar contains literal string for regex suffix=".*string.*"
|
// substrDotStar contains literal string for regex suffix=".*string.*"
|
||||||
substrDotStar string
|
substrDotStar string
|
||||||
|
@ -48,8 +50,10 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
}
|
}
|
||||||
prefix, suffix := SimplifyPromRegex(expr)
|
prefix, suffix := SimplifyPromRegex(expr)
|
||||||
orValues := GetOrValuesPromRegex(suffix)
|
orValues := GetOrValuesPromRegex(suffix)
|
||||||
substrDotStar := getSubstringLiteral(suffix, ".*")
|
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
||||||
substrDotPlus := getSubstringLiteral(suffix, ".+")
|
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
||||||
|
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
||||||
|
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
||||||
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
||||||
// Anchor suffix to the beginning and the end of the matching string.
|
// Anchor suffix to the beginning and the end of the matching string.
|
||||||
suffixExpr := "^(?:" + suffix + ")$"
|
suffixExpr := "^(?:" + suffix + ")$"
|
||||||
|
@ -57,7 +61,8 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
||||||
pr := &PromRegex{
|
pr := &PromRegex{
|
||||||
prefix: prefix,
|
prefix: prefix,
|
||||||
suffix: suffix,
|
isSuffixDotStar: isSuffixDotStar,
|
||||||
|
isSuffixDotPlus: isSuffixDotPlus,
|
||||||
substrDotStar: substrDotStar,
|
substrDotStar: substrDotStar,
|
||||||
substrDotPlus: substrDotPlus,
|
substrDotPlus: substrDotPlus,
|
||||||
orValues: orValues,
|
orValues: orValues,
|
||||||
|
@ -71,19 +76,21 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
// The pr is automatically anchored to the beginning and to the end
|
// The pr is automatically anchored to the beginning and to the end
|
||||||
// of the matching string with '^' and '$'.
|
// of the matching string with '^' and '$'.
|
||||||
func (pr *PromRegex) MatchString(s string) bool {
|
func (pr *PromRegex) MatchString(s string) bool {
|
||||||
if !strings.HasPrefix(s, pr.prefix) {
|
if len(pr.prefix) > 0 {
|
||||||
// Fast path - s has another prefix than pr.
|
if !strings.HasPrefix(s, pr.prefix) {
|
||||||
return false
|
// Fast path - s has another prefix than pr.
|
||||||
}
|
return false
|
||||||
s = s[len(pr.prefix):]
|
|
||||||
if len(pr.orValues) > 0 {
|
|
||||||
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
|
|
||||||
for _, v := range pr.orValues {
|
|
||||||
if s == v {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false
|
s = s[len(pr.prefix):]
|
||||||
|
}
|
||||||
|
|
||||||
|
if pr.isSuffixDotStar {
|
||||||
|
// Fast path - the pr contains "prefix.*"
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if pr.isSuffixDotPlus {
|
||||||
|
// Fast path - the pr contains "prefix.+"
|
||||||
|
return len(s) > 0
|
||||||
}
|
}
|
||||||
if pr.substrDotStar != "" {
|
if pr.substrDotStar != "" {
|
||||||
// Fast path - pr contains ".*someText.*"
|
// Fast path - pr contains ".*someText.*"
|
||||||
|
@ -94,45 +101,17 @@ func (pr *PromRegex) MatchString(s string) bool {
|
||||||
n := strings.Index(s, pr.substrDotPlus)
|
n := strings.Index(s, pr.substrDotPlus)
|
||||||
return n > 0 && n+len(pr.substrDotPlus) < len(s)
|
return n > 0 && n+len(pr.substrDotPlus) < len(s)
|
||||||
}
|
}
|
||||||
switch pr.suffix {
|
|
||||||
case ".*":
|
if len(pr.orValues) > 0 {
|
||||||
// Fast path - the pr contains "prefix.*"
|
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
|
||||||
return true
|
for _, v := range pr.orValues {
|
||||||
case ".+":
|
if s == v {
|
||||||
// Fast path - the pr contains "prefix.+"
|
return true
|
||||||
return len(s) > 0
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to slow path by matching the original regexp.
|
// Fall back to slow path by matching the original regexp.
|
||||||
return pr.reSuffixMatcher.Match(s)
|
return pr.reSuffixMatcher.Match(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
|
|
||||||
//
|
|
||||||
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
|
|
||||||
//
|
|
||||||
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
|
|
||||||
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
|
|
||||||
func getSubstringLiteral(expr, prefixSuffix string) string {
|
|
||||||
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
|
|
||||||
sre, err := syntax.Parse(expr, syntax.Perl)
|
|
||||||
if err != nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
if sre.Op == syntax.OpAlternate {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
if !strings.HasPrefix(expr, prefixSuffix) {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
expr = expr[len(prefixSuffix):]
|
|
||||||
if !strings.HasSuffix(expr, prefixSuffix) {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
expr = expr[:len(expr)-len(prefixSuffix)]
|
|
||||||
prefix, suffix := SimplifyPromRegex(expr)
|
|
||||||
if suffix != "" {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return prefix
|
|
||||||
}
|
|
||||||
|
|
165
lib/regexutil/regex.go
Normal file
165
lib/regexutil/regex.go
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
package regexutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"regexp/syntax"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Regex implements an optimized string matching for Go regex.
//
// The following regexs are optimized:
//
// - plain string such as "foobar"
// - alternate strings such as "foo|bar|baz"
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
//
// Use NewRegex for creating a Regex and MatchString for matching strings against it.
type Regex struct {
	// prefix contains literal prefix for regex.
	// For example, prefix="foo" for regex="foo(a|b)"
	prefix string

	// isSuffixDotStar is set to true if the regex part after the prefix (the suffix) is ".*"
	isSuffixDotStar bool

	// isSuffixDotPlus is set to true if the regex part after the prefix (the suffix) is ".+"
	isSuffixDotPlus bool

	// substrDotStar contains literal string for regex suffix=".*string.*"
	// It is empty if the suffix doesn't have this form.
	substrDotStar string

	// substrDotPlus contains literal string for regex suffix=".+string.+"
	// It is empty if the suffix doesn't have this form.
	substrDotPlus string

	// orValues contains or values for the suffix regex.
	// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
	// It is empty if or values cannot be extracted from the suffix.
	orValues []string

	// re is the compiled suffix regex, which is used on the slow path
	// when none of the optimizations above can be applied.
	//
	// NewRegex leaves it nil when the suffix is handled by one of the fast paths.
	// NewRegex anchors it to the beginning of the matching string when prefix is non-empty.
	re *regexp.Regexp
}
|
||||||
|
|
||||||
|
// NewRegex returns Regex for the given expr.
|
||||||
|
func NewRegex(expr string) (*Regex, error) {
|
||||||
|
if _, err := regexp.Compile(expr); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
prefix, suffix := SimplifyRegex(expr)
|
||||||
|
orValues := GetOrValuesRegex(suffix)
|
||||||
|
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
||||||
|
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
||||||
|
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
||||||
|
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
||||||
|
|
||||||
|
var re *regexp.Regexp
|
||||||
|
if len(orValues) == 0 && substrDotStar == "" && substrDotPlus == "" && suffix != ".*" && suffix != ".+" {
|
||||||
|
suffixAnchored := suffix
|
||||||
|
if len(prefix) > 0 {
|
||||||
|
suffixAnchored = "^(?:" + suffix + ")"
|
||||||
|
}
|
||||||
|
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
||||||
|
// Otherwise it is a bug, which must be fixed.
|
||||||
|
re = regexp.MustCompile(suffixAnchored)
|
||||||
|
}
|
||||||
|
r := &Regex{
|
||||||
|
prefix: prefix,
|
||||||
|
isSuffixDotStar: isSuffixDotStar,
|
||||||
|
isSuffixDotPlus: isSuffixDotPlus,
|
||||||
|
substrDotStar: substrDotStar,
|
||||||
|
substrDotPlus: substrDotPlus,
|
||||||
|
orValues: orValues,
|
||||||
|
re: re,
|
||||||
|
}
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchString returns true if s matches pr.
|
||||||
|
func (r *Regex) MatchString(s string) bool {
|
||||||
|
if len(r.prefix) == 0 {
|
||||||
|
return r.matchStringNoPrefix(s)
|
||||||
|
}
|
||||||
|
return r.matchStringWithPrefix(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Regex) matchStringNoPrefix(s string) bool {
|
||||||
|
if r.isSuffixDotStar {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if r.isSuffixDotPlus {
|
||||||
|
return len(s) > 0
|
||||||
|
}
|
||||||
|
if r.substrDotStar != "" {
|
||||||
|
// Fast path - r contains ".*someText.*"
|
||||||
|
return strings.Contains(s, r.substrDotStar)
|
||||||
|
}
|
||||||
|
if r.substrDotPlus != "" {
|
||||||
|
// Fast path - r contains ".+someText.+"
|
||||||
|
n := strings.Index(s, r.substrDotPlus)
|
||||||
|
return n > 0 && n+len(r.substrDotPlus) < len(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(r.orValues) == 0 {
|
||||||
|
// Fall back to slow path by matching the original regexp.
|
||||||
|
return r.re.MatchString(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast path - compare s to pr.orValues
|
||||||
|
for _, v := range r.orValues {
|
||||||
|
if strings.Contains(s, v) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Regex) matchStringWithPrefix(s string) bool {
|
||||||
|
n := strings.Index(s, r.prefix)
|
||||||
|
if n < 0 {
|
||||||
|
// Fast path - s doesn't contain the needed prefix
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
sNext := s[n+1:]
|
||||||
|
s = s[n+len(r.prefix):]
|
||||||
|
|
||||||
|
if r.isSuffixDotStar {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if r.isSuffixDotPlus {
|
||||||
|
return len(s) > 0
|
||||||
|
}
|
||||||
|
if r.substrDotStar != "" {
|
||||||
|
// Fast path - r contains ".*someText.*"
|
||||||
|
return strings.Contains(s, r.substrDotStar)
|
||||||
|
}
|
||||||
|
if r.substrDotPlus != "" {
|
||||||
|
// Fast path - r contains ".+someText.+"
|
||||||
|
n := strings.Index(s, r.substrDotPlus)
|
||||||
|
return n > 0 && n+len(r.substrDotPlus) < len(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
if len(r.orValues) == 0 {
|
||||||
|
// Fall back to slow path by matching the original regexp.
|
||||||
|
if r.re.MatchString(s) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fast path - compare s to pr.orValues
|
||||||
|
for _, v := range r.orValues {
|
||||||
|
if strings.HasPrefix(s, v) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mismatch. Try again starting from the next char.
|
||||||
|
s = sNext
|
||||||
|
n := strings.Index(s, r.prefix)
|
||||||
|
if n < 0 {
|
||||||
|
// Fast path - s doesn't contain the needed prefix
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
sNext = s[n+1:]
|
||||||
|
s = s[n+len(r.prefix):]
|
||||||
|
}
|
||||||
|
}
|
125
lib/regexutil/regex_test.go
Normal file
125
lib/regexutil/regex_test.go
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
package regexutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestRegexMatchString verifies that Regex.MatchString returns the expected result
// for a set of (regex, string) pairs.
func TestRegexMatchString(t *testing.T) {
	// f builds Regex from regex and checks that matching s against it returns resultExpected.
	f := func(regex, s string, resultExpected bool) {
		t.Helper()

		re, err := NewRegex(regex)
		if err != nil {
			t.Fatalf("cannot parse %q: %s", regex, err)
		}
		result := re.MatchString(s)
		if result != resultExpected {
			t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, regex, result, resultExpected)
		}
	}

	// empty regex and plain string
	f("", "", true)
	f("", "foo", true)
	f("foo", "", false)
	// ".*" and ".+" without prefix
	f(".*", "", true)
	f(".*", "foo", true)
	f(".+", "", false)
	f(".+", "foo", true)
	// literal prefix followed by ".*" or ".+"
	f("foo.*", "bar", false)
	f("foo.*", "foo", true)
	f("foo.*", "a foo", true)
	f("foo.*", "a foo a", true)
	f("foo.*", "foobar", true)
	f("foo.*", "a foobar", true)
	f("foo.+", "bar", false)
	f("foo.+", "foo", false)
	f("foo.+", "a foo", false)
	f("foo.+", "foobar", true)
	f("foo.+", "a foobar", true)
	// alternate strings with and without prefix
	f("foo|bar", "", false)
	f("foo|bar", "a", false)
	f("foo|bar", "foo", true)
	f("foo|bar", "a foo", true)
	f("foo|bar", "foo a", true)
	f("foo|bar", "a foo a", true)
	f("foo|bar", "bar", true)
	f("foo|bar", "foobar", true)
	f("foo(bar|baz)", "a", false)
	f("foo(bar|baz)", "foobar", true)
	f("foo(bar|baz)", "foobaz", true)
	f("foo(bar|baz)", "foobaza", true)
	f("foo(bar|baz)", "a foobaz a", true)
	f("foo(bar|baz)", "foobal", false)
	// alternation with anchors
	f("^foo|b(ar)$", "foo", true)
	f("^foo|b(ar)$", "foo a", true)
	f("^foo|b(ar)$", "a foo", false)
	f("^foo|b(ar)$", "bar", true)
	f("^foo|b(ar)$", "a bar", true)
	f("^foo|b(ar)$", "barz", false)
	f("^foo|b(ar)$", "ar", false)
	// substring surrounded by ".*"
	f(".*foo.*", "foo", true)
	f(".*foo.*", "afoobar", true)
	f(".*foo.*", "abc", false)
	f("foo.*bar.*", "foobar", true)
	f("foo.*bar.*", "foo_bar_", true)
	f("foo.*bar.*", "a foo bar baz", true)
	f("foo.*bar.*", "foobaz", false)
	f("foo.*bar.*", "baz foo", false)
	// substring surrounded by ".+"
	f(".+foo.+", "foo", false)
	f(".+foo.+", "afoobar", true)
	f(".+foo.+", "afoo", false)
	f(".+foo.+", "abc", false)
	f("foo.+bar.+", "foobar", false)
	f("foo.+bar.+", "foo_bar_", true)
	f("foo.+bar.+", "a foo_bar_", true)
	f("foo.+bar.+", "foobaz", false)
	f("foo.+bar.+", "abc", false)
	// mixed ".+" and ".*"
	f(".+foo.*", "foo", false)
	f(".+foo.*", "afoo", true)
	f(".+foo.*", "afoobar", true)
	// alternation surrounded by ".*"
	f(".*(a|b).*", "a", true)
	f(".*(a|b).*", "ax", true)
	f(".*(a|b).*", "xa", true)
	f(".*(a|b).*", "xay", true)
	f(".*(a|b).*", "xzy", false)
	// fully anchored literal
	f("^(?:true)$", "true", true)
	f("^(?:true)$", "false", false)

	// alternation of parts containing ".+"
	f(".+;|;.+", ";", false)
	f(".+;|;.+", "foo", false)
	f(".+;|;.+", "foo;bar", true)
	f(".+;|;.+", "foo;", true)
	f(".+;|;.+", ";foo", true)
	f(".+foo|bar|baz.+", "foo", false)
	f(".+foo|bar|baz.+", "afoo", true)
	f(".+foo|bar|baz.+", "fooa", false)
	f(".+foo|bar|baz.+", "afooa", true)
	f(".+foo|bar|baz.+", "bar", true)
	f(".+foo|bar|baz.+", "abar", true)
	f(".+foo|bar|baz.+", "abara", true)
	f(".+foo|bar|baz.+", "bara", true)
	f(".+foo|bar|baz.+", "baz", false)
	f(".+foo|bar|baz.+", "baza", true)
	f(".+foo|bar|baz.+", "abaz", false)
	f(".+foo|bar|baz.+", "abaza", true)
	f(".+foo|bar|baz.+", "afoo|bar|baza", true)
	f(".+(foo|bar|baz).+", "bar", false)
	f(".+(foo|bar|baz).+", "bara", false)
	f(".+(foo|bar|baz).+", "abar", false)
	f(".+(foo|bar|baz).+", "abara", true)
	f(".+(foo|bar|baz).+", "afooa", true)
	f(".+(foo|bar|baz).+", "abaza", true)

	// alternation of parts containing ".*"
	f(".*;|;.*", ";", true)
	f(".*;|;.*", "foo", false)
	f(".*;|;.*", "foo;bar", true)
	f(".*;|;.*", "foo;", true)
	f(".*;|;.*", ";foo", true)

	// start and end anchors
	f("^bar", "foobarbaz", false)
	f("^foo", "foobarbaz", true)
	f("bar$", "foobarbaz", false)
	f("baz$", "foobarbaz", true)
	f("(bar$|^foo)", "foobarbaz", true)
	f("(bar$^boo)", "foobarbaz", false)
}
|
|
@ -18,6 +18,16 @@ func RemoveStartEndAnchors(expr string) string {
|
||||||
return expr
|
return expr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetOrValuesRegex returns "or" values from the given regexp expr.
//
// It returns ["foo", "bar"] for "foo|bar" regexp.
// It returns ["foo"] for "foo" regexp.
// It returns [""] for "" regexp.
// It returns an empty list if it is impossible to extract "or" values from the regexp.
//
// It calls getOrValuesRegex with keepAnchors=true, while GetOrValuesPromRegex
// removes start and end anchors and passes keepAnchors=false.
func GetOrValuesRegex(expr string) []string {
	return getOrValuesRegex(expr, true)
}
|
||||||
|
|
||||||
// GetOrValuesPromRegex returns "or" values from the given Prometheus-like regexp expr.
|
// GetOrValuesPromRegex returns "or" values from the given Prometheus-like regexp expr.
|
||||||
//
|
//
|
||||||
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
|
// It ignores start and end anchors ('^') and ('$') at the start and the end of expr.
|
||||||
|
@ -27,15 +37,19 @@ func RemoveStartEndAnchors(expr string) string {
|
||||||
// It returns an empty list if it is impossible to extract "or" values from the regexp.
|
// It returns an empty list if it is impossible to extract "or" values from the regexp.
|
||||||
func GetOrValuesPromRegex(expr string) []string {
|
func GetOrValuesPromRegex(expr string) []string {
|
||||||
expr = RemoveStartEndAnchors(expr)
|
expr = RemoveStartEndAnchors(expr)
|
||||||
prefix, tailExpr := SimplifyPromRegex(expr)
|
return getOrValuesRegex(expr, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getOrValuesRegex(expr string, keepAnchors bool) []string {
|
||||||
|
prefix, tailExpr := simplifyRegex(expr, keepAnchors)
|
||||||
if tailExpr == "" {
|
if tailExpr == "" {
|
||||||
return []string{prefix}
|
return []string{prefix}
|
||||||
}
|
}
|
||||||
sre, err := syntax.Parse(tailExpr, syntax.Perl)
|
sre, err := syntax.Parse(tailExpr, regexParseFlags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
|
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
|
||||||
}
|
}
|
||||||
orValues := getOrValuesExt(sre)
|
orValues := getOrValues(sre)
|
||||||
|
|
||||||
// Sort orValues for faster index seek later
|
// Sort orValues for faster index seek later
|
||||||
sort.Strings(orValues)
|
sort.Strings(orValues)
|
||||||
|
@ -50,10 +64,10 @@ func GetOrValuesPromRegex(expr string) []string {
|
||||||
return orValues
|
return orValues
|
||||||
}
|
}
|
||||||
|
|
||||||
func getOrValuesExt(sre *syntax.Regexp) []string {
|
func getOrValues(sre *syntax.Regexp) []string {
|
||||||
switch sre.Op {
|
switch sre.Op {
|
||||||
case syntax.OpCapture:
|
case syntax.OpCapture:
|
||||||
return getOrValuesExt(sre.Sub[0])
|
return getOrValues(sre.Sub[0])
|
||||||
case syntax.OpLiteral:
|
case syntax.OpLiteral:
|
||||||
if !isLiteral(sre) {
|
if !isLiteral(sre) {
|
||||||
return nil
|
return nil
|
||||||
|
@ -64,7 +78,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
|
||||||
case syntax.OpAlternate:
|
case syntax.OpAlternate:
|
||||||
a := make([]string, 0, len(sre.Sub))
|
a := make([]string, 0, len(sre.Sub))
|
||||||
for _, reSub := range sre.Sub {
|
for _, reSub := range sre.Sub {
|
||||||
ca := getOrValuesExt(reSub)
|
ca := getOrValues(reSub)
|
||||||
if len(ca) == 0 {
|
if len(ca) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -94,7 +108,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
|
||||||
if len(sre.Sub) < 1 {
|
if len(sre.Sub) < 1 {
|
||||||
return []string{""}
|
return []string{""}
|
||||||
}
|
}
|
||||||
prefixes := getOrValuesExt(sre.Sub[0])
|
prefixes := getOrValues(sre.Sub[0])
|
||||||
if len(prefixes) == 0 {
|
if len(prefixes) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -102,7 +116,7 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
|
||||||
return prefixes
|
return prefixes
|
||||||
}
|
}
|
||||||
sre.Sub = sre.Sub[1:]
|
sre.Sub = sre.Sub[1:]
|
||||||
suffixes := getOrValuesExt(sre)
|
suffixes := getOrValues(sre)
|
||||||
if len(suffixes) == 0 {
|
if len(suffixes) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -132,21 +146,33 @@ func isLiteral(sre *syntax.Regexp) bool {
|
||||||
|
|
||||||
const maxOrValues = 100
|
const maxOrValues = 100
|
||||||
|
|
||||||
|
// SimplifyRegex simplifies the given regexp expr.
//
// It returns plaintext prefix and the remaining regular expression
// without capturing parens.
//
// It calls simplifyRegex with keepAnchors=true, while SimplifyPromRegex passes keepAnchors=false.
func SimplifyRegex(expr string) (string, string) {
	return simplifyRegex(expr, true)
}
|
||||||
|
|
||||||
// SimplifyPromRegex simplifies the given Prometheus-like expr.
|
// SimplifyPromRegex simplifies the given Prometheus-like expr.
|
||||||
//
|
//
|
||||||
// It returns plaintext prefix and the remaining regular expression
|
// It returns plaintext prefix and the remaining regular expression
|
||||||
// with dropped '^' and '$' anchors at the beginning and the end
|
// with dropped '^' and '$' anchors at the beginning and at the end
|
||||||
// of the regular expression.
|
// of the regular expression.
|
||||||
//
|
//
|
||||||
// The function removes capturing parens from the expr,
|
// The function removes capturing parens from the expr,
|
||||||
// so it cannot be used when capturing parens are necessary.
|
// so it cannot be used when capturing parens are necessary.
|
||||||
func SimplifyPromRegex(expr string) (string, string) {
|
func SimplifyPromRegex(expr string) (string, string) {
|
||||||
sre, err := syntax.Parse(expr, syntax.Perl)
|
return simplifyRegex(expr, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
||||||
|
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Cannot parse the regexp. Return it all as prefix.
|
// Cannot parse the regexp. Return it all as prefix.
|
||||||
return expr, ""
|
return expr, ""
|
||||||
}
|
}
|
||||||
sre = simplifyRegexp(sre, false)
|
sre = simplifyRegexp(sre, keepAnchors, keepAnchors)
|
||||||
if sre == emptyRegexp {
|
if sre == emptyRegexp {
|
||||||
return "", ""
|
return "", ""
|
||||||
}
|
}
|
||||||
|
@ -162,7 +188,7 @@ func SimplifyPromRegex(expr string) (string, string) {
|
||||||
if len(sre.Sub) == 0 {
|
if len(sre.Sub) == 0 {
|
||||||
return prefix, ""
|
return prefix, ""
|
||||||
}
|
}
|
||||||
sre = simplifyRegexp(sre, true)
|
sre = simplifyRegexp(sre, true, keepAnchors)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if _, err := syntax.Compile(sre); err != nil {
|
if _, err := syntax.Compile(sre); err != nil {
|
||||||
|
@ -171,17 +197,19 @@ func SimplifyPromRegex(expr string) (string, string) {
|
||||||
}
|
}
|
||||||
s := sre.String()
|
s := sre.String()
|
||||||
s = strings.ReplaceAll(s, "(?:)", "")
|
s = strings.ReplaceAll(s, "(?:)", "")
|
||||||
s = strings.ReplaceAll(s, "(?-s:.)", ".")
|
s = strings.ReplaceAll(s, "(?s:.)", ".")
|
||||||
s = strings.ReplaceAll(s, "(?-m:$)", "$")
|
s = strings.ReplaceAll(s, "(?m:$)", "$")
|
||||||
return prefix, s
|
return prefix, s
|
||||||
}
|
}
|
||||||
|
|
||||||
func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
|
func simplifyRegexp(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
|
||||||
s := sre.String()
|
s := sre.String()
|
||||||
for {
|
for {
|
||||||
sre = simplifyRegexpExt(sre, hasPrefix, false)
|
sre = simplifyRegexpExt(sre, keepBeginOp, keepEndOp)
|
||||||
sre = sre.Simplify()
|
sre = sre.Simplify()
|
||||||
if sre.Op == syntax.OpBeginText || sre.Op == syntax.OpEndText {
|
if !keepBeginOp && sre.Op == syntax.OpBeginText {
|
||||||
|
sre = emptyRegexp
|
||||||
|
} else if !keepEndOp && sre.Op == syntax.OpEndText {
|
||||||
sre = emptyRegexp
|
sre = emptyRegexp
|
||||||
}
|
}
|
||||||
sNew := sre.String()
|
sNew := sre.String()
|
||||||
|
@ -189,7 +217,7 @@ func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
|
||||||
return sre
|
return sre
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
sre, err = syntax.Parse(sNew, syntax.Perl)
|
sre, err = syntax.Parse(sNew, regexParseFlags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
|
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
|
||||||
}
|
}
|
||||||
|
@ -197,18 +225,18 @@ func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
|
func simplifyRegexpExt(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Regexp {
|
||||||
switch sre.Op {
|
switch sre.Op {
|
||||||
case syntax.OpCapture:
|
case syntax.OpCapture:
|
||||||
// Substitute all the capture regexps with non-capture regexps.
|
// Substitute all the capture regexps with non-capture regexps.
|
||||||
sre.Op = syntax.OpAlternate
|
sre.Op = syntax.OpAlternate
|
||||||
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
|
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
|
||||||
if sre.Sub[0] == emptyRegexp {
|
if sre.Sub[0] == emptyRegexp {
|
||||||
return emptyRegexp
|
return emptyRegexp
|
||||||
}
|
}
|
||||||
return sre
|
return sre
|
||||||
case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
|
case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
|
||||||
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
|
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], keepBeginOp, keepEndOp)
|
||||||
if sre.Sub[0] == emptyRegexp {
|
if sre.Sub[0] == emptyRegexp {
|
||||||
return emptyRegexp
|
return emptyRegexp
|
||||||
}
|
}
|
||||||
|
@ -216,13 +244,13 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
|
||||||
case syntax.OpAlternate:
|
case syntax.OpAlternate:
|
||||||
// Do not remove empty captures from OpAlternate, since this may break regexp.
|
// Do not remove empty captures from OpAlternate, since this may break regexp.
|
||||||
for i, sub := range sre.Sub {
|
for i, sub := range sre.Sub {
|
||||||
sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
|
sre.Sub[i] = simplifyRegexpExt(sub, keepBeginOp, keepEndOp)
|
||||||
}
|
}
|
||||||
return sre
|
return sre
|
||||||
case syntax.OpConcat:
|
case syntax.OpConcat:
|
||||||
subs := sre.Sub[:0]
|
subs := sre.Sub[:0]
|
||||||
for i, sub := range sre.Sub {
|
for i, sub := range sre.Sub {
|
||||||
sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
|
sub = simplifyRegexpExt(sub, keepBeginOp || len(subs) > 0, keepEndOp || i+1 < len(sre.Sub))
|
||||||
if sub != emptyRegexp {
|
if sub != emptyRegexp {
|
||||||
subs = append(subs, sub)
|
subs = append(subs, sub)
|
||||||
}
|
}
|
||||||
|
@ -230,12 +258,12 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
|
||||||
sre.Sub = subs
|
sre.Sub = subs
|
||||||
// Remove anchors from the beginning and the end of regexp, since they
|
// Remove anchors from the beginning and the end of regexp, since they
|
||||||
// will be added later.
|
// will be added later.
|
||||||
if !hasPrefix {
|
if !keepBeginOp {
|
||||||
for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
|
for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
|
||||||
sre.Sub = sre.Sub[1:]
|
sre.Sub = sre.Sub[1:]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !hasSuffix {
|
if !keepEndOp {
|
||||||
for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
|
for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
|
||||||
sre.Sub = sre.Sub[:len(sre.Sub)-1]
|
sre.Sub = sre.Sub[:len(sre.Sub)-1]
|
||||||
}
|
}
|
||||||
|
@ -254,6 +282,47 @@ func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Re
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSubstringLiteral returns regex part from expr surrounded by .+ or .* depending on the prefixSuffixOp.
|
||||||
|
//
|
||||||
|
// For example, if expr=".+foo.+" and prefixSuffix=syntax.OpPlus, then the function returns "foo".
|
||||||
|
//
|
||||||
|
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
|
||||||
|
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
|
||||||
|
func getSubstringLiteral(expr string, prefixSuffixOp syntax.Op) string {
|
||||||
|
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
|
||||||
|
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if sre.Op != syntax.OpConcat {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if len(sre.Sub) != 3 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) || !isLiteral(sre.Sub[1]) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return string(sre.Sub[1].Rune)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isDotOpRegexp(expr string, op syntax.Op) bool {
|
||||||
|
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return isDotOp(sre, op)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isDotOp reports whether sre is the given op applied to "any char".
//
// For example, it returns true for the parsed ".+" and op=syntax.OpPlus.
//
// It returns false if sre has no sub-expressions. This prevents an index out of range panic
// when the function is called with an op that doesn't wrap a sub-expression, such as syntax.OpLiteral.
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
	if sre.Op != op || len(sre.Sub) == 0 {
		return false
	}
	return sre.Sub[0].Op == syntax.OpAnyChar
}
|
||||||
|
|
||||||
var emptyRegexp = &syntax.Regexp{
|
var emptyRegexp = &syntax.Regexp{
|
||||||
Op: syntax.OpEmptyMatch,
|
Op: syntax.OpEmptyMatch,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// regexParseFlags contains flags passed to syntax.Parse in this file.
//
// syntax.DotNL is added to syntax.Perl, so '.' matches newline chars too.
const regexParseFlags = syntax.Perl | syntax.DotNL
|
||||||
|
|
|
@ -77,7 +77,7 @@ func TestSimplifyPromRegex(t *testing.T) {
|
||||||
f("^foobar|foobaz", "fooba", "[rz]")
|
f("^foobar|foobaz", "fooba", "[rz]")
|
||||||
f("^foobar|^foobaz$", "fooba", "[rz]")
|
f("^foobar|^foobaz$", "fooba", "[rz]")
|
||||||
f("foobar|foobaz", "fooba", "[rz]")
|
f("foobar|foobaz", "fooba", "[rz]")
|
||||||
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?-s:[rz]aa.*)")
|
f("(?:^foobar|^foobaz)aa.*", "fooba", "(?s:[rz]aa.*)")
|
||||||
f("foo[bar]+", "foo", "[abr]+")
|
f("foo[bar]+", "foo", "[abr]+")
|
||||||
f("foo[a-z]+", "foo", "[a-z]+")
|
f("foo[a-z]+", "foo", "[a-z]+")
|
||||||
f("foo[bar]*", "foo", "[abr]*")
|
f("foo[bar]*", "foo", "[abr]*")
|
||||||
|
@ -88,12 +88,12 @@ func TestSimplifyPromRegex(t *testing.T) {
|
||||||
f("foo[^x]*", "foo", "[^x]*")
|
f("foo[^x]*", "foo", "[^x]*")
|
||||||
f("foo[x]*bar", "foo", "x*bar")
|
f("foo[x]*bar", "foo", "x*bar")
|
||||||
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
|
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
|
||||||
f("foo.+bar", "foo", "(?-s:.+bar)")
|
f("foo.+bar", "foo", "(?s:.+bar)")
|
||||||
f("a(b|c.*).+", "a", "(?-s:(?:b|c.*).+)")
|
f("a(b|c.*).+", "a", "(?s:(?:b|c.*).+)")
|
||||||
f("ab|ac", "a", "[bc]")
|
f("ab|ac", "a", "[bc]")
|
||||||
f("(?i)xyz", "", "(?i:XYZ)")
|
f("(?i)xyz", "", "(?i:XYZ)")
|
||||||
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
|
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
|
||||||
f("(?i)up.+x", "", "(?i-s:UP.+X)")
|
f("(?i)up.+x", "", "(?is:UP.+X)")
|
||||||
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
|
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
|
||||||
|
|
||||||
// test invalid regexps
|
// test invalid regexps
|
||||||
|
@ -111,12 +111,12 @@ func TestSimplifyPromRegex(t *testing.T) {
|
||||||
f("(foo|bar$)x*", "", "(?-m:(?:foo|bar$)x*)")
|
f("(foo|bar$)x*", "", "(?-m:(?:foo|bar$)x*)")
|
||||||
|
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
|
||||||
f(".+;|;.+", "", "(?-s:.+;|;.+)")
|
f(".+;|;.+", "", "(?s:.+;|;.+)")
|
||||||
f("^(.+);|;(.+)$", "", "(?-s:.+;|;.+)")
|
f("^(.+);|;(.+)$", "", "(?s:.+;|;.+)")
|
||||||
f("^(.+);$|^;(.+)$", "", "(?-s:.+;|;.+)")
|
f("^(.+);$|^;(.+)$", "", "(?s:.+;|;.+)")
|
||||||
f(".*;|;.*", "", "(?-s:.*;|;.*)")
|
f(".*;|;.*", "", "(?s:.*;|;.*)")
|
||||||
f("^(.*);|;(.*)$", "", "(?-s:.*;|;.*)")
|
f("^(.*);|;(.*)$", "", "(?s:.*;|;.*)")
|
||||||
f("^(.*);$|^;(.*)$", "", "(?-s:.*;|;.*)")
|
f("^(.*);$|^;(.*)$", "", "(?s:.*;|;.*)")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRemoveStartEndAnchors(t *testing.T) {
|
func TestRemoveStartEndAnchors(t *testing.T) {
|
||||||
|
|
Loading…
Reference in a new issue