mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
32e96050f9
commit
b0afef1e2b
5 changed files with 96 additions and 59 deletions
|
@ -23,6 +23,9 @@ type PromRegex struct {
|
|||
// For example, prefix="foo" for regex="foo(a|b)"
|
||||
prefix string
|
||||
|
||||
// isOnlyPrefix is set to true if the regex contains only the prefix.
|
||||
isOnlyPrefix bool
|
||||
|
||||
// isSuffixDotStar is set to true if suffix is ".*"
|
||||
isSuffixDotStar bool
|
||||
|
||||
|
@ -49,11 +52,13 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
|||
return nil, err
|
||||
}
|
||||
prefix, suffix := SimplifyPromRegex(expr)
|
||||
orValues := GetOrValuesPromRegex(suffix)
|
||||
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
||||
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
||||
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
||||
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
||||
sre := mustParseRegexp(suffix)
|
||||
orValues := getOrValues(sre)
|
||||
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
|
||||
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
|
||||
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
|
||||
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
|
||||
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
|
||||
// It is expected that SimplifyPromRegex returns valid regexp in suffix, so use MustCompile here.
|
||||
// Anchor suffix to the beginning and the end of the matching string.
|
||||
suffixExpr := "^(?:" + suffix + ")$"
|
||||
|
@ -61,6 +66,7 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
|||
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
||||
pr := &PromRegex{
|
||||
prefix: prefix,
|
||||
isOnlyPrefix: isOnlyPrefix,
|
||||
isSuffixDotStar: isSuffixDotStar,
|
||||
isSuffixDotPlus: isSuffixDotPlus,
|
||||
substrDotStar: substrDotStar,
|
||||
|
@ -76,6 +82,10 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
|||
// The pr is automatically anchored to the beginning and to the end
|
||||
// of the matching string with '^' and '$'.
|
||||
func (pr *PromRegex) MatchString(s string) bool {
|
||||
if pr.isOnlyPrefix {
|
||||
return s == pr.prefix
|
||||
}
|
||||
|
||||
if len(pr.prefix) > 0 {
|
||||
if !strings.HasPrefix(s, pr.prefix) {
|
||||
// Fast path - s has another prefix than pr.
|
||||
|
|
|
@ -118,4 +118,8 @@ func TestPromRegex(t *testing.T) {
|
|||
f(".*;|;.*", "foo;bar", false)
|
||||
f(".*;|;.*", "foo;", true)
|
||||
f(".*;|;.*", ";foo", true)
|
||||
|
||||
f(".*foo(bar|baz)", "fooxfoobaz", true)
|
||||
f(".*foo(bar|baz)", "fooxfooban", false)
|
||||
f(".*foo(bar|baz)", "fooxfooban foobar", true)
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@ type Regex struct {
|
|||
// For example, prefix="foo" for regex="foo(a|b)"
|
||||
prefix string
|
||||
|
||||
// isOnlyPrefix is set to true if the regex contains only the prefix.
|
||||
isOnlyPrefix bool
|
||||
|
||||
// isSuffixDotStar is set to true if suffix is ".*"
|
||||
isSuffixDotStar bool
|
||||
|
||||
|
@ -44,25 +47,28 @@ func NewRegex(expr string) (*Regex, error) {
|
|||
if _, err := regexp.Compile(expr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
prefix, suffix := SimplifyRegex(expr)
|
||||
orValues := GetOrValuesRegex(suffix)
|
||||
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
||||
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
||||
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
||||
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
||||
sre := mustParseRegexp(suffix)
|
||||
orValues := getOrValues(sre)
|
||||
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
|
||||
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
|
||||
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
|
||||
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
|
||||
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
|
||||
|
||||
var re *regexp.Regexp
|
||||
if len(orValues) == 0 && substrDotStar == "" && substrDotPlus == "" && suffix != ".*" && suffix != ".+" {
|
||||
suffixAnchored := suffix
|
||||
if len(prefix) > 0 {
|
||||
suffixAnchored = "^(?:" + suffix + ")"
|
||||
}
|
||||
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
||||
// Otherwise it is a bug, which must be fixed.
|
||||
re = regexp.MustCompile(suffixAnchored)
|
||||
suffixAnchored := suffix
|
||||
if len(prefix) > 0 {
|
||||
suffixAnchored = "^(?:" + suffix + ")"
|
||||
}
|
||||
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
||||
// Otherwise it is a bug, which must be fixed.
|
||||
re = regexp.MustCompile(suffixAnchored)
|
||||
|
||||
r := &Regex{
|
||||
prefix: prefix,
|
||||
isOnlyPrefix: isOnlyPrefix,
|
||||
isSuffixDotStar: isSuffixDotStar,
|
||||
isSuffixDotPlus: isSuffixDotPlus,
|
||||
substrDotStar: substrDotStar,
|
||||
|
@ -75,6 +81,9 @@ func NewRegex(expr string) (*Regex, error) {
|
|||
|
||||
// MatchString returns true if s matches r.
|
||||
func (r *Regex) MatchString(s string) bool {
|
||||
if r.isOnlyPrefix {
|
||||
return strings.Contains(s, r.prefix)
|
||||
}
|
||||
if len(r.prefix) == 0 {
|
||||
return r.matchStringNoPrefix(s)
|
||||
}
|
||||
|
|
|
@ -4,6 +4,20 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
// TestNewRegexFailure verifies that NewRegex returns a non-nil error
// for syntactically invalid regular expressions.
func TestNewRegexFailure(t *testing.T) {
|
||||
// f fails the test if NewRegex accepts the given regex without an error.
f := func(regex string) {
|
||||
t.Helper()
|
||||
|
||||
re, err := NewRegex(regex)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error when parsing %q; got %q", regex, re.re)
|
||||
}
|
||||
}
|
||||
|
||||
// Unterminated character class.
f("[foo")
|
||||
// Unterminated group.
f("(foo")
|
||||
}
|
||||
|
||||
func TestRegexMatchString(t *testing.T) {
|
||||
f := func(regex, s string, resultExpected bool) {
|
||||
t.Helper()
|
||||
|
@ -122,4 +136,7 @@ func TestRegexMatchString(t *testing.T) {
|
|||
f("baz$", "foobarbaz", true)
|
||||
f("(bar$|^foo)", "foobarbaz", true)
|
||||
f("(bar$^boo)", "foobarbaz", false)
|
||||
f("foo(bar|baz)", "a fooxfoobaz a", true)
|
||||
f("foo(bar|baz)", "a fooxfooban a", false)
|
||||
f("foo(bar|baz)", "a fooxfooban foobar a", true)
|
||||
}
|
||||
|
|
|
@ -45,9 +45,9 @@ func getOrValuesRegex(expr string, keepAnchors bool) []string {
|
|||
if tailExpr == "" {
|
||||
return []string{prefix}
|
||||
}
|
||||
sre, err := syntax.Parse(tailExpr, regexParseFlags)
|
||||
sre, err := parseRegexp(tailExpr)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
|
||||
return nil
|
||||
}
|
||||
orValues := getOrValues(sre)
|
||||
|
||||
|
@ -69,10 +69,11 @@ func getOrValues(sre *syntax.Regexp) []string {
|
|||
case syntax.OpCapture:
|
||||
return getOrValues(sre.Sub[0])
|
||||
case syntax.OpLiteral:
|
||||
if !isLiteral(sre) {
|
||||
v, ok := getLiteral(sre)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return []string{string(sre.Rune)}
|
||||
return []string{v}
|
||||
case syntax.OpEmptyMatch:
|
||||
return []string{""}
|
||||
case syntax.OpAlternate:
|
||||
|
@ -137,11 +138,14 @@ func getOrValues(sre *syntax.Regexp) []string {
|
|||
}
|
||||
}
|
||||
|
||||
func isLiteral(sre *syntax.Regexp) bool {
|
||||
func getLiteral(sre *syntax.Regexp) (string, bool) {
|
||||
if sre.Op == syntax.OpCapture {
|
||||
return isLiteral(sre.Sub[0])
|
||||
return getLiteral(sre.Sub[0])
|
||||
}
|
||||
return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
|
||||
if sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0 {
|
||||
return string(sre.Rune), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
const maxOrValues = 100
|
||||
|
@ -167,7 +171,7 @@ func SimplifyPromRegex(expr string) (string, string) {
|
|||
}
|
||||
|
||||
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||
sre, err := parseRegexp(expr)
|
||||
if err != nil {
|
||||
// Cannot parse the regexp. Return it all as prefix.
|
||||
return expr, ""
|
||||
|
@ -176,14 +180,14 @@ func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
|||
if sre == emptyRegexp {
|
||||
return "", ""
|
||||
}
|
||||
if isLiteral(sre) {
|
||||
return string(sre.Rune), ""
|
||||
v, ok := getLiteral(sre)
|
||||
if ok {
|
||||
return v, ""
|
||||
}
|
||||
var prefix string
|
||||
if sre.Op == syntax.OpConcat {
|
||||
sub0 := sre.Sub[0]
|
||||
if isLiteral(sub0) {
|
||||
prefix = string(sub0.Rune)
|
||||
prefix, ok = getLiteral(sre.Sub[0])
|
||||
if ok {
|
||||
sre.Sub = sre.Sub[1:]
|
||||
if len(sre.Sub) == 0 {
|
||||
return prefix, ""
|
||||
|
@ -216,11 +220,7 @@ func simplifyRegexp(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Reg
|
|||
if sNew == s {
|
||||
return sre
|
||||
}
|
||||
var err error
|
||||
sre, err = syntax.Parse(sNew, regexParseFlags)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
|
||||
}
|
||||
sre = mustParseRegexp(sNew)
|
||||
s = sNew
|
||||
}
|
||||
}
|
||||
|
@ -282,36 +282,23 @@ func simplifyRegexpExt(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.
|
|||
}
|
||||
}
|
||||
|
||||
// getSubstringLiteral returns regex part from expr surrounded by .+ or .* depending on the prefixSuffixOp.
|
||||
// getSubstringLiteral returns regex part from sre surrounded by .+ or .* depending on the prefixSuffixOp.
|
||||
//
|
||||
// For example, if expr=".+foo.+" and prefixSuffix=syntax.OpPlus, then the function returns "foo".
|
||||
// For example, if sre=".+foo.+" and prefixSuffixOp=syntax.OpPlus, then the function returns "foo".
|
||||
//
|
||||
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
|
||||
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
|
||||
func getSubstringLiteral(expr string, prefixSuffixOp syntax.Op) string {
|
||||
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
|
||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||
if err != nil {
|
||||
// An empty string is returned if sre doesn't contain the given prefixSuffixOp prefix and suffix, or if the part between them isn't a case-sensitive literal.
|
||||
func getSubstringLiteral(sre *syntax.Regexp, prefixSuffixOp syntax.Op) string {
|
||||
if sre.Op != syntax.OpConcat || len(sre.Sub) != 3 {
|
||||
return ""
|
||||
}
|
||||
if sre.Op != syntax.OpConcat {
|
||||
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) {
|
||||
return ""
|
||||
}
|
||||
if len(sre.Sub) != 3 {
|
||||
v, ok := getLiteral(sre.Sub[1])
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) || !isLiteral(sre.Sub[1]) {
|
||||
return ""
|
||||
}
|
||||
return string(sre.Sub[1].Rune)
|
||||
}
|
||||
|
||||
func isDotOpRegexp(expr string, op syntax.Op) bool {
|
||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return isDotOp(sre, op)
|
||||
return v
|
||||
}
|
||||
|
||||
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
|
||||
|
@ -325,4 +312,14 @@ var emptyRegexp = &syntax.Regexp{
|
|||
Op: syntax.OpEmptyMatch,
|
||||
}
|
||||
|
||||
const regexParseFlags = syntax.Perl | syntax.DotNL
|
||||
// parseRegexp parses expr with Perl-compatible syntax flags plus DotNL,
// so that '.' also matches the newline char.
//
// It returns the error reported by syntax.Parse if expr is invalid.
func parseRegexp(expr string) (*syntax.Regexp, error) {
|
||||
return syntax.Parse(expr, syntax.Perl|syntax.DotNL)
|
||||
}
|
||||
|
||||
// mustParseRegexp parses expr via parseRegexp and panics on failure.
//
// It must be called only for expr, which has been already verified to be valid,
// since a parse error is treated as a bug.
func mustParseRegexp(expr string) *syntax.Regexp {
|
||||
sre, err := parseRegexp(expr)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("BUG: cannot parse already verified regexp %q: %w", expr, err))
|
||||
}
|
||||
return sre
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue