mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 15:16:42 +00:00
wip
This commit is contained in:
parent
32e96050f9
commit
b0afef1e2b
5 changed files with 96 additions and 59 deletions
|
@ -23,6 +23,9 @@ type PromRegex struct {
|
||||||
// For example, prefix="foo" for regex="foo(a|b)"
|
// For example, prefix="foo" for regex="foo(a|b)"
|
||||||
prefix string
|
prefix string
|
||||||
|
|
||||||
|
// isOnlyPrefix is set to true if the regex contains only the prefix.
|
||||||
|
isOnlyPrefix bool
|
||||||
|
|
||||||
// isSuffixDotStar is set to true if suffix is ".*"
|
// isSuffixDotStar is set to true if suffix is ".*"
|
||||||
isSuffixDotStar bool
|
isSuffixDotStar bool
|
||||||
|
|
||||||
|
@ -49,11 +52,13 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
prefix, suffix := SimplifyPromRegex(expr)
|
prefix, suffix := SimplifyPromRegex(expr)
|
||||||
orValues := GetOrValuesPromRegex(suffix)
|
sre := mustParseRegexp(suffix)
|
||||||
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
orValues := getOrValues(sre)
|
||||||
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
|
||||||
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
|
||||||
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
|
||||||
|
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
|
||||||
|
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
|
||||||
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
||||||
// Anchor suffix to the beginning and the end of the matching string.
|
// Anchor suffix to the beginning and the end of the matching string.
|
||||||
suffixExpr := "^(?:" + suffix + ")$"
|
suffixExpr := "^(?:" + suffix + ")$"
|
||||||
|
@ -61,6 +66,7 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
||||||
pr := &PromRegex{
|
pr := &PromRegex{
|
||||||
prefix: prefix,
|
prefix: prefix,
|
||||||
|
isOnlyPrefix: isOnlyPrefix,
|
||||||
isSuffixDotStar: isSuffixDotStar,
|
isSuffixDotStar: isSuffixDotStar,
|
||||||
isSuffixDotPlus: isSuffixDotPlus,
|
isSuffixDotPlus: isSuffixDotPlus,
|
||||||
substrDotStar: substrDotStar,
|
substrDotStar: substrDotStar,
|
||||||
|
@ -76,6 +82,10 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
// The pr is automatically anchored to the beginning and to the end
|
// The pr is automatically anchored to the beginning and to the end
|
||||||
// of the matching string with '^' and '$'.
|
// of the matching string with '^' and '$'.
|
||||||
func (pr *PromRegex) MatchString(s string) bool {
|
func (pr *PromRegex) MatchString(s string) bool {
|
||||||
|
if pr.isOnlyPrefix {
|
||||||
|
return s == pr.prefix
|
||||||
|
}
|
||||||
|
|
||||||
if len(pr.prefix) > 0 {
|
if len(pr.prefix) > 0 {
|
||||||
if !strings.HasPrefix(s, pr.prefix) {
|
if !strings.HasPrefix(s, pr.prefix) {
|
||||||
// Fast path - s has another prefix than pr.
|
// Fast path - s has another prefix than pr.
|
||||||
|
|
|
@ -118,4 +118,8 @@ func TestPromRegex(t *testing.T) {
|
||||||
f(".*;|;.*", "foo;bar", false)
|
f(".*;|;.*", "foo;bar", false)
|
||||||
f(".*;|;.*", "foo;", true)
|
f(".*;|;.*", "foo;", true)
|
||||||
f(".*;|;.*", ";foo", true)
|
f(".*;|;.*", ";foo", true)
|
||||||
|
|
||||||
|
f(".*foo(bar|baz)", "fooxfoobaz", true)
|
||||||
|
f(".*foo(bar|baz)", "fooxfooban", false)
|
||||||
|
f(".*foo(bar|baz)", "fooxfooban foobar", true)
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@ type Regex struct {
|
||||||
// For example, prefix="foo" for regex="foo(a|b)"
|
// For example, prefix="foo" for regex="foo(a|b)"
|
||||||
prefix string
|
prefix string
|
||||||
|
|
||||||
|
// isOnlyPrefix is set to true if the regex contains only the prefix.
|
||||||
|
isOnlyPrefix bool
|
||||||
|
|
||||||
// isSuffixDotStar is set to true if suffix is ".*"
|
// isSuffixDotStar is set to true if suffix is ".*"
|
||||||
isSuffixDotStar bool
|
isSuffixDotStar bool
|
||||||
|
|
||||||
|
@ -44,15 +47,17 @@ func NewRegex(expr string) (*Regex, error) {
|
||||||
if _, err := regexp.Compile(expr); err != nil {
|
if _, err := regexp.Compile(expr); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
prefix, suffix := SimplifyRegex(expr)
|
prefix, suffix := SimplifyRegex(expr)
|
||||||
orValues := GetOrValuesRegex(suffix)
|
sre := mustParseRegexp(suffix)
|
||||||
isSuffixDotStar := isDotOpRegexp(suffix, syntax.OpStar)
|
orValues := getOrValues(sre)
|
||||||
isSuffixDotPlus := isDotOpRegexp(suffix, syntax.OpPlus)
|
isOnlyPrefix := len(orValues) == 1 && orValues[0] == ""
|
||||||
substrDotStar := getSubstringLiteral(suffix, syntax.OpStar)
|
isSuffixDotStar := isDotOp(sre, syntax.OpStar)
|
||||||
substrDotPlus := getSubstringLiteral(suffix, syntax.OpPlus)
|
isSuffixDotPlus := isDotOp(sre, syntax.OpPlus)
|
||||||
|
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
|
||||||
|
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
|
||||||
|
|
||||||
var re *regexp.Regexp
|
var re *regexp.Regexp
|
||||||
if len(orValues) == 0 && substrDotStar == "" && substrDotPlus == "" && suffix != ".*" && suffix != ".+" {
|
|
||||||
suffixAnchored := suffix
|
suffixAnchored := suffix
|
||||||
if len(prefix) > 0 {
|
if len(prefix) > 0 {
|
||||||
suffixAnchored = "^(?:" + suffix + ")"
|
suffixAnchored = "^(?:" + suffix + ")"
|
||||||
|
@ -60,9 +65,10 @@ func NewRegex(expr string) (*Regex, error) {
|
||||||
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
// The suffixAnchored must be properly compiled, since it has been already checked above.
|
||||||
// Otherwise it is a bug, which must be fixed.
|
// Otherwise it is a bug, which must be fixed.
|
||||||
re = regexp.MustCompile(suffixAnchored)
|
re = regexp.MustCompile(suffixAnchored)
|
||||||
}
|
|
||||||
r := &Regex{
|
r := &Regex{
|
||||||
prefix: prefix,
|
prefix: prefix,
|
||||||
|
isOnlyPrefix: isOnlyPrefix,
|
||||||
isSuffixDotStar: isSuffixDotStar,
|
isSuffixDotStar: isSuffixDotStar,
|
||||||
isSuffixDotPlus: isSuffixDotPlus,
|
isSuffixDotPlus: isSuffixDotPlus,
|
||||||
substrDotStar: substrDotStar,
|
substrDotStar: substrDotStar,
|
||||||
|
@ -75,6 +81,9 @@ func NewRegex(expr string) (*Regex, error) {
|
||||||
|
|
||||||
// MatchString returns true if s matches r.
|
// MatchString returns true if s matches r.
|
||||||
func (r *Regex) MatchString(s string) bool {
|
func (r *Regex) MatchString(s string) bool {
|
||||||
|
if r.isOnlyPrefix {
|
||||||
|
return strings.Contains(s, r.prefix)
|
||||||
|
}
|
||||||
if len(r.prefix) == 0 {
|
if len(r.prefix) == 0 {
|
||||||
return r.matchStringNoPrefix(s)
|
return r.matchStringNoPrefix(s)
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,20 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestNewRegexFailure(t *testing.T) {
|
||||||
|
f := func(regex string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
re, err := NewRegex(regex)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expecting non-nil error when parsing %q; got %q", regex, re.re)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f("[foo")
|
||||||
|
f("(foo")
|
||||||
|
}
|
||||||
|
|
||||||
func TestRegexMatchString(t *testing.T) {
|
func TestRegexMatchString(t *testing.T) {
|
||||||
f := func(regex, s string, resultExpected bool) {
|
f := func(regex, s string, resultExpected bool) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
@ -122,4 +136,7 @@ func TestRegexMatchString(t *testing.T) {
|
||||||
f("baz$", "foobarbaz", true)
|
f("baz$", "foobarbaz", true)
|
||||||
f("(bar$|^foo)", "foobarbaz", true)
|
f("(bar$|^foo)", "foobarbaz", true)
|
||||||
f("(bar$^boo)", "foobarbaz", false)
|
f("(bar$^boo)", "foobarbaz", false)
|
||||||
|
f("foo(bar|baz)", "a fooxfoobaz a", true)
|
||||||
|
f("foo(bar|baz)", "a fooxfooban a", false)
|
||||||
|
f("foo(bar|baz)", "a fooxfooban foobar a", true)
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,9 +45,9 @@ func getOrValuesRegex(expr string, keepAnchors bool) []string {
|
||||||
if tailExpr == "" {
|
if tailExpr == "" {
|
||||||
return []string{prefix}
|
return []string{prefix}
|
||||||
}
|
}
|
||||||
sre, err := syntax.Parse(tailExpr, regexParseFlags)
|
sre, err := parseRegexp(tailExpr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
|
return nil
|
||||||
}
|
}
|
||||||
orValues := getOrValues(sre)
|
orValues := getOrValues(sre)
|
||||||
|
|
||||||
|
@ -69,10 +69,11 @@ func getOrValues(sre *syntax.Regexp) []string {
|
||||||
case syntax.OpCapture:
|
case syntax.OpCapture:
|
||||||
return getOrValues(sre.Sub[0])
|
return getOrValues(sre.Sub[0])
|
||||||
case syntax.OpLiteral:
|
case syntax.OpLiteral:
|
||||||
if !isLiteral(sre) {
|
v, ok := getLiteral(sre)
|
||||||
|
if !ok {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return []string{string(sre.Rune)}
|
return []string{v}
|
||||||
case syntax.OpEmptyMatch:
|
case syntax.OpEmptyMatch:
|
||||||
return []string{""}
|
return []string{""}
|
||||||
case syntax.OpAlternate:
|
case syntax.OpAlternate:
|
||||||
|
@ -137,11 +138,14 @@ func getOrValues(sre *syntax.Regexp) []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func isLiteral(sre *syntax.Regexp) bool {
|
func getLiteral(sre *syntax.Regexp) (string, bool) {
|
||||||
if sre.Op == syntax.OpCapture {
|
if sre.Op == syntax.OpCapture {
|
||||||
return isLiteral(sre.Sub[0])
|
return getLiteral(sre.Sub[0])
|
||||||
}
|
}
|
||||||
return sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0
|
if sre.Op == syntax.OpLiteral && sre.Flags&syntax.FoldCase == 0 {
|
||||||
|
return string(sre.Rune), true
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxOrValues = 100
|
const maxOrValues = 100
|
||||||
|
@ -167,7 +171,7 @@ func SimplifyPromRegex(expr string) (string, string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
||||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
sre, err := parseRegexp(expr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Cannot parse the regexp. Return it all as prefix.
|
// Cannot parse the regexp. Return it all as prefix.
|
||||||
return expr, ""
|
return expr, ""
|
||||||
|
@ -176,14 +180,14 @@ func simplifyRegex(expr string, keepAnchors bool) (string, string) {
|
||||||
if sre == emptyRegexp {
|
if sre == emptyRegexp {
|
||||||
return "", ""
|
return "", ""
|
||||||
}
|
}
|
||||||
if isLiteral(sre) {
|
v, ok := getLiteral(sre)
|
||||||
return string(sre.Rune), ""
|
if ok {
|
||||||
|
return v, ""
|
||||||
}
|
}
|
||||||
var prefix string
|
var prefix string
|
||||||
if sre.Op == syntax.OpConcat {
|
if sre.Op == syntax.OpConcat {
|
||||||
sub0 := sre.Sub[0]
|
prefix, ok = getLiteral(sre.Sub[0])
|
||||||
if isLiteral(sub0) {
|
if ok {
|
||||||
prefix = string(sub0.Rune)
|
|
||||||
sre.Sub = sre.Sub[1:]
|
sre.Sub = sre.Sub[1:]
|
||||||
if len(sre.Sub) == 0 {
|
if len(sre.Sub) == 0 {
|
||||||
return prefix, ""
|
return prefix, ""
|
||||||
|
@ -216,11 +220,7 @@ func simplifyRegexp(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.Reg
|
||||||
if sNew == s {
|
if sNew == s {
|
||||||
return sre
|
return sre
|
||||||
}
|
}
|
||||||
var err error
|
sre = mustParseRegexp(sNew)
|
||||||
sre, err = syntax.Parse(sNew, regexParseFlags)
|
|
||||||
if err != nil {
|
|
||||||
panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", sNew, err))
|
|
||||||
}
|
|
||||||
s = sNew
|
s = sNew
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -282,36 +282,23 @@ func simplifyRegexpExt(sre *syntax.Regexp, keepBeginOp, keepEndOp bool) *syntax.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getSubstringLiteral returns regex part from expr surrounded by .+ or .* depending on the prefixSuffixOp.
|
// getSubstringLiteral returns regex part from sre surrounded by .+ or .* depending on the prefixSuffixOp.
|
||||||
//
|
//
|
||||||
// For example, if expr=".+foo.+" and prefixSuffix=syntax.OpPlus, then the function returns "foo".
|
// For example, if sre=".+foo.+" and prefixSuffixOp=syntax.OpPlus, then the function returns "foo".
|
||||||
//
|
//
|
||||||
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
|
// An empty string is returned if sre doesn't contain the given prefixSuffixOp prefix and suffix or if the part between them isn't a case-sensitive literal.
|
||||||
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
|
func getSubstringLiteral(sre *syntax.Regexp, prefixSuffixOp syntax.Op) string {
|
||||||
func getSubstringLiteral(expr string, prefixSuffixOp syntax.Op) string {
|
if sre.Op != syntax.OpConcat || len(sre.Sub) != 3 {
|
||||||
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
|
|
||||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
|
||||||
if err != nil {
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
if sre.Op != syntax.OpConcat {
|
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
if len(sre.Sub) != 3 {
|
v, ok := getLiteral(sre.Sub[1])
|
||||||
|
if !ok {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
if !isDotOp(sre.Sub[0], prefixSuffixOp) || !isDotOp(sre.Sub[2], prefixSuffixOp) || !isLiteral(sre.Sub[1]) {
|
return v
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return string(sre.Sub[1].Rune)
|
|
||||||
}
|
|
||||||
|
|
||||||
func isDotOpRegexp(expr string, op syntax.Op) bool {
|
|
||||||
sre, err := syntax.Parse(expr, regexParseFlags)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return isDotOp(sre, op)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
|
func isDotOp(sre *syntax.Regexp, op syntax.Op) bool {
|
||||||
|
@ -325,4 +312,14 @@ var emptyRegexp = &syntax.Regexp{
|
||||||
Op: syntax.OpEmptyMatch,
|
Op: syntax.OpEmptyMatch,
|
||||||
}
|
}
|
||||||
|
|
||||||
const regexParseFlags = syntax.Perl | syntax.DotNL
|
func parseRegexp(expr string) (*syntax.Regexp, error) {
|
||||||
|
return syntax.Parse(expr, syntax.Perl|syntax.DotNL)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustParseRegexp(expr string) *syntax.Regexp {
|
||||||
|
sre, err := parseRegexp(expr)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("BUG: cannot parse already verified regexp %q: %w", expr, err))
|
||||||
|
}
|
||||||
|
return sre
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue