This commit is contained in:
Aliaksandr Valialkin 2024-05-23 21:47:21 +02:00
parent b0afef1e2b
commit 59d52cec67
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
10 changed files with 101 additions and 63 deletions

View file

@ -2,9 +2,9 @@ package logstorage
import ( import (
"fmt" "fmt"
"regexp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
// filterRegexp matches the given regexp // filterRegexp matches the given regexp
@ -12,7 +12,7 @@ import (
// Example LogsQL: `fieldName:re("regexp")` // Example LogsQL: `fieldName:re("regexp")`
type filterRegexp struct { type filterRegexp struct {
fieldName string fieldName string
re *regexp.Regexp re *regexutil.Regex
} }
func (fr *filterRegexp) String() string { func (fr *filterRegexp) String() string {
@ -77,7 +77,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
} }
} }
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601String(bs, bb, v) s := toTimestampISO8601String(bs, bb, v)
@ -86,7 +86,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4String(bs, bb, v) s := toIPv4String(bs, bb, v)
@ -95,7 +95,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64String(bs, bb, v) s := toFloat64String(bs, bb, v)
@ -104,7 +104,7 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
for _, v := range ch.valuesDict.values { for _, v := range ch.valuesDict.values {
c := byte(0) c := byte(0)
@ -117,13 +117,13 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
return re.MatchString(v) return re.MatchString(v)
}) })
} }
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toUint8String(bs, bb, v) s := toUint8String(bs, bb, v)
@ -132,7 +132,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toUint16String(bs, bb, v) s := toUint16String(bs, bb, v)
@ -141,7 +141,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toUint32String(bs, bb, v) s := toUint32String(bs, bb, v)
@ -150,7 +150,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb) bbPool.Put(bb)
} }
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get() bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool { visitValues(bs, ch, bm, func(v string) bool {
s := toUint64String(bs, bb, v) s := toUint64String(bs, bb, v)

View file

@ -1,8 +1,10 @@
package logstorage package logstorage
import ( import (
"regexp" "fmt"
"testing" "testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
func TestFilterRegexp(t *testing.T) { func TestFilterRegexp(t *testing.T) {
@ -21,32 +23,32 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("0.0"), re: mustCompileRegex("0.0"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile(`^127\.0\.0\.1$`), re: mustCompileRegex(`^127\.0\.0\.1$`),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "non-existing-column", fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar|"), re: mustCompileRegex("foo.+bar|"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo.+bar"), re: mustCompileRegex("foo.+bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "non-existing-column", fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar"), re: mustCompileRegex("foo.+bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -71,20 +73,20 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar|^$"), re: mustCompileRegex("foo|bar|^$"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6})
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("27.0"), re: mustCompileRegex("27.0"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7}) testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("bar.+foo"), re: mustCompileRegex("bar.+foo"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -111,14 +113,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("(?i)foo|йцу"), re: mustCompileRegex("(?i)foo|йцу"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("qwe.+rty|^$"), re: mustCompileRegex("qwe.+rty|^$"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -146,14 +148,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("[32][23]?"), re: mustCompileRegex("[32][23]?"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar"), re: mustCompileRegex("foo|bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -181,14 +183,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("[32][23]?"), re: mustCompileRegex("[32][23]?"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar"), re: mustCompileRegex("foo|bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -216,14 +218,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("[32][23]?"), re: mustCompileRegex("[32][23]?"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar"), re: mustCompileRegex("foo|bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -251,14 +253,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("[32][23]?"), re: mustCompileRegex("[32][23]?"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar"), re: mustCompileRegex("foo|bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -286,14 +288,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("[32][23]?"), re: mustCompileRegex("[32][23]?"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8}) testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar"), re: mustCompileRegex("foo|bar"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -322,14 +324,14 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("127.0.[40].(1|2)"), re: mustCompileRegex("127.0.[40].(1|2)"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7}) testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "foo", fieldName: "foo",
re: regexp.MustCompile("foo|bar|834"), re: mustCompileRegex("foo|bar|834"),
} }
testFilterMatchForColumns(t, columns, fr, "foo", nil) testFilterMatchForColumns(t, columns, fr, "foo", nil)
}) })
@ -355,15 +357,23 @@ func TestFilterRegexp(t *testing.T) {
// match // match
fr := &filterRegexp{ fr := &filterRegexp{
fieldName: "_msg", fieldName: "_msg",
re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"), re: mustCompileRegex("2006-[0-9]{2}-.+?(2|5)Z"),
} }
testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4}) testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4})
// mismatch // mismatch
fr = &filterRegexp{ fr = &filterRegexp{
fieldName: "_msg", fieldName: "_msg",
re: regexp.MustCompile("^01|04$"), re: mustCompileRegex("^01|04$"),
} }
testFilterMatchForColumns(t, columns, fr, "_msg", nil) testFilterMatchForColumns(t, columns, fr, "_msg", nil)
}) })
} }
// mustCompileRegex builds a regexutil.Regex from expr for use in tests.
//
// It panics when expr cannot be compiled, since test expressions
// are expected to be valid.
func mustCompileRegex(expr string) *regexutil.Regex {
	regex, err := regexutil.NewRegex(expr)
	if err != nil {
		panic(fmt.Errorf("BUG: cannot compile %q: %w", expr, err))
	}
	return regex
}

View file

@ -3,7 +3,6 @@ package logstorage
import ( import (
"fmt" "fmt"
"math" "math"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -12,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
) )
type lexer struct { type lexer struct {
@ -1011,7 +1011,7 @@ func parseFilterExact(lex *lexer, fieldName string) (filter, error) {
func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) { func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token funcName := lex.token
return parseFuncArg(lex, fieldName, func(arg string) (filter, error) { return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
re, err := regexp.Compile(arg) re, err := regexutil.NewRegex(arg)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err) return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err)
} }
@ -1026,7 +1026,7 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
func parseFilterTilda(lex *lexer, fieldName string) (filter, error) { func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
lex.nextToken() lex.nextToken()
arg := getCompoundFuncArg(lex) arg := getCompoundFuncArg(lex)
re, err := regexp.Compile(arg) re, err := regexutil.NewRegex(arg)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid regexp %q: %w", arg, err) return nil, fmt.Errorf("invalid regexp %q: %w", arg, err)
} }

View file

@ -3,7 +3,6 @@ package logstorage
import ( import (
"context" "context"
"fmt" "fmt"
"regexp"
"sync/atomic" "sync/atomic"
"testing" "testing"
"time" "time"
@ -582,7 +581,7 @@ func TestStorageSearch(t *testing.T) {
f, f,
&filterRegexp{ &filterRegexp{
fieldName: "_msg", fieldName: "_msg",
re: regexp.MustCompile("message [02] at "), re: mustCompileRegex("message [02] at "),
}, },
}, },
} }

View file

@ -439,5 +439,5 @@ func isDefaultRegex(expr string) bool {
if prefix != "" { if prefix != "" {
return false return false
} }
return suffix == "(?-s:.*)" return suffix == "(?s:.*)"
} }

View file

@ -19,6 +19,9 @@ import (
// //
// The rest of regexps are also optimized by returning cached match results for the same input strings. // The rest of regexps are also optimized by returning cached match results for the same input strings.
type PromRegex struct { type PromRegex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex. // prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)" // For example, prefix="foo" for regex="foo(a|b)"
prefix string prefix string
@ -65,6 +68,7 @@ func NewPromRegex(expr string) (*PromRegex, error) {
reSuffix := regexp.MustCompile(suffixExpr) reSuffix := regexp.MustCompile(suffixExpr)
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString) reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
pr := &PromRegex{ pr := &PromRegex{
exprStr: expr,
prefix: prefix, prefix: prefix,
isOnlyPrefix: isOnlyPrefix, isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar, isSuffixDotStar: isSuffixDotStar,
@ -125,3 +129,8 @@ func (pr *PromRegex) MatchString(s string) bool {
// Fall back to slow path by matching the original regexp. // Fall back to slow path by matching the original regexp.
return pr.reSuffixMatcher.Match(s) return pr.reSuffixMatcher.Match(s)
} }
// String returns the original expression pr was created from.
func (pr *PromRegex) String() string {
return pr.exprStr
}

View file

@ -8,6 +8,7 @@ import (
func TestPromRegexParseFailure(t *testing.T) { func TestPromRegexParseFailure(t *testing.T) {
f := func(expr string) { f := func(expr string) {
t.Helper() t.Helper()
pr, err := NewPromRegex(expr) pr, err := NewPromRegex(expr)
if err == nil { if err == nil {
t.Fatalf("expecting non-nil error for expr=%s", expr) t.Fatalf("expecting non-nil error for expr=%s", expr)
@ -23,10 +24,15 @@ func TestPromRegexParseFailure(t *testing.T) {
func TestPromRegex(t *testing.T) { func TestPromRegex(t *testing.T) {
f := func(expr, s string, resultExpected bool) { f := func(expr, s string, resultExpected bool) {
t.Helper() t.Helper()
pr, err := NewPromRegex(expr) pr, err := NewPromRegex(expr)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
exprResult := pr.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := pr.MatchString(s) result := pr.MatchString(s)
if result != resultExpected { if result != resultExpected {
t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected) t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected)
@ -40,6 +46,7 @@ func TestPromRegex(t *testing.T) {
t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
} }
} }
f("", "", true) f("", "", true)
f("", "foo", false) f("", "foo", false)
f("foo", "", false) f("foo", "", false)

View file

@ -15,6 +15,9 @@ import (
// - prefix match such as "foo.*" or "foo.+" // - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+" // - substring match such as ".*foo.*" or ".+bar.+"
type Regex struct { type Regex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex. // prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)" // For example, prefix="foo" for regex="foo(a|b)"
prefix string prefix string
@ -38,8 +41,8 @@ type Regex struct {
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz" // For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
orValues []string orValues []string
// re is the original regexp. // suffixRe is the regexp for suffix
re *regexp.Regexp suffixRe *regexp.Regexp
} }
// NewRegex returns Regex for the given expr. // NewRegex returns Regex for the given expr.
@ -57,16 +60,16 @@ func NewRegex(expr string) (*Regex, error) {
substrDotStar := getSubstringLiteral(sre, syntax.OpStar) substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus) substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
var re *regexp.Regexp
suffixAnchored := suffix suffixAnchored := suffix
if len(prefix) > 0 { if len(prefix) > 0 {
suffixAnchored = "^(?:" + suffix + ")" suffixAnchored = "^(?:" + suffix + ")"
} }
// The suffixAnchored must be properly compiled, since it has been already checked above. // The suffixAnchored must be properly compiled, since it has been already checked above.
// Otherwise it is a bug, which must be fixed. // Otherwise it is a bug, which must be fixed.
re = regexp.MustCompile(suffixAnchored) suffixRe := regexp.MustCompile(suffixAnchored)
r := &Regex{ r := &Regex{
exprStr: expr,
prefix: prefix, prefix: prefix,
isOnlyPrefix: isOnlyPrefix, isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar, isSuffixDotStar: isSuffixDotStar,
@ -74,22 +77,28 @@ func NewRegex(expr string) (*Regex, error) {
substrDotStar: substrDotStar, substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus, substrDotPlus: substrDotPlus,
orValues: orValues, orValues: orValues,
re: re, suffixRe: suffixRe,
} }
return r, nil return r, nil
} }
// MatchString returns true if s matches pr. // MatchString returns true if s matches r.
func (r *Regex) MatchString(s string) bool { func (r *Regex) MatchString(s string) bool {
if r.isOnlyPrefix { if r.isOnlyPrefix {
return strings.Contains(s, r.prefix) return strings.Contains(s, r.prefix)
} }
if len(r.prefix) == 0 { if len(r.prefix) == 0 {
return r.matchStringNoPrefix(s) return r.matchStringNoPrefix(s)
} }
return r.matchStringWithPrefix(s) return r.matchStringWithPrefix(s)
} }
// String returns the original expression r was created from.
func (r *Regex) String() string {
return r.exprStr
}
func (r *Regex) matchStringNoPrefix(s string) bool { func (r *Regex) matchStringNoPrefix(s string) bool {
if r.isSuffixDotStar { if r.isSuffixDotStar {
return true return true
@ -108,11 +117,11 @@ func (r *Regex) matchStringNoPrefix(s string) bool {
} }
if len(r.orValues) == 0 { if len(r.orValues) == 0 {
// Fall back to slow path by matching the original regexp. // Fall back to slow path by matching the suffix regexp.
return r.re.MatchString(s) return r.suffixRe.MatchString(s)
} }
// Fast path - compare s to pr.orValues // Fast path - compare s to r.orValues
for _, v := range r.orValues { for _, v := range r.orValues {
if strings.Contains(s, v) { if strings.Contains(s, v) {
return true return true
@ -148,12 +157,12 @@ func (r *Regex) matchStringWithPrefix(s string) bool {
for { for {
if len(r.orValues) == 0 { if len(r.orValues) == 0 {
// Fall back to slow path by matching the original regexp. // Fall back to slow path by matching the suffix regexp.
if r.re.MatchString(s) { if r.suffixRe.MatchString(s) {
return true return true
} }
} else { } else {
// Fast path - compare s to pr.orValues // Fast path - compare s to r.orValues
for _, v := range r.orValues { for _, v := range r.orValues {
if strings.HasPrefix(s, v) { if strings.HasPrefix(s, v) {
return true return true

View file

@ -5,12 +5,12 @@ import (
) )
func TestNewRegexFailure(t *testing.T) { func TestNewRegexFailure(t *testing.T) {
f := func(regex string) { f := func(expr string) {
t.Helper() t.Helper()
re, err := NewRegex(regex) r, err := NewRegex(expr)
if err == nil { if err == nil {
t.Fatalf("expecting non-nil error when parsing %q; got %q", regex, re.re) t.Fatalf("expecting non-nil error when parsing %q; got %q", expr, r)
} }
} }
@ -19,16 +19,20 @@ func TestNewRegexFailure(t *testing.T) {
} }
func TestRegexMatchString(t *testing.T) { func TestRegexMatchString(t *testing.T) {
f := func(regex, s string, resultExpected bool) { f := func(expr, s string, resultExpected bool) {
t.Helper() t.Helper()
re, err := NewRegex(regex) r, err := NewRegex(expr)
if err != nil { if err != nil {
t.Fatalf("cannot parse %q: %s", regex, err) t.Fatalf("cannot parse %q: %s", expr, err)
} }
result := re.MatchString(s) exprResult := r.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := r.MatchString(s)
if result != resultExpected { if result != resultExpected {
t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, regex, result, resultExpected) t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, expr, result, resultExpected)
} }
} }

View file

@ -1183,7 +1183,7 @@ func TestSimplifyRegexp(t *testing.T) {
f("ab|ad", "a", "[bd]") f("ab|ad", "a", "[bd]")
f("(?i)xyz", "", "(?i:XYZ)") f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)") f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?i-s:UP.+X)") f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)") f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps // test invalid regexps