This commit is contained in:
Aliaksandr Valialkin 2024-05-23 21:47:21 +02:00
parent b0afef1e2b
commit 59d52cec67
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
10 changed files with 101 additions and 63 deletions

View file

@ -2,9 +2,9 @@ package logstorage
import (
"fmt"
"regexp"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
// filterRegexp matches the given regexp
@ -12,7 +12,7 @@ import (
// Example LogsQL: `fieldName:re("regexp")`
type filterRegexp struct {
fieldName string
re *regexp.Regexp
re *regexutil.Regex
}
func (fr *filterRegexp) String() string {
@ -77,7 +77,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toTimestampISO8601String(bs, bb, v)
@ -86,7 +86,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toIPv4String(bs, bb, v)
@ -95,7 +95,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
bbPool.Put(bb)
}
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toFloat64String(bs, bb, v)
@ -104,7 +104,7 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
bbPool.Put(bb)
}
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
for _, v := range ch.valuesDict.values {
c := byte(0)
@ -117,13 +117,13 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
bbPool.Put(bb)
}
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
// matchStringByRegexp runs visitValues over ch with re.MatchString as the per-value predicate.
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
	// The method value has exactly the func(string) bool shape visitValues expects,
	// so no wrapping closure is needed.
	visitValues(bs, ch, bm, re.MatchString)
}
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint8String(bs, bb, v)
@ -132,7 +132,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
bbPool.Put(bb)
}
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint16String(bs, bb, v)
@ -141,7 +141,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint32String(bs, bb, v)
@ -150,7 +150,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
s := toUint64String(bs, bb, v)

View file

@ -1,8 +1,10 @@
package logstorage
import (
"regexp"
"fmt"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
func TestFilterRegexp(t *testing.T) {
@ -21,32 +23,32 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("0.0"),
re: mustCompileRegex("0.0"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile(`^127\.0\.0\.1$`),
re: mustCompileRegex(`^127\.0\.0\.1$`),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
fr = &filterRegexp{
fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar|"),
re: mustCompileRegex("foo.+bar|"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo.+bar"),
re: mustCompileRegex("foo.+bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
fr = &filterRegexp{
fieldName: "non-existing-column",
re: regexp.MustCompile("foo.+bar"),
re: mustCompileRegex("foo.+bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -71,20 +73,20 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar|^$"),
re: mustCompileRegex("foo|bar|^$"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6})
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("27.0"),
re: mustCompileRegex("27.0"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("bar.+foo"),
re: mustCompileRegex("bar.+foo"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -111,14 +113,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("(?i)foo|йцу"),
re: mustCompileRegex("(?i)foo|йцу"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("qwe.+rty|^$"),
re: mustCompileRegex("qwe.+rty|^$"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -146,14 +148,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -181,14 +183,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -216,14 +218,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -251,14 +253,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -286,14 +288,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("[32][23]?"),
re: mustCompileRegex("[32][23]?"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar"),
re: mustCompileRegex("foo|bar"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -322,14 +324,14 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("127.0.[40].(1|2)"),
re: mustCompileRegex("127.0.[40].(1|2)"),
}
testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7})
// mismatch
fr = &filterRegexp{
fieldName: "foo",
re: regexp.MustCompile("foo|bar|834"),
re: mustCompileRegex("foo|bar|834"),
}
testFilterMatchForColumns(t, columns, fr, "foo", nil)
})
@ -355,15 +357,23 @@ func TestFilterRegexp(t *testing.T) {
// match
fr := &filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"),
re: mustCompileRegex("2006-[0-9]{2}-.+?(2|5)Z"),
}
testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4})
// mismatch
fr = &filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("^01|04$"),
re: mustCompileRegex("^01|04$"),
}
testFilterMatchForColumns(t, columns, fr, "_msg", nil)
})
}
// mustCompileRegex compiles expr with regexutil.NewRegex and returns the result.
//
// It panics with a "BUG: ..." error if expr cannot be compiled.
func mustCompileRegex(expr string) *regexutil.Regex {
	compiled, err := regexutil.NewRegex(expr)
	if err == nil {
		return compiled
	}
	panic(fmt.Errorf("BUG: cannot compile %q: %w", expr, err))
}

View file

@ -3,7 +3,6 @@ package logstorage
import (
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
@ -12,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
)
type lexer struct {
@ -1011,7 +1011,7 @@ func parseFilterExact(lex *lexer, fieldName string) (filter, error) {
func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token
return parseFuncArg(lex, fieldName, func(arg string) (filter, error) {
re, err := regexp.Compile(arg)
re, err := regexutil.NewRegex(arg)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q for %s(): %w", arg, funcName, err)
}
@ -1026,7 +1026,7 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()
arg := getCompoundFuncArg(lex)
re, err := regexp.Compile(arg)
re, err := regexutil.NewRegex(arg)
if err != nil {
return nil, fmt.Errorf("invalid regexp %q: %w", arg, err)
}

View file

@ -3,7 +3,6 @@ package logstorage
import (
"context"
"fmt"
"regexp"
"sync/atomic"
"testing"
"time"
@ -582,7 +581,7 @@ func TestStorageSearch(t *testing.T) {
f,
&filterRegexp{
fieldName: "_msg",
re: regexp.MustCompile("message [02] at "),
re: mustCompileRegex("message [02] at "),
},
},
}

View file

@ -439,5 +439,5 @@ func isDefaultRegex(expr string) bool {
if prefix != "" {
return false
}
return suffix == "(?-s:.*)"
return suffix == "(?s:.*)"
}

View file

@ -19,6 +19,9 @@ import (
//
// The rest of regexps are also optimized by returning cached match results for the same input strings.
type PromRegex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
@ -65,6 +68,7 @@ func NewPromRegex(expr string) (*PromRegex, error) {
reSuffix := regexp.MustCompile(suffixExpr)
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
pr := &PromRegex{
exprStr: expr,
prefix: prefix,
isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar,
@ -125,3 +129,8 @@ func (pr *PromRegex) MatchString(s string) bool {
// Fall back to slow path by matching the original regexp.
return pr.reSuffixMatcher.Match(s)
}
// String returns the original expression, which has been passed to NewPromRegex when creating pr.
func (pr *PromRegex) String() string {
return pr.exprStr
}

View file

@ -8,6 +8,7 @@ import (
func TestPromRegexParseFailure(t *testing.T) {
f := func(expr string) {
t.Helper()
pr, err := NewPromRegex(expr)
if err == nil {
t.Fatalf("expecting non-nil error for expr=%s", expr)
@ -23,10 +24,15 @@ func TestPromRegexParseFailure(t *testing.T) {
func TestPromRegex(t *testing.T) {
f := func(expr, s string, resultExpected bool) {
t.Helper()
pr, err := NewPromRegex(expr)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
exprResult := pr.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := pr.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected)
@ -40,6 +46,7 @@ func TestPromRegex(t *testing.T) {
t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
}
}
f("", "", true)
f("", "foo", false)
f("foo", "", false)

View file

@ -15,6 +15,9 @@ import (
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
type Regex struct {
// exprStr is the original expression.
exprStr string
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
prefix string
@ -38,8 +41,8 @@ type Regex struct {
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
orValues []string
// re is the original regexp.
re *regexp.Regexp
// suffixRe is the regexp for suffix
suffixRe *regexp.Regexp
}
// NewRegex returns Regex for the given expr.
@ -57,16 +60,16 @@ func NewRegex(expr string) (*Regex, error) {
substrDotStar := getSubstringLiteral(sre, syntax.OpStar)
substrDotPlus := getSubstringLiteral(sre, syntax.OpPlus)
var re *regexp.Regexp
suffixAnchored := suffix
if len(prefix) > 0 {
suffixAnchored = "^(?:" + suffix + ")"
}
// The suffixAnchored must be properly compiled, since it has been already checked above.
// Otherwise it is a bug, which must be fixed.
re = regexp.MustCompile(suffixAnchored)
suffixRe := regexp.MustCompile(suffixAnchored)
r := &Regex{
exprStr: expr,
prefix: prefix,
isOnlyPrefix: isOnlyPrefix,
isSuffixDotStar: isSuffixDotStar,
@ -74,22 +77,28 @@ func NewRegex(expr string) (*Regex, error) {
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
re: re,
suffixRe: suffixRe,
}
return r, nil
}
// MatchString returns true if s matches pr.
// MatchString reports whether s matches r.
//
// A regex consisting solely of a literal prefix is checked with a plain substring search.
// Otherwise the check is delegated to the matcher for regexps without a literal prefix
// or to the matcher for regexps with a literal prefix.
func (r *Regex) MatchString(s string) bool {
	switch {
	case r.isOnlyPrefix:
		return strings.Contains(s, r.prefix)
	case len(r.prefix) == 0:
		return r.matchStringNoPrefix(s)
	default:
		return r.matchStringWithPrefix(s)
	}
}
// String returns string representation for r.
//
// This is the original expression stored in r.exprStr.
func (r *Regex) String() string {
return r.exprStr
}
func (r *Regex) matchStringNoPrefix(s string) bool {
if r.isSuffixDotStar {
return true
@ -108,11 +117,11 @@ func (r *Regex) matchStringNoPrefix(s string) bool {
}
if len(r.orValues) == 0 {
// Fall back to slow path by matching the original regexp.
return r.re.MatchString(s)
// Fall back to slow path by matching the suffix regexp.
return r.suffixRe.MatchString(s)
}
// Fast path - compare s to pr.orValues
// Fast path - compare s to r.orValues
for _, v := range r.orValues {
if strings.Contains(s, v) {
return true
@ -148,12 +157,12 @@ func (r *Regex) matchStringWithPrefix(s string) bool {
for {
if len(r.orValues) == 0 {
// Fall back to slow path by matching the original regexp.
if r.re.MatchString(s) {
// Fall back to slow path by matching the suffix regexp.
if r.suffixRe.MatchString(s) {
return true
}
} else {
// Fast path - compare s to pr.orValues
// Fast path - compare s to r.orValues
for _, v := range r.orValues {
if strings.HasPrefix(s, v) {
return true

View file

@ -5,12 +5,12 @@ import (
)
func TestNewRegexFailure(t *testing.T) {
f := func(regex string) {
f := func(expr string) {
t.Helper()
re, err := NewRegex(regex)
r, err := NewRegex(expr)
if err == nil {
t.Fatalf("expecting non-nil error when parsing %q; got %q", regex, re.re)
t.Fatalf("expecting non-nil error when parsing %q; got %q", expr, r)
}
}
@ -19,16 +19,20 @@ func TestNewRegexFailure(t *testing.T) {
}
func TestRegexMatchString(t *testing.T) {
f := func(regex, s string, resultExpected bool) {
f := func(expr, s string, resultExpected bool) {
t.Helper()
re, err := NewRegex(regex)
r, err := NewRegex(expr)
if err != nil {
t.Fatalf("cannot parse %q: %s", regex, err)
t.Fatalf("cannot parse %q: %s", expr, err)
}
result := re.MatchString(s)
exprResult := r.String()
if exprResult != expr {
t.Fatalf("unexpected string representation for %q: %q", expr, exprResult)
}
result := r.MatchString(s)
if result != resultExpected {
t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, regex, result, resultExpected)
t.Fatalf("unexpected result when matching %q against regex=%q; got %v; want %v", s, expr, result, resultExpected)
}
}

View file

@ -1183,7 +1183,7 @@ func TestSimplifyRegexp(t *testing.T) {
f("ab|ad", "a", "[bd]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO|BAR)")
f("(?i)up.+x", "", "(?i-s:UP.+X)")
f("(?i)up.+x", "", "(?is:UP.+X)")
f("(?smi)xy.*z$", "", "(?ims:XY.*Z$)")
// test invalid regexps