lib/storage: properly match labels against regexp with (?i) flag

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161
This commit is contained in:
Aliaksandr Valialkin 2019-09-26 11:00:22 +03:00
parent bd1cf053f6
commit 2b8358726f
2 changed files with 88 additions and 3 deletions

View file

@ -334,6 +334,9 @@ func getSingleValueFuncExt(re *syntax.Regexp) func(b []byte) bool {
case syntax.OpCapture:
return getSingleValueFuncExt(re.Sub[0])
case syntax.OpLiteral:
if !isLiteral(re) {
return nil
}
s := string(re.Rune)
return func(b []byte) bool {
return string(b) == s
@ -416,7 +419,7 @@ func isLiteral(re *syntax.Regexp) bool {
if re.Op == syntax.OpCapture {
return isLiteral(re.Sub[0])
}
return re.Op == syntax.OpLiteral
return re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0
}
func getOrValues(expr string) []string {
@ -437,6 +440,9 @@ func getOrValuesExt(re *syntax.Regexp) []string {
case syntax.OpCapture:
return getOrValuesExt(re.Sub[0])
case syntax.OpLiteral:
if !isLiteral(re) {
return nil
}
return []string{string(re.Rune)}
case syntax.OpEmptyMatch:
return []string{""}
@ -609,13 +615,13 @@ func extractRegexpPrefix(b []byte) ([]byte, []byte) {
if re == emptyRegexp {
return nil, nil
}
if re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0 {
if isLiteral(re) {
return []byte(string(re.Rune)), nil
}
var prefix []byte
if re.Op == syntax.OpConcat {
sub0 := re.Sub[0]
if sub0.Op == syntax.OpLiteral && sub0.Flags&syntax.FoldCase == 0 {
if isLiteral(sub0) {
prefix = []byte(string(sub0.Rune))
re.Sub = re.Sub[1:]
if len(re.Sub) == 0 {

View file

@ -53,11 +53,17 @@ func TestGetRegexpFromCache(t *testing.T) {
f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"})
f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""})
f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""})
f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""})
f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""})
f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"})
f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"})
f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", "fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"})
f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"})
f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"})
f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"})
f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"})
f(".*", nil, []string{"", "a", "foo", "foobar"}, nil)
f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil)
f(".+", nil, []string{"a", "foo"}, []string{""})
@ -323,6 +329,76 @@ func TestTagFilterMatchSuffix(t *testing.T) {
mismatch("bar")
match("xhttpbar")
})
t.Run("regexp-iflag-no-suffix", func(t *testing.T) {
value := "(?i)http"
isNegative := false
isRegexp := true
expectedPrefix := tvNoTrailingTagSeparator("")
init(value, isNegative, isRegexp, expectedPrefix)
// Must match case-insenstive http
match("http")
match("HTTP")
match("hTTp")
mismatch("")
mismatch("foobar")
mismatch("xhttp")
mismatch("xhttp://")
mismatch("hTTp://foobar.com")
})
t.Run("negative-regexp-iflag-no-suffix", func(t *testing.T) {
value := "(?i)http"
isNegative := true
isRegexp := true
expectedPrefix := tvNoTrailingTagSeparator("")
init(value, isNegative, isRegexp, expectedPrefix)
// Mustn't match case-insensitive http
mismatch("http")
mismatch("HTTP")
mismatch("hTTp")
match("")
match("foobar")
match("xhttp")
match("xhttp://")
match("hTTp://foobar.com")
})
t.Run("regexp-iflag-any-suffix", func(t *testing.T) {
value := "(?i)http.*"
isNegative := false
isRegexp := true
expectedPrefix := tvNoTrailingTagSeparator("")
init(value, isNegative, isRegexp, expectedPrefix)
// Must match case-insenstive http
match("http")
match("HTTP")
match("hTTp://foobar.com")
mismatch("")
mismatch("foobar")
mismatch("xhttp")
mismatch("xhttp://")
})
t.Run("negative-regexp-iflag-any-suffix", func(t *testing.T) {
value := "(?i)http.*"
isNegative := true
isRegexp := true
expectedPrefix := tvNoTrailingTagSeparator("")
init(value, isNegative, isRegexp, expectedPrefix)
// Mustn't match case-insensitive http
mismatch("http")
mismatch("HTTP")
mismatch("hTTp://foobar.com")
match("")
match("foobar")
match("xhttp")
match("xhttp://")
})
t.Run("non-empty-string-regexp-negative-match", func(t *testing.T) {
value := ".+"
isNegative := true
@ -409,6 +485,8 @@ func TestGetOrValues(t *testing.T) {
f("foo(?:bar|baz)x(qwe|rt)", []string{"foobarxqwe", "foobarxrt", "foobazxqwe", "foobazxrt"})
f("foo(bar||baz)", []string{"foo", "foobar", "foobaz"})
f("(a|b|c)(d|e|f)(g|h|k)", nil)
f("(?i)foo", nil)
f("(?i)(foo|bar)", nil)
}
func TestGetRegexpPrefix(t *testing.T) {
@ -463,6 +541,7 @@ func TestGetRegexpPrefix(t *testing.T) {
f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+")
f(t, "ab|ac", "a", "[b-c]")
f(t, "(?i)xyz", "", "(?i:XYZ)")
f(t, "(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)")
f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)")
f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")