From 2b8358726f86a7031751527b6f1465fe0e31fb96 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 26 Sep 2019 11:00:22 +0300 Subject: [PATCH] lib/storage: properly match labels against regexp with `(?i)` flag Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161 --- lib/storage/tag_filters.go | 12 +++-- lib/storage/tag_filters_test.go | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go index da0318648..0baac3eed 100644 --- a/lib/storage/tag_filters.go +++ b/lib/storage/tag_filters.go @@ -334,6 +334,9 @@ func getSingleValueFuncExt(re *syntax.Regexp) func(b []byte) bool { case syntax.OpCapture: return getSingleValueFuncExt(re.Sub[0]) case syntax.OpLiteral: + if !isLiteral(re) { + return nil + } s := string(re.Rune) return func(b []byte) bool { return string(b) == s @@ -416,7 +419,7 @@ func isLiteral(re *syntax.Regexp) bool { if re.Op == syntax.OpCapture { return isLiteral(re.Sub[0]) } - return re.Op == syntax.OpLiteral + return re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0 } func getOrValues(expr string) []string { @@ -437,6 +440,9 @@ func getOrValuesExt(re *syntax.Regexp) []string { case syntax.OpCapture: return getOrValuesExt(re.Sub[0]) case syntax.OpLiteral: + if !isLiteral(re) { + return nil + } return []string{string(re.Rune)} case syntax.OpEmptyMatch: return []string{""} @@ -609,13 +615,13 @@ func extractRegexpPrefix(b []byte) ([]byte, []byte) { if re == emptyRegexp { return nil, nil } - if re.Op == syntax.OpLiteral && re.Flags&syntax.FoldCase == 0 { + if isLiteral(re) { return []byte(string(re.Rune)), nil } var prefix []byte if re.Op == syntax.OpConcat { sub0 := re.Sub[0] - if sub0.Op == syntax.OpLiteral && sub0.Flags&syntax.FoldCase == 0 { + if isLiteral(sub0) { prefix = []byte(string(sub0.Rune)) re.Sub = re.Sub[1:] if len(re.Sub) == 0 { diff --git a/lib/storage/tag_filters_test.go b/lib/storage/tag_filters_test.go index f8b1ab0d9..b1aeb02f5 100644 --- a/lib/storage/tag_filters_test.go +++ b/lib/storage/tag_filters_test.go @@ -53,11 +53,17 @@ func TestGetRegexpFromCache(t *testing.T) { f("((.*)foo(.*))", nil, []string{"foo", "xfoo", "foox", "xfoobar"}, []string{"", "bar", "foxx"}) f(".+foo", nil, []string{"afoo", "bbfoo"}, []string{"foo", "foobar", "afoox", ""}) f("a|b", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}) + f("(a|b)", []string{"a", "b"}, []string{"a", "b"}, []string{"xa", "bx", "xab", ""}) f("foo.+", nil, []string{"foox", "foobar"}, []string{"foo", "afoox", "afoo", ""}) f(".*foo.*bar", nil, []string{"foobar", "xfoobar", "xfooxbar", "fooxbar"}, []string{"", "foobarx", "afoobarx", "aaa"}) f("foo.*bar", nil, []string{"foobar", "fooxbar"}, []string{"xfoobar", "", "foobarx", "aaa"}) f("foo.*bar.*", nil, []string{"foobar", "fooxbar", "foobarx", "fooxbarx"}, []string{"", "afoobarx", "aaa", "afoobar"}) + f("(?i)foo", nil, []string{"foo", "Foo", "FOO"}, []string{"xfoo", "foobar", "xFOObar"}) + f("(?i).+foo", nil, []string{"xfoo", "aaFoo", "bArFOO"}, []string{"foosdf", "xFOObar"}) + f("(?i)(foo|bar)", nil, []string{"foo", "Foo", "BAR", "bAR"}, []string{"foobar", "xfoo", "xFOObAR"}) + f("(?i)foo.*bar", nil, []string{"foobar", "FooBAR", "FOOxxbaR"}, []string{"xfoobar", "foobarx", "xFOObarx"}) + f(".*", nil, []string{"", "a", "foo", "foobar"}, nil) f("foo|.*", nil, []string{"", "a", "foo", "foobar"}, nil) f(".+", nil, []string{"a", "foo"}, []string{""}) @@ -323,6 +329,76 @@ func TestTagFilterMatchSuffix(t *testing.T) { mismatch("bar") match("xhttpbar") }) + t.Run("regexp-iflag-no-suffix", func(t *testing.T) { + value := "(?i)http" + isNegative := false + isRegexp := true + expectedPrefix := tvNoTrailingTagSeparator("") + init(value, isNegative, isRegexp, expectedPrefix) + + // Must match case-insenstive http + match("http") + match("HTTP") + match("hTTp") + + mismatch("") + mismatch("foobar") + mismatch("xhttp") + mismatch("xhttp://") + mismatch("hTTp://foobar.com") + }) + t.Run("negative-regexp-iflag-no-suffix", func(t *testing.T) { + value := "(?i)http" + isNegative := true + isRegexp := true + expectedPrefix := tvNoTrailingTagSeparator("") + init(value, isNegative, isRegexp, expectedPrefix) + + // Mustn't match case-insensitive http + mismatch("http") + mismatch("HTTP") + mismatch("hTTp") + + match("") + match("foobar") + match("xhttp") + match("xhttp://") + match("hTTp://foobar.com") + }) + t.Run("regexp-iflag-any-suffix", func(t *testing.T) { + value := "(?i)http.*" + isNegative := false + isRegexp := true + expectedPrefix := tvNoTrailingTagSeparator("") + init(value, isNegative, isRegexp, expectedPrefix) + + // Must match case-insenstive http + match("http") + match("HTTP") + match("hTTp://foobar.com") + + mismatch("") + mismatch("foobar") + mismatch("xhttp") + mismatch("xhttp://") + }) + t.Run("negative-regexp-iflag-any-suffix", func(t *testing.T) { + value := "(?i)http.*" + isNegative := true + isRegexp := true + expectedPrefix := tvNoTrailingTagSeparator("") + init(value, isNegative, isRegexp, expectedPrefix) + + // Mustn't match case-insensitive http + mismatch("http") + mismatch("HTTP") + mismatch("hTTp://foobar.com") + + match("") + match("foobar") + match("xhttp") + match("xhttp://") + }) t.Run("non-empty-string-regexp-negative-match", func(t *testing.T) { value := ".+" isNegative := true @@ -409,6 +485,8 @@ func TestGetOrValues(t *testing.T) { f("foo(?:bar|baz)x(qwe|rt)", []string{"foobarxqwe", "foobarxrt", "foobazxqwe", "foobazxrt"}) f("foo(bar||baz)", []string{"foo", "foobar", "foobaz"}) f("(a|b|c)(d|e|f)(g|h|k)", nil) + f("(?i)foo", nil) + f("(?i)(foo|bar)", nil) } func TestGetRegexpPrefix(t *testing.T) { @@ -463,6 +541,7 @@ func TestGetRegexpPrefix(t *testing.T) { f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+") f(t, "ab|ac", "a", "[b-c]") f(t, "(?i)xyz", "", "(?i:XYZ)") + f(t, "(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)") f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)") f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")