lib/storage: properly handle (?i) in the tag filter regexp

Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/161
This commit is contained in:
Aliaksandr Valialkin 2019-08-26 00:41:57 +03:00
parent e2eac858b5
commit 82e813bad3
2 changed files with 85 additions and 61 deletions

View file

@@ -609,15 +609,18 @@ func extractRegexpPrefix(b []byte) ([]byte, []byte) {
if re == emptyRegexp { if re == emptyRegexp {
return nil, nil return nil, nil
} }
if re.Op == syntax.OpLiteral { if re.Op == syntax.OpLiteral && re.Flags & syntax.FoldCase == 0 {
return []byte(string(re.Rune)), nil return []byte(string(re.Rune)), nil
} }
var prefix []byte var prefix []byte
if re.Op == syntax.OpConcat && re.Sub[0].Op == syntax.OpLiteral { if re.Op == syntax.OpConcat {
prefix = []byte(string(re.Sub[0].Rune)) sub0 := re.Sub[0]
re.Sub = re.Sub[1:] if sub0.Op == syntax.OpLiteral && sub0.Flags & syntax.FoldCase == 0 {
if len(re.Sub) == 0 { prefix = []byte(string(sub0.Rune))
return nil, nil re.Sub = re.Sub[1:]
if len(re.Sub) == 0 {
return nil, nil
}
} }
} }
if _, err := syntax.Compile(re); err != nil { if _, err := syntax.Compile(re); err != nil {

View file

@@ -5,6 +5,21 @@ import (
"testing" "testing"
) )
// TestExtractRegexpPrefix checks the prefix and suffix returned by
// extractRegexpPrefix for a few input expressions.
func TestExtractRegexpPrefix(t *testing.T) {
	check := func(expr string, wantPrefix, wantSuffix string) {
		t.Helper()
		gotPrefix, gotSuffix := extractRegexpPrefix([]byte(expr))
		// The prefix is verified before the suffix, so a prefix mismatch is reported first.
		if wantPrefix != string(gotPrefix) {
			t.Fatalf("unexpected prefix for %q; got %q; want %q", expr, gotPrefix, wantPrefix)
		}
		if wantSuffix != string(gotSuffix) {
			t.Fatalf("unexpected suffix for %q; got %q; want %q", expr, gotSuffix, wantSuffix)
		}
	}

	// Empty expression: both the prefix and the suffix are expected to be empty.
	check("", "", "")
	// Plain literal: the whole expression is expected as the prefix, with an empty suffix.
	check("foobar", "foobar", "")
}
func TestGetRegexpFromCache(t *testing.T) { func TestGetRegexpFromCache(t *testing.T) {
f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) { f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string) {
t.Helper() t.Helper()
@@ -397,67 +412,73 @@ func TestGetOrValues(t *testing.T) {
} }
func TestGetRegexpPrefix(t *testing.T) { func TestGetRegexpPrefix(t *testing.T) {
testGetRegexpPrefix(t, "", "", "") f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) {
testGetRegexpPrefix(t, "^", "", "") t.Helper()
testGetRegexpPrefix(t, "$", "", "")
testGetRegexpPrefix(t, "^()$", "", "") prefix, suffix := getRegexpPrefix([]byte(s))
testGetRegexpPrefix(t, "^(?:)$", "", "") if string(prefix) != expectedPrefix {
testGetRegexpPrefix(t, "foobar", "foobar", "") t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
testGetRegexpPrefix(t, "foo$|^foobar", "foo", "(?:(?:)|bar)") }
testGetRegexpPrefix(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)") if string(suffix) != expectedSuffix {
testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]") t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
testGetRegexpPrefix(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x") }
testGetRegexpPrefix(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
testGetRegexpPrefix(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)") // Get the prefix from cache.
testGetRegexpPrefix(t, "^foobar|foobaz", "fooba", "[rz]") prefix, suffix = getRegexpPrefix([]byte(s))
testGetRegexpPrefix(t, "^foobar|^foobaz$", "fooba", "[rz]") if string(prefix) != expectedPrefix {
testGetRegexpPrefix(t, "foobar|foobaz", "fooba", "[rz]") t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
testGetRegexpPrefix(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*") }
testGetRegexpPrefix(t, "foo[bar]+", "foo", "[a-br]+") if string(suffix) != expectedSuffix {
testGetRegexpPrefix(t, "foo[a-z]+", "foo", "[a-z]+") t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
testGetRegexpPrefix(t, "foo[bar]*", "foo", "[a-br]*") }
testGetRegexpPrefix(t, "foo[a-z]*", "foo", "[a-z]*") }
testGetRegexpPrefix(t, "foo[x]+", "foo", "x+")
testGetRegexpPrefix(t, "foo[^x]+", "foo", "[^x]+") f(t, "", "", "")
testGetRegexpPrefix(t, "foo[x]*", "foo", "x*") f(t, "^", "", "")
testGetRegexpPrefix(t, "foo[^x]*", "foo", "[^x]*") f(t, "$", "", "")
testGetRegexpPrefix(t, "foo[x]*bar", "foo", "x*bar") f(t, "^()$", "", "")
testGetRegexpPrefix(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?") f(t, "^(?:)$", "", "")
f(t, "foobar", "foobar", "")
f(t, "foo$|^foobar", "foo", "(?:(?:)|bar)")
f(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)")
f(t, "foobar|foobaz", "fooba", "[rz]")
f(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
f(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))")
f(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)")
f(t, "^foobar|foobaz", "fooba", "[rz]")
f(t, "^foobar|^foobaz$", "fooba", "[rz]")
f(t, "foobar|foobaz", "fooba", "[rz]")
f(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*")
f(t, "foo[bar]+", "foo", "[a-br]+")
f(t, "foo[a-z]+", "foo", "[a-z]+")
f(t, "foo[bar]*", "foo", "[a-br]*")
f(t, "foo[a-z]*", "foo", "[a-z]*")
f(t, "foo[x]+", "foo", "x+")
f(t, "foo[^x]+", "foo", "[^x]+")
f(t, "foo[x]*", "foo", "x*")
f(t, "foo[^x]*", "foo", "[^x]*")
f(t, "foo[x]*bar", "foo", "x*bar")
f(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f(t, "foo.+bar", "foo", "(?-s:.)+bar")
f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+")
f(t, "ab|ac", "a", "[b-c]")
f(t, "(?i)xyz", "", "(?i:XYZ)")
f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)")
f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")
// test invalid regexps // test invalid regexps
testGetRegexpPrefix(t, "a(", "a(", "") f(t, "a(", "a(", "")
testGetRegexpPrefix(t, "a[", "a[", "") f(t, "a[", "a[", "")
testGetRegexpPrefix(t, "a[]", "a[]", "") f(t, "a[]", "a[]", "")
testGetRegexpPrefix(t, "a{", "a{", "") f(t, "a{", "a{", "")
testGetRegexpPrefix(t, "a{}", "a{}", "") f(t, "a{}", "a{}", "")
testGetRegexpPrefix(t, "invalid(regexp", "invalid(regexp", "") f(t, "invalid(regexp", "invalid(regexp", "")
// The transformed regexp mustn't match aba // The transformed regexp mustn't match aba
testGetRegexpPrefix(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)") f(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)")
// The transformed regexp mustn't match barx // The transformed regexp mustn't match barx
testGetRegexpPrefix(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*") f(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*")
}
func testGetRegexpPrefix(t *testing.T, s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
// Get the prefix from cache.
prefix, suffix = getRegexpPrefix([]byte(s))
if string(prefix) != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if string(suffix) != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
} }
func TestTagFiltersAddEmpty(t *testing.T) { func TestTagFiltersAddEmpty(t *testing.T) {