app/vmselect,lib/storage: properly parse Graphite selectors with inner wildcards

Example: foo{bar{x,yz},a[b-c],*de}
This commit is contained in:
Aliaksandr Valialkin 2021-02-03 20:12:17 +02:00
parent a5a1b9bd66
commit d16f22f3a1
4 changed files with 117 additions and 70 deletions

View file

@ -12,6 +12,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/bufferedwriter"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
@ -338,7 +339,10 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
if re := regexpCache[k]; re != nil {
return re.re, re.err
}
rs := getRegexpStringForQuery(query, delimiter, false)
rs, tail := getRegexpStringForQuery(query, delimiter, false)
if len(tail) > 0 {
return nil, fmt.Errorf("unexpected tail left after parsing query %q; tail: %q", query, tail)
}
re, err := regexp.Compile(rs)
regexpCache[k] = &regexpCacheEntry{
re: re,
@ -355,61 +359,73 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
return re, err
}
func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) string {
func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) (string, string) {
var a []string
var tail string
quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter}))
tillNextDelimiter := "[^" + quotedDelimiter + "]*"
j := 0
for i := 0; i < len(query); i++ {
switch query[i] {
case '*':
a = append(a, regexp.QuoteMeta(query[j:i]))
a = append(a, tillNextDelimiter)
j = i + 1
case '{':
for {
n := strings.IndexAny(query, "*{[,}")
if n < 0 {
a = append(a, regexp.QuoteMeta(query))
tail = ""
goto end
}
a = append(a, regexp.QuoteMeta(query[:n]))
query = query[n:]
switch query[0] {
case ',', '}':
if isSubquery {
break
tail = query
goto end
}
a = append(a, regexp.QuoteMeta(query[j:i]))
tmp := query[i+1:]
if n := strings.IndexByte(tmp, '}'); n < 0 {
rs := getRegexpStringForQuery(query[i:], delimiter, true)
a = append(a, rs)
i = len(query)
} else {
a = append(a, "(?:")
opts := strings.Split(tmp[:n], ",")
for j, opt := range opts {
opts[j] = getRegexpStringForQuery(opt, delimiter, true)
a = append(a, regexp.QuoteMeta(query[:1]))
query = query[1:]
case '*':
a = append(a, "[^"+quotedDelimiter+"]*")
query = query[1:]
case '{':
var opts []string
for {
var x string
x, tail = getRegexpStringForQuery(query[1:], delimiter, true)
opts = append(opts, x)
if len(tail) == 0 {
a = append(a, regexp.QuoteMeta("{"))
a = append(a, strings.Join(opts, ","))
goto end
}
a = append(a, strings.Join(opts, "|"))
a = append(a, ")")
i += n + 1
if tail[0] == ',' {
query = tail
continue
}
if tail[0] == '}' {
a = append(a, "(?:"+strings.Join(opts, "|")+")")
query = tail[1:]
break
}
logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail)
}
j = i + 1
case '[':
a = append(a, regexp.QuoteMeta(query[j:i]))
tmp := query[i:]
if n := strings.IndexByte(tmp, ']'); n < 0 {
a = append(a, regexp.QuoteMeta(query[i:]))
i = len(query)
} else {
a = append(a, tmp[:n+1])
i += n
n := strings.IndexByte(query, ']')
if n < 0 {
a = append(a, regexp.QuoteMeta(query))
tail = ""
goto end
}
j = i + 1
a = append(a, query[:n+1])
query = query[n+1:]
}
}
a = append(a, regexp.QuoteMeta(query[j:]))
end:
s := strings.Join(a, "")
if isSubquery {
return s
return s, tail
}
if !strings.HasSuffix(s, quotedDelimiter) {
s += quotedDelimiter + "?"
}
s = "^(?:" + s + ")$"
return s
s = "^" + s + "$"
return s, tail
}
type regexpCacheEntry struct {

View file

@ -17,20 +17,25 @@ func TestGetRegexpForQuery(t *testing.T) {
t.Fatalf("unexpected regexp for query=%q, delimiter=%c; got %s; want %s", query, delimiter, reStr, reExpected)
}
}
f("", '.', `^(?:\.?)$`)
f("foobar", '.', `^(?:foobar\.?)$`)
f("*", '.', `^(?:[^\.]*\.?)$`)
f("*", '_', `^(?:[^_]*_?)$`)
f("foo.*.bar", '.', `^(?:foo\.[^\.]*\.bar\.?)$`)
f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^(?:fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?)$`)
f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^(?:fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?)$`)
f("foo.[ab]*z", '.', `^(?:foo\.[ab][^\.]*z\.?)$`)
f("foo_[ab]*", '_', `^(?:foo_[ab][^_]*_?)$`)
f("foo_[ab]_", '_', `^(?:foo_[ab]_)$`)
f("foo.[ab].", '.', `^(?:foo\.[ab]\.)$`)
f("foo{b{ar*,ba*z[1-9]}", '.', `^(?:foo(?:b\{ar[^\.]*|ba[^\.]*z[1-9])\.?)$`)
f("{foo*}", '.', `^(?:(?:foo[^\.]*)\.?)$`)
f("{foo*,}", '.', `^(?:(?:foo[^\.]*|)\.?)$`)
f("", '.', `^\.?$`)
f("foobar", '.', `^foobar\.?$`)
f("*", '.', `^[^\.]*\.?$`)
f("*", '_', `^[^_]*_?$`)
f("foo.*.bar", '.', `^foo\.[^\.]*\.bar\.?$`)
f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?$`)
f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?$`)
f("foo.[ab]*z", '.', `^foo\.[ab][^\.]*z\.?$`)
f("foo_[ab]*", '_', `^foo_[ab][^_]*_?$`)
f("foo_[ab]_", '_', `^foo_[ab]_$`)
f("foo.[ab].", '.', `^foo\.[ab]\.$`)
f("foo{b{ar*,ba*z[1-9]}", '.', `^foo\{b(?:ar[^\.]*|ba[^\.]*z[1-9])\.?$`)
f("{foo*}", '.', `^(?:foo[^\.]*)\.?$`)
f("{foo*,}", '.', `^(?:foo[^\.]*|)\.?$`)
f("foo[bar", '.', `^foo\[bar\.?$`)
f("foo{bar", '.', `^foo\{bar\.?$`)
f("foo{ba,r", '.', `^foo\{ba,r\.?$`)
f("[a-z]", '.', `^[a-z]\.?$`)
f("{foo,x*,x{y,a*b}c}a", '.', `^(?:foo|x[^\.]*|x(?:y|a[^\.]*b)c)a\.?$`)
}
func TestSortPaths(t *testing.T) {

View file

@ -1027,7 +1027,7 @@ func (s *Storage) SearchGraphitePaths(tr TimeRange, query []byte, maxPaths int,
qNode = qNode[:m+1]
mustMatchLeafs = false
}
re, err := getRegexpForGraphiteNodeQuery(qNode)
re, err := getRegexpForGraphiteQuery(qNode)
if err != nil {
return nil, err
}
@ -1054,40 +1054,61 @@ func (s *Storage) SearchGraphitePaths(tr TimeRange, query []byte, maxPaths int,
return paths, nil
}
func getRegexpForGraphiteNodeQuery(q string) (*regexp.Regexp, error) {
parts := getRegexpPartsForGraphiteNodeQuery(q)
func getRegexpForGraphiteQuery(q string) (*regexp.Regexp, error) {
parts, tail := getRegexpPartsForGraphiteQuery(q)
if len(tail) > 0 {
return nil, fmt.Errorf("unexpected tail left after parsing %q: %q", q, tail)
}
reStr := "^" + strings.Join(parts, "") + "$"
return regexp.Compile(reStr)
}
func getRegexpPartsForGraphiteNodeQuery(q string) []string {
func getRegexpPartsForGraphiteQuery(q string) ([]string, string) {
var parts []string
for {
n := strings.IndexAny(q, "*{[")
n := strings.IndexAny(q, "*{}[,")
if n < 0 {
return append(parts, regexp.QuoteMeta(q))
parts = append(parts, regexp.QuoteMeta(q))
return parts, ""
}
parts = append(parts, regexp.QuoteMeta(q[:n]))
q = q[n:]
switch q[0] {
case ',', '}':
return parts, q
case '*':
parts = append(parts, "[^.]*")
q = q[1:]
case '{':
n := strings.IndexByte(q, '}')
if n < 0 {
return append(parts, regexp.QuoteMeta(q))
}
var tmp []string
for _, x := range strings.Split(q[1:n], ",") {
tmp = append(tmp, strings.Join(getRegexpPartsForGraphiteNodeQuery(x), ""))
for {
a, tail := getRegexpPartsForGraphiteQuery(q[1:])
tmp = append(tmp, strings.Join(a, ""))
if len(tail) == 0 {
parts = append(parts, regexp.QuoteMeta("{"))
parts = append(parts, strings.Join(tmp, ","))
return parts, ""
}
if tail[0] == ',' {
q = tail
continue
}
if tail[0] == '}' {
if len(tmp) == 1 {
parts = append(parts, tmp[0])
} else {
parts = append(parts, "(?:"+strings.Join(tmp, "|")+")")
}
q = tail[1:]
break
}
logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail)
}
parts = append(parts, "(?:"+strings.Join(tmp, "|")+")")
q = q[n+1:]
case '[':
n := strings.IndexByte(q, ']')
if n < 0 {
return append(parts, regexp.QuoteMeta(q))
parts = append(parts, regexp.QuoteMeta(q))
return parts, ""
}
parts = append(parts, q[:n+1])
q = q[n+1:]

View file

@ -17,7 +17,7 @@ import (
func TestGetRegexpForGraphiteNodeQuery(t *testing.T) {
f := func(q, expectedRegexp string) {
t.Helper()
re, err := getRegexpForGraphiteNodeQuery(q)
re, err := getRegexpForGraphiteQuery(q)
if err != nil {
t.Fatalf("unexpected error for query=%q: %s", q, err)
}
@ -34,6 +34,11 @@ func TestGetRegexpForGraphiteNodeQuery(t *testing.T) {
f(`[-a-zx.]`, `^[-a-zx.]$`)
f(`**`, `^[^.]*[^.]*$`)
f(`a*[de]{x,y}z`, `^a[^.]*[de](?:x|y)z$`)
f(`foo{bar`, `^foo\{bar$`)
f(`foo{ba,r`, `^foo\{ba,r$`)
f(`foo[bar`, `^foo\[bar$`)
f(`foo{bar}`, `^foobar$`)
f(`foo{bar,,b{{a,b*},z},[x-y]*z}a`, `^foo(?:bar||b(?:(?:a|b[^.]*)|z)|[x-y][^.]*z)a$`)
}
func TestDateMetricIDCacheSerial(t *testing.T) {