From 8249f13104ab9c73377faaefb4751fd1475c0a32 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 3 Feb 2021 20:12:17 +0200 Subject: [PATCH] app/vmselect,lib/storage: properly parse Graphite selectors with inner wildcards Example: foo{bar{x,yz},a[b-c],*de} --- app/vmselect/graphite/metrics_api.go | 96 +++++++++++++---------- app/vmselect/graphite/metrics_api_test.go | 33 ++++---- lib/storage/storage.go | 51 ++++++++---- lib/storage/storage_test.go | 7 +- 4 files changed, 117 insertions(+), 70 deletions(-) diff --git a/app/vmselect/graphite/metrics_api.go b/app/vmselect/graphite/metrics_api.go index dcd016b40..916c7114d 100644 --- a/app/vmselect/graphite/metrics_api.go +++ b/app/vmselect/graphite/metrics_api.go @@ -13,6 +13,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/metrics" ) @@ -350,7 +351,10 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) { if re := regexpCache[k]; re != nil { return re.re, re.err } - rs := getRegexpStringForQuery(query, delimiter, false) + rs, tail := getRegexpStringForQuery(query, delimiter, false) + if len(tail) > 0 { + return nil, fmt.Errorf("unexpected tail left after parsing query %q; tail: %q", query, tail) + } re, err := regexp.Compile(rs) regexpCache[k] = ®expCacheEntry{ re: re, @@ -367,61 +371,73 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) { return re, err } -func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) string { +func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) (string, string) { var a []string + var tail string quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter})) - tillNextDelimiter := "[^" + quotedDelimiter + "]*" - j := 0 - for i := 0; i < len(query); i++ { - switch query[i] { - case '*': - a = append(a, regexp.QuoteMeta(query[j:i])) - a = append(a, tillNextDelimiter) - j = i + 1 - case '{': + for { + n := strings.IndexAny(query, "*{[,}") + if n < 0 { + a = append(a, regexp.QuoteMeta(query)) + tail = "" + goto end + } + a = append(a, regexp.QuoteMeta(query[:n])) + query = query[n:] + switch query[0] { + case ',', '}': if isSubquery { - break + tail = query + goto end } - a = append(a, regexp.QuoteMeta(query[j:i])) - tmp := query[i+1:] - if n := strings.IndexByte(tmp, '}'); n < 0 { - rs := getRegexpStringForQuery(query[i:], delimiter, true) - a = append(a, rs) - i = len(query) - } else { - a = append(a, "(?:") - opts := strings.Split(tmp[:n], ",") - for j, opt := range opts { - opts[j] = getRegexpStringForQuery(opt, delimiter, true) + a = append(a, regexp.QuoteMeta(query[:1])) + query = query[1:] + case '*': + a = append(a, "[^"+quotedDelimiter+"]*") + query = query[1:] + case '{': + var opts []string + for { + var x string + x, tail = getRegexpStringForQuery(query[1:], delimiter, true) + opts = append(opts, x) + if len(tail) == 0 { + a = append(a, regexp.QuoteMeta("{")) + a = append(a, strings.Join(opts, ",")) + goto end } - a = append(a, strings.Join(opts, "|")) - a = append(a, ")") - i += n + 1 + if tail[0] == ',' { + query = tail + continue + } + if tail[0] == '}' { + a = append(a, "(?:"+strings.Join(opts, "|")+")") + query = tail[1:] + break + } + logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail) } - j = i + 1 case '[': - a = append(a, regexp.QuoteMeta(query[j:i])) - tmp := query[i:] - if n := strings.IndexByte(tmp, ']'); n < 0 { - a = append(a, regexp.QuoteMeta(query[i:])) - i = len(query) - } else { - a = append(a, tmp[:n+1]) - i += n + n := strings.IndexByte(query, ']') + if n < 0 { + a = append(a, regexp.QuoteMeta(query)) + tail = "" + goto end } - j = i + 1 + a = append(a, query[:n+1]) + query = query[n+1:] } } - a = append(a, regexp.QuoteMeta(query[j:])) +end: s := strings.Join(a, "") if isSubquery { - return s + return s, tail } if !strings.HasSuffix(s, quotedDelimiter) { s += quotedDelimiter + "?" } - s = "^(?:" + s + ")$" - return s + s = "^" + s + "$" + return s, tail } type regexpCacheEntry struct { diff --git a/app/vmselect/graphite/metrics_api_test.go b/app/vmselect/graphite/metrics_api_test.go index 393255085..20556f5c6 100644 --- a/app/vmselect/graphite/metrics_api_test.go +++ b/app/vmselect/graphite/metrics_api_test.go @@ -17,20 +17,25 @@ func TestGetRegexpForQuery(t *testing.T) { t.Fatalf("unexpected regexp for query=%q, delimiter=%c; got %s; want %s", query, delimiter, reStr, reExpected) } } - f("", '.', `^(?:\.?)$`) - f("foobar", '.', `^(?:foobar\.?)$`) - f("*", '.', `^(?:[^\.]*\.?)$`) - f("*", '_', `^(?:[^_]*_?)$`) - f("foo.*.bar", '.', `^(?:foo\.[^\.]*\.bar\.?)$`) - f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^(?:fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?)$`) - f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^(?:fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?)$`) - f("foo.[ab]*z", '.', `^(?:foo\.[ab][^\.]*z\.?)$`) - f("foo_[ab]*", '_', `^(?:foo_[ab][^_]*_?)$`) - f("foo_[ab]_", '_', `^(?:foo_[ab]_)$`) - f("foo.[ab].", '.', `^(?:foo\.[ab]\.)$`) - f("foo{b{ar*,ba*z[1-9]}", '.', `^(?:foo(?:b\{ar[^\.]*|ba[^\.]*z[1-9])\.?)$`) - f("{foo*}", '.', `^(?:(?:foo[^\.]*)\.?)$`) - f("{foo*,}", '.', `^(?:(?:foo[^\.]*|)\.?)$`) + f("", '.', `^\.?$`) + f("foobar", '.', `^foobar\.?$`) + f("*", '.', `^[^\.]*\.?$`) + f("*", '_', `^[^_]*_?$`) + f("foo.*.bar", '.', `^foo\.[^\.]*\.bar\.?$`) + f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?$`) + f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?$`) + f("foo.[ab]*z", '.', `^foo\.[ab][^\.]*z\.?$`) + f("foo_[ab]*", '_', `^foo_[ab][^_]*_?$`) + f("foo_[ab]_", '_', `^foo_[ab]_$`) + f("foo.[ab].", '.', `^foo\.[ab]\.$`) + f("foo{b{ar*,ba*z[1-9]}", '.', `^foo\{b(?:ar[^\.]*|ba[^\.]*z[1-9])\.?$`) + f("{foo*}", '.', `^(?:foo[^\.]*)\.?$`) + f("{foo*,}", '.', `^(?:foo[^\.]*|)\.?$`) + f("foo[bar", '.', `^foo\[bar\.?$`) + f("foo{bar", '.', `^foo\{bar\.?$`) + f("foo{ba,r", '.', `^foo\{ba,r\.?$`) + f("[a-z]", '.', `^[a-z]\.?$`) + f("{foo,x*,x{y,a*b}c}a", '.', `^(?:foo|x[^\.]*|x(?:y|a[^\.]*b)c)a\.?$`) } func TestSortPaths(t *testing.T) { diff --git a/lib/storage/storage.go b/lib/storage/storage.go index 35bbdb5a1..811b62cd9 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1103,7 +1103,7 @@ func (s *Storage) SearchGraphitePaths(accountID, projectID uint32, tr TimeRange, qNode = qNode[:m+1] mustMatchLeafs = false } - re, err := getRegexpForGraphiteNodeQuery(qNode) + re, err := getRegexpForGraphiteQuery(qNode) if err != nil { return nil, err } @@ -1130,40 +1130,61 @@ func (s *Storage) SearchGraphitePaths(accountID, projectID uint32, tr TimeRange, return paths, nil } -func getRegexpForGraphiteNodeQuery(q string) (*regexp.Regexp, error) { - parts := getRegexpPartsForGraphiteNodeQuery(q) +func getRegexpForGraphiteQuery(q string) (*regexp.Regexp, error) { + parts, tail := getRegexpPartsForGraphiteQuery(q) + if len(tail) > 0 { + return nil, fmt.Errorf("unexpected tail left after parsing %q: %q", q, tail) + } reStr := "^" + strings.Join(parts, "") + "$" return regexp.Compile(reStr) } -func getRegexpPartsForGraphiteNodeQuery(q string) []string { +func getRegexpPartsForGraphiteQuery(q string) ([]string, string) { var parts []string for { - n := strings.IndexAny(q, "*{[") + n := strings.IndexAny(q, "*{}[,") if n < 0 { - return append(parts, regexp.QuoteMeta(q)) + parts = append(parts, regexp.QuoteMeta(q)) + return parts, "" } parts = append(parts, regexp.QuoteMeta(q[:n])) q = q[n:] switch q[0] { + case ',', '}': + return parts, q case '*': parts = append(parts, "[^.]*") q = q[1:] case '{': - n := strings.IndexByte(q, '}') - if n < 0 { - return append(parts, regexp.QuoteMeta(q)) - } var tmp []string - for _, x := range strings.Split(q[1:n], ",") { - tmp = append(tmp, strings.Join(getRegexpPartsForGraphiteNodeQuery(x), "")) + for { + a, tail := getRegexpPartsForGraphiteQuery(q[1:]) + tmp = append(tmp, strings.Join(a, "")) + if len(tail) == 0 { + parts = append(parts, regexp.QuoteMeta("{")) + parts = append(parts, strings.Join(tmp, ",")) + return parts, "" + } + if tail[0] == ',' { + q = tail + continue + } + if tail[0] == '}' { + if len(tmp) == 1 { + parts = append(parts, tmp[0]) + } else { + parts = append(parts, "(?:"+strings.Join(tmp, "|")+")") + } + q = tail[1:] + break + } + logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail) } - parts = append(parts, "(?:"+strings.Join(tmp, "|")+")") - q = q[n+1:] case '[': n := strings.IndexByte(q, ']') if n < 0 { - return append(parts, regexp.QuoteMeta(q)) + parts = append(parts, regexp.QuoteMeta(q)) + return parts, "" } parts = append(parts, q[:n+1]) q = q[n+1:] diff --git a/lib/storage/storage_test.go b/lib/storage/storage_test.go index e412e93d4..a742494f2 100644 --- a/lib/storage/storage_test.go +++ b/lib/storage/storage_test.go @@ -17,7 +17,7 @@ import ( func TestGetRegexpForGraphiteNodeQuery(t *testing.T) { f := func(q, expectedRegexp string) { t.Helper() - re, err := getRegexpForGraphiteNodeQuery(q) + re, err := getRegexpForGraphiteQuery(q) if err != nil { t.Fatalf("unexpected error for query=%q: %s", q, err) } @@ -34,6 +34,11 @@ func TestGetRegexpForGraphiteNodeQuery(t *testing.T) { f(`[-a-zx.]`, `^[-a-zx.]$`) f(`**`, `^[^.]*[^.]*$`) f(`a*[de]{x,y}z`, `^a[^.]*[de](?:x|y)z$`) + f(`foo{bar`, `^foo\{bar$`) + f(`foo{ba,r`, `^foo\{ba,r$`) + f(`foo[bar`, `^foo\[bar$`) + f(`foo{bar}`, `^foobar$`) + f(`foo{bar,,b{{a,b*},z},[x-y]*z}a`, `^foo(?:bar||b(?:(?:a|b[^.]*)|z)|[x-y][^.]*z)a$`) } func TestDateMetricIDCacheSerial(t *testing.T) {