app/vmselect,lib/storage: properly parse Graphite selectors with inner wildcards

Example: foo{bar{x,yz},a[b-c],*de}
This commit is contained in:
Aliaksandr Valialkin 2021-02-03 20:12:17 +02:00
parent 2976ec89b8
commit 8249f13104
4 changed files with 117 additions and 70 deletions

View file

@ -13,6 +13,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
) )
@ -350,7 +351,10 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
if re := regexpCache[k]; re != nil { if re := regexpCache[k]; re != nil {
return re.re, re.err return re.re, re.err
} }
rs := getRegexpStringForQuery(query, delimiter, false) rs, tail := getRegexpStringForQuery(query, delimiter, false)
if len(tail) > 0 {
return nil, fmt.Errorf("unexpected tail left after parsing query %q; tail: %q", query, tail)
}
re, err := regexp.Compile(rs) re, err := regexp.Compile(rs)
regexpCache[k] = &regexpCacheEntry{ regexpCache[k] = &regexpCacheEntry{
re: re, re: re,
@ -367,61 +371,73 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
return re, err return re, err
} }
func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) string { func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) (string, string) {
var a []string var a []string
var tail string
quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter})) quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter}))
tillNextDelimiter := "[^" + quotedDelimiter + "]*" for {
j := 0 n := strings.IndexAny(query, "*{[,}")
for i := 0; i < len(query); i++ { if n < 0 {
switch query[i] { a = append(a, regexp.QuoteMeta(query))
case '*': tail = ""
a = append(a, regexp.QuoteMeta(query[j:i])) goto end
a = append(a, tillNextDelimiter) }
j = i + 1 a = append(a, regexp.QuoteMeta(query[:n]))
case '{': query = query[n:]
switch query[0] {
case ',', '}':
if isSubquery { if isSubquery {
break tail = query
goto end
} }
a = append(a, regexp.QuoteMeta(query[j:i])) a = append(a, regexp.QuoteMeta(query[:1]))
tmp := query[i+1:] query = query[1:]
if n := strings.IndexByte(tmp, '}'); n < 0 { case '*':
rs := getRegexpStringForQuery(query[i:], delimiter, true) a = append(a, "[^"+quotedDelimiter+"]*")
a = append(a, rs) query = query[1:]
i = len(query) case '{':
} else { var opts []string
a = append(a, "(?:") for {
opts := strings.Split(tmp[:n], ",") var x string
for j, opt := range opts { x, tail = getRegexpStringForQuery(query[1:], delimiter, true)
opts[j] = getRegexpStringForQuery(opt, delimiter, true) opts = append(opts, x)
if len(tail) == 0 {
a = append(a, regexp.QuoteMeta("{"))
a = append(a, strings.Join(opts, ","))
goto end
} }
a = append(a, strings.Join(opts, "|")) if tail[0] == ',' {
a = append(a, ")") query = tail
i += n + 1 continue
}
if tail[0] == '}' {
a = append(a, "(?:"+strings.Join(opts, "|")+")")
query = tail[1:]
break
}
logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail)
} }
j = i + 1
case '[': case '[':
a = append(a, regexp.QuoteMeta(query[j:i])) n := strings.IndexByte(query, ']')
tmp := query[i:] if n < 0 {
if n := strings.IndexByte(tmp, ']'); n < 0 { a = append(a, regexp.QuoteMeta(query))
a = append(a, regexp.QuoteMeta(query[i:])) tail = ""
i = len(query) goto end
} else {
a = append(a, tmp[:n+1])
i += n
} }
j = i + 1 a = append(a, query[:n+1])
query = query[n+1:]
} }
} }
a = append(a, regexp.QuoteMeta(query[j:])) end:
s := strings.Join(a, "") s := strings.Join(a, "")
if isSubquery { if isSubquery {
return s return s, tail
} }
if !strings.HasSuffix(s, quotedDelimiter) { if !strings.HasSuffix(s, quotedDelimiter) {
s += quotedDelimiter + "?" s += quotedDelimiter + "?"
} }
s = "^(?:" + s + ")$" s = "^" + s + "$"
return s return s, tail
} }
type regexpCacheEntry struct { type regexpCacheEntry struct {

View file

@ -17,20 +17,25 @@ func TestGetRegexpForQuery(t *testing.T) {
t.Fatalf("unexpected regexp for query=%q, delimiter=%c; got %s; want %s", query, delimiter, reStr, reExpected) t.Fatalf("unexpected regexp for query=%q, delimiter=%c; got %s; want %s", query, delimiter, reStr, reExpected)
} }
} }
f("", '.', `^(?:\.?)$`) f("", '.', `^\.?$`)
f("foobar", '.', `^(?:foobar\.?)$`) f("foobar", '.', `^foobar\.?$`)
f("*", '.', `^(?:[^\.]*\.?)$`) f("*", '.', `^[^\.]*\.?$`)
f("*", '_', `^(?:[^_]*_?)$`) f("*", '_', `^[^_]*_?$`)
f("foo.*.bar", '.', `^(?:foo\.[^\.]*\.bar\.?)$`) f("foo.*.bar", '.', `^foo\.[^\.]*\.bar\.?$`)
f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^(?:fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?)$`) f("fo*b{ar,aaa}[a-z]xx*.d", '.', `^fo[^\.]*b(?:ar|aaa)[a-z]xx[^\.]*\.d\.?$`)
f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^(?:fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?)$`) f("fo*b{ar,aaa}[a-z]xx*_d", '_', `^fo[^_]*b(?:ar|aaa)[a-z]xx[^_]*_d_?$`)
f("foo.[ab]*z", '.', `^(?:foo\.[ab][^\.]*z\.?)$`) f("foo.[ab]*z", '.', `^foo\.[ab][^\.]*z\.?$`)
f("foo_[ab]*", '_', `^(?:foo_[ab][^_]*_?)$`) f("foo_[ab]*", '_', `^foo_[ab][^_]*_?$`)
f("foo_[ab]_", '_', `^(?:foo_[ab]_)$`) f("foo_[ab]_", '_', `^foo_[ab]_$`)
f("foo.[ab].", '.', `^(?:foo\.[ab]\.)$`) f("foo.[ab].", '.', `^foo\.[ab]\.$`)
f("foo{b{ar*,ba*z[1-9]}", '.', `^(?:foo(?:b\{ar[^\.]*|ba[^\.]*z[1-9])\.?)$`) f("foo{b{ar*,ba*z[1-9]}", '.', `^foo\{b(?:ar[^\.]*|ba[^\.]*z[1-9])\.?$`)
f("{foo*}", '.', `^(?:(?:foo[^\.]*)\.?)$`) f("{foo*}", '.', `^(?:foo[^\.]*)\.?$`)
f("{foo*,}", '.', `^(?:(?:foo[^\.]*|)\.?)$`) f("{foo*,}", '.', `^(?:foo[^\.]*|)\.?$`)
f("foo[bar", '.', `^foo\[bar\.?$`)
f("foo{bar", '.', `^foo\{bar\.?$`)
f("foo{ba,r", '.', `^foo\{ba,r\.?$`)
f("[a-z]", '.', `^[a-z]\.?$`)
f("{foo,x*,x{y,a*b}c}a", '.', `^(?:foo|x[^\.]*|x(?:y|a[^\.]*b)c)a\.?$`)
} }
func TestSortPaths(t *testing.T) { func TestSortPaths(t *testing.T) {

View file

@ -1103,7 +1103,7 @@ func (s *Storage) SearchGraphitePaths(accountID, projectID uint32, tr TimeRange,
qNode = qNode[:m+1] qNode = qNode[:m+1]
mustMatchLeafs = false mustMatchLeafs = false
} }
re, err := getRegexpForGraphiteNodeQuery(qNode) re, err := getRegexpForGraphiteQuery(qNode)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1130,40 +1130,61 @@ func (s *Storage) SearchGraphitePaths(accountID, projectID uint32, tr TimeRange,
return paths, nil return paths, nil
} }
func getRegexpForGraphiteNodeQuery(q string) (*regexp.Regexp, error) { func getRegexpForGraphiteQuery(q string) (*regexp.Regexp, error) {
parts := getRegexpPartsForGraphiteNodeQuery(q) parts, tail := getRegexpPartsForGraphiteQuery(q)
if len(tail) > 0 {
return nil, fmt.Errorf("unexpected tail left after parsing %q: %q", q, tail)
}
reStr := "^" + strings.Join(parts, "") + "$" reStr := "^" + strings.Join(parts, "") + "$"
return regexp.Compile(reStr) return regexp.Compile(reStr)
} }
func getRegexpPartsForGraphiteNodeQuery(q string) []string { func getRegexpPartsForGraphiteQuery(q string) ([]string, string) {
var parts []string var parts []string
for { for {
n := strings.IndexAny(q, "*{[") n := strings.IndexAny(q, "*{}[,")
if n < 0 { if n < 0 {
return append(parts, regexp.QuoteMeta(q)) parts = append(parts, regexp.QuoteMeta(q))
return parts, ""
} }
parts = append(parts, regexp.QuoteMeta(q[:n])) parts = append(parts, regexp.QuoteMeta(q[:n]))
q = q[n:] q = q[n:]
switch q[0] { switch q[0] {
case ',', '}':
return parts, q
case '*': case '*':
parts = append(parts, "[^.]*") parts = append(parts, "[^.]*")
q = q[1:] q = q[1:]
case '{': case '{':
n := strings.IndexByte(q, '}')
if n < 0 {
return append(parts, regexp.QuoteMeta(q))
}
var tmp []string var tmp []string
for _, x := range strings.Split(q[1:n], ",") { for {
tmp = append(tmp, strings.Join(getRegexpPartsForGraphiteNodeQuery(x), "")) a, tail := getRegexpPartsForGraphiteQuery(q[1:])
tmp = append(tmp, strings.Join(a, ""))
if len(tail) == 0 {
parts = append(parts, regexp.QuoteMeta("{"))
parts = append(parts, strings.Join(tmp, ","))
return parts, ""
}
if tail[0] == ',' {
q = tail
continue
}
if tail[0] == '}' {
if len(tmp) == 1 {
parts = append(parts, tmp[0])
} else {
parts = append(parts, "(?:"+strings.Join(tmp, "|")+")")
}
q = tail[1:]
break
}
logger.Panicf("BUG: unexpected first char at tail %q; want `.` or `}`", tail)
} }
parts = append(parts, "(?:"+strings.Join(tmp, "|")+")")
q = q[n+1:]
case '[': case '[':
n := strings.IndexByte(q, ']') n := strings.IndexByte(q, ']')
if n < 0 { if n < 0 {
return append(parts, regexp.QuoteMeta(q)) parts = append(parts, regexp.QuoteMeta(q))
return parts, ""
} }
parts = append(parts, q[:n+1]) parts = append(parts, q[:n+1])
q = q[n+1:] q = q[n+1:]

View file

@ -17,7 +17,7 @@ import (
func TestGetRegexpForGraphiteNodeQuery(t *testing.T) { func TestGetRegexpForGraphiteNodeQuery(t *testing.T) {
f := func(q, expectedRegexp string) { f := func(q, expectedRegexp string) {
t.Helper() t.Helper()
re, err := getRegexpForGraphiteNodeQuery(q) re, err := getRegexpForGraphiteQuery(q)
if err != nil { if err != nil {
t.Fatalf("unexpected error for query=%q: %s", q, err) t.Fatalf("unexpected error for query=%q: %s", q, err)
} }
@ -34,6 +34,11 @@ func TestGetRegexpForGraphiteNodeQuery(t *testing.T) {
f(`[-a-zx.]`, `^[-a-zx.]$`) f(`[-a-zx.]`, `^[-a-zx.]$`)
f(`**`, `^[^.]*[^.]*$`) f(`**`, `^[^.]*[^.]*$`)
f(`a*[de]{x,y}z`, `^a[^.]*[de](?:x|y)z$`) f(`a*[de]{x,y}z`, `^a[^.]*[de](?:x|y)z$`)
f(`foo{bar`, `^foo\{bar$`)
f(`foo{ba,r`, `^foo\{ba,r$`)
f(`foo[bar`, `^foo\[bar$`)
f(`foo{bar}`, `^foobar$`)
f(`foo{bar,,b{{a,b*},z},[x-y]*z}a`, `^foo(?:bar||b(?:(?:a|b[^.]*)|z)|[x-y][^.]*z)a$`)
} }
func TestDateMetricIDCacheSerial(t *testing.T) { func TestDateMetricIDCacheSerial(t *testing.T) {