app/vmselect/graphite: properly handle wildcards and charsets inside curly braces

For example, `foo{bar*,[a-f]a*b}` should match `foobar`, `foobar123`, `foofab`, etc.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/952
This commit is contained in:
Aliaksandr Valialkin 2020-12-11 17:03:12 +02:00
parent 9e79fc27c8
commit d6f9bf2d19
3 changed files with 64 additions and 41 deletions

View file

@ -350,47 +350,8 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
if re := regexpCache[k]; re != nil {
return re.re, re.err
}
a := make([]string, 0, len(query))
quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter}))
tillNextDelimiter := "[^" + quotedDelimiter + "]*"
for i := 0; i < len(query); i++ {
switch query[i] {
case '*':
a = append(a, tillNextDelimiter)
case '{':
tmp := query[i+1:]
if n := strings.IndexByte(tmp, '}'); n < 0 {
a = append(a, regexp.QuoteMeta(query[i:]))
i = len(query)
} else {
a = append(a, "(?:")
opts := strings.Split(tmp[:n], ",")
for j, opt := range opts {
opts[j] = regexp.QuoteMeta(opt)
}
a = append(a, strings.Join(opts, "|"))
a = append(a, ")")
i += n + 1
}
case '[':
tmp := query[i:]
if n := strings.IndexByte(tmp, ']'); n < 0 {
a = append(a, regexp.QuoteMeta(query[i:]))
i = len(query)
} else {
a = append(a, tmp[:n+1])
i += n
}
default:
a = append(a, regexp.QuoteMeta(query[i:i+1]))
}
}
s := strings.Join(a, "")
if !strings.HasSuffix(s, quotedDelimiter) {
s += quotedDelimiter + "?"
}
s = "^(?:" + s + ")$"
re, err := regexp.Compile(s)
rs := getRegexpStringForQuery(query, delimiter, false)
re, err := regexp.Compile(rs)
regexpCache[k] = &regexpCacheEntry{
re: re,
err: err,
@ -406,6 +367,63 @@ func getRegexpForQuery(query string, delimiter byte) (*regexp.Regexp, error) {
return re, err
}
// getRegexpStringForQuery converts the Graphite-style glob query into the source
// text of a regular expression.
//
// The following glob syntax is recognized:
//   - `*` matches any number of chars up to the next delimiter;
//   - `{a,b,...}` matches any of the comma-separated alternatives, where every
//     alternative may contain `*` and `[...]` (nested `{` is treated as a literal char);
//   - `[...]` is copied into the regexp as is, so it works as a char class.
//
// All the other chars are quoted with regexp.QuoteMeta. Unclosed `{` and `[`
// are treated as literal chars instead of glob syntax.
//
// If isSubquery is set, then `{` is always treated as a literal char and the bare
// regexp fragment is returned. Otherwise the result is wrapped into `^(?:...)$`
// and an optional trailing delimiter is appended unless the fragment already ends
// with the delimiter.
func getRegexpStringForQuery(query string, delimiter byte, isSubquery bool) string {
	var a []string
	quotedDelimiter := regexp.QuoteMeta(string([]byte{delimiter}))
	tillNextDelimiter := "[^" + quotedDelimiter + "]*"
	// j points to the start of the literal chars, which haven't been added to a yet.
	j := 0
	for i := 0; i < len(query); i++ {
		switch query[i] {
		case '*':
			a = append(a, regexp.QuoteMeta(query[j:i]))
			a = append(a, tillNextDelimiter)
			j = i + 1
		case '{':
			if isSubquery {
				// Nested curly braces aren't supported - leave `{` in the pending literal chars.
				break
			}
			a = append(a, regexp.QuoteMeta(query[j:i]))
			tmp := query[i+1:]
			if n := strings.IndexByte(tmp, '}'); n < 0 {
				// Unclosed `{`. Treat it as a literal char, while still expanding
				// `*` and `[...]` in the rest of the query.
				rs := getRegexpStringForQuery(query[i:], delimiter, true)
				a = append(a, rs)
				// The whole tail has been consumed. Stop at the last byte, so j below
				// becomes len(query) and the final query[j:] doesn't go out of range.
				i = len(query) - 1
			} else {
				a = append(a, "(?:")
				opts := strings.Split(tmp[:n], ",")
				for k, opt := range opts {
					opts[k] = getRegexpStringForQuery(opt, delimiter, true)
				}
				a = append(a, strings.Join(opts, "|"))
				a = append(a, ")")
				// Move i to the closing `}`.
				i += n + 1
			}
			j = i + 1
		case '[':
			a = append(a, regexp.QuoteMeta(query[j:i]))
			tmp := query[i:]
			if n := strings.IndexByte(tmp, ']'); n < 0 {
				// Unclosed `[`. Treat the rest of the query as literal chars.
				a = append(a, regexp.QuoteMeta(query[i:]))
				// See the comment for unclosed `{` above.
				i = len(query) - 1
			} else {
				// Pass the char class to the regexp as is.
				a = append(a, tmp[:n+1])
				// Move i to the closing `]`.
				i += n
			}
			j = i + 1
		}
	}
	a = append(a, regexp.QuoteMeta(query[j:]))
	s := strings.Join(a, "")
	if isSubquery {
		return s
	}
	if !strings.HasSuffix(s, quotedDelimiter) {
		s += quotedDelimiter + "?"
	}
	s = "^(?:" + s + ")$"
	return s
}
type regexpCacheEntry struct {
re *regexp.Regexp
err error

View file

@ -28,6 +28,9 @@ func TestGetRegexpForQuery(t *testing.T) {
f("foo_[ab]*", '_', `^(?:foo_[ab][^_]*_?)$`)
f("foo_[ab]_", '_', `^(?:foo_[ab]_)$`)
f("foo.[ab].", '.', `^(?:foo\.[ab]\.)$`)
f("foo{b{ar*,ba*z[1-9]}", '.', `^(?:foo(?:b\{ar[^\.]*|ba[^\.]*z[1-9])\.?)$`)
f("{foo*}", '.', `^(?:(?:foo[^\.]*)\.?)$`)
f("{foo*,}", '.', `^(?:(?:foo[^\.]*|)\.?)$`)
}
func TestSortPaths(t *testing.T) {
@ -72,4 +75,5 @@ func TestAddAutomaticVariants(t *testing.T) {
f("foo,bar.baz", "_", "{foo,bar.baz}")
f("foo,bar_baz*", "_", "{foo,bar}_baz*")
f("foo.bar,baz,aa.bb,cc", ".", "foo.{bar,baz,aa}.{bb,cc}")
f("foo.b*r,b[a-xz]z,aa.bb,cc", ".", "foo.{b*r,b[a-xz]z,aa}.{bb,cc}")
}

View file

@ -9,6 +9,7 @@
* FEATURE: export `vm_promscrape_scrapers_started_total{type="<sd_type>"}` and `vm_promscrape_scrapers_stopped_total{type="<sd_type>"}` metrics for tracking churn rate for scrapers
per each service discovery type.
* BUGFIX: properly handle `*` and `[...]` inside curly braces in query passed to Graphite Metrics API. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/952
* BUGFIX: vmagent: fix memory leak when a big number of targets is discovered via service discovery.
* BUGFIX: vmagent: properly pass `datacenter` filter to Consul API server. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574#issuecomment-740454170
* BUGFIX: properly handle CPU limits set on the host system or host container. The bugfix may result in lower memory usage on systems with CPU limits. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/946