From 348edd92fe336e68dcf95bde5186e0bf4eb4a586 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Wed, 11 Nov 2020 12:38:44 +0200
Subject: [PATCH] app/vmselect: add `-search.treatDotsAsIsInRegexps` command-line flag for automatic escaping of dots in regexp label filters

---
 app/vmselect/promql/exec.go      | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 app/vmselect/promql/exec_test.go | 40 ++++++++++++++++++++++++++++++++++++++++
 docs/CHANGELOG.md                |  3 ++
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go
index 789f74ab6..741511503 100644
--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"math"
 	"sort"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -15,7 +16,13 @@ import (
 	"github.com/VictoriaMetrics/metricsql"
 )
 
-var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
+var (
+	logSlowQueryDuration   = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
+	treatDotsAsIsInRegexps = flag.Bool("search.treatDotsAsIsInRegexps", false, "Whether to treat dots as is in regexp label filters used in queries. "+
+		`For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped `+
+		`in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. `+
+		`Such escaping can be useful when querying Graphite data`)
+)
 
 var slowQueries = metrics.NewCounter(`vm_slow_queries_total`)
 
@@ -177,6 +184,9 @@ func parsePromQLWithCache(q string) (metricsql.Expr, error) {
 		if err == nil {
 			e = metricsql.Optimize(e)
 			e = adjustCmpOps(e)
+			if *treatDotsAsIsInRegexps {
+				e = escapeDotsInRegexpLabelFilters(e)
+			}
 		}
 		pcv = &parseCacheValue{
 			e:   e,
@@ -190,6 +200,51 @@ func parsePromQLWithCache(q string) (metricsql.Expr, error) {
 	return pcv.e, nil
 }
 
+// escapeDotsInRegexpLabelFilters escapes dots in the values of regexp label filters
+// of every MetricExpr visited by metricsql.VisitAll(e).
+//
+// The label filters are modified in place; the returned expr is the same e.
+func escapeDotsInRegexpLabelFilters(e metricsql.Expr) metricsql.Expr {
+	metricsql.VisitAll(e, func(expr metricsql.Expr) {
+		me, ok := expr.(*metricsql.MetricExpr)
+		if !ok {
+			return
+		}
+		for i := range me.LabelFilters {
+			f := &me.LabelFilters[i]
+			if f.IsRegexp {
+				f.Value = escapeDots(f.Value)
+			}
+		}
+	})
+	return e
+}
+
+// escapeDots escapes dots in the regexp s with `\` chars, so they match only the dot char.
+//
+// A dot is left as is in the following cases:
+// - it is already escaped, i.e. it is preceded by an odd number of `\` chars;
+// - it is followed by a regexp modifier such as '+', '*' or '{'.
+func escapeDots(s string) string {
+	dotsCount := strings.Count(s, ".")
+	if dotsCount <= 0 {
+		return s
+	}
+	// Every escaped dot adds a single `\` char to the result.
+	result := make([]byte, 0, len(s)+dotsCount)
+	// escaped is true if the current char is preceded by an odd number of consecutive `\` chars.
+	escaped := false
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if c == '.' && !escaped && (i+1 == len(s) || s[i+1] != '*' && s[i+1] != '+' && s[i+1] != '{') {
+			result = append(result, '\\')
+		}
+		result = append(result, c)
+		// A `\` char escapes the next char only if it isn't escaped itself.
+		escaped = c == '\\' && !escaped
+	}
+	return string(result)
+}
+
 var parseCacheV = func() *parseCache {
 	pc := &parseCache{
 		m: make(map[string]*parseCacheValue),
diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go
index 977ae0575..44a9cf70c 100644
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -7,8 +7,48 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+	"github.com/VictoriaMetrics/metricsql"
 )
 
+func TestEscapeDots(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		result := escapeDots(s)
+		if result != resultExpected {
+			t.Fatalf("unexpected result for escapeDots(%q); got\n%s\nwant\n%s", s, result, resultExpected)
+		}
+	}
+	f("", "")
+	f("a", "a")
+	f("foobar", "foobar")
+	f(".", `\.`)
+	f(".*", `.*`)
+	f(".+", `.+`)
+	f("..", `\.\.`)
+	f("foo.b.{2}ar..+baz.*", `foo\.b.{2}ar\..+baz.*`)
+	f(`foo\.bar`, `foo\.bar`)
+	f(`foo\\.bar`, `foo\\\.bar`)
+}
+
+func TestEscapeDotsInRegexpLabelFilters(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		e, err := metricsql.Parse(s)
+		if err != nil {
+			t.Fatalf("unexpected error in metricsql.Parse(%q): %s", s, err)
+		}
+		e = escapeDotsInRegexpLabelFilters(e)
+		result := e.AppendString(nil)
+		if string(result) != resultExpected {
+			t.Fatalf("unexpected result for escapeDotsInRegexpLabelFilters(%q);\ngot\n%s\nwant\n%s", s, result, resultExpected)
+		}
+	}
+	f("2", "2")
+	f(`foo.bar + 123`, `foo.bar + 123`)
+	f(`foo{bar=~"baz.xx.yyy"}`, `foo{bar=~"baz\\.xx\\.yyy"}`)
+	f(`foo(a.b{c="d.e",x=~"a.b.+[.a]",y!~"aaa.bb|cc.dd"}) + x.y(1,sum({x=~"aa.bb"}))`, `foo(a.b{c="d.e", x=~"a\\.b.+[\\.a]", y!~"aaa\\.bb|cc\\.dd"}) + x.y(1, sum({x=~"aa\\.bb"}))`)
+}
+
 func TestExecSuccess(t *testing.T) {
 	start := int64(1000e3)
 	end := int64(2000e3)
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index f85c48a97..759deea64 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -7,6 +7,9 @@
 * FEATURE: vmagent: add `-promscrape.dropOriginalLabels` command-line option, which can be used for reducing memory usage when scraping big number of targets.
   See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825#issuecomment-724308361 for details.
 * FEATURE: vmalert: explicitly set extra labels to alert entities. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/870
+* FEATURE: add `-search.treatDotsAsIsInRegexps` command-line flag, which can be used for automatic escaping of dots in regexp label filters used in queries.
+  For example, if `-search.treatDotsAsIsInRegexps` is set, then the query `foo{bar=~"aaa.bb.cc|dd.eee"}` is automatically converted to `foo{bar=~"aaa\\.bb\\.cc|dd\\.eee"}`.
+  This may be useful for querying Graphite data.
 
 * BUGFIX: do not return data points in the end of the selected time range for time series ending in the middle of the selected time range.
   See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/887 and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/845