app/vmselect: add -search.treatDotsAsIsInRegexps command-line flag for automatic escaping of dots in regexp label filters

This commit is contained in:
Aliaksandr Valialkin 2020-11-11 12:38:44 +02:00
parent 0769f86a7e
commit 4f2c5877db
3 changed files with 87 additions and 1 deletions

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"math" "math"
"sort" "sort"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -15,7 +16,13 @@ import (
"github.com/VictoriaMetrics/metricsql" "github.com/VictoriaMetrics/metricsql"
) )
// Command-line flags for query execution.
var logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging") var (
// logSlowQueryDuration holds the -search.logSlowQueryDuration flag value (defaults to 5 seconds).
logSlowQueryDuration = flag.Duration("search.logSlowQueryDuration", 5*time.Second, "Log queries with execution time exceeding this value. Zero disables slow query logging")
// treatDotsAsIsInRegexps holds the -search.treatDotsAsIsInRegexps flag value (disabled by default).
// When set, parsePromQLWithCache passes the parsed query through escapeDotsInRegexpLabelFilters.
treatDotsAsIsInRegexps = flag.Bool("search.treatDotsAsIsInRegexps", false, "Whether to treat dots as is in regexp label filters used in queries. "+
`For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped `+
`in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. `+
`Such escaping can be useful when querying Graphite data`)
)
var slowQueries = metrics.NewCounter(`vm_slow_queries_total`) var slowQueries = metrics.NewCounter(`vm_slow_queries_total`)
@ -177,6 +184,9 @@ func parsePromQLWithCache(q string) (metricsql.Expr, error) {
if err == nil { if err == nil {
e = metricsql.Optimize(e) e = metricsql.Optimize(e)
e = adjustCmpOps(e) e = adjustCmpOps(e)
if *treatDotsAsIsInRegexps {
e = escapeDotsInRegexpLabelFilters(e)
}
} }
pcv = &parseCacheValue{ pcv = &parseCacheValue{
e: e, e: e,
@ -190,6 +200,41 @@ func parsePromQLWithCache(q string) (metricsql.Expr, error) {
return pcv.e, nil return pcv.e, nil
} }
// escapeDotsInRegexpLabelFilters rewrites, in place, the value of every regexp
// label filter found in the metric expressions of e by passing it through escapeDots.
//
// Non-regexp label filters are left untouched. The same (mutated) e is returned.
func escapeDotsInRegexpLabelFilters(e metricsql.Expr) metricsql.Expr {
	escapeFilters := func(expr metricsql.Expr) {
		metricExpr, isMetricExpr := expr.(*metricsql.MetricExpr)
		if !isMetricExpr {
			// Only metric selectors carry label filters.
			return
		}
		for idx := range metricExpr.LabelFilters {
			// Take the address in order to modify the filter inside the slice.
			if lf := &metricExpr.LabelFilters[idx]; lf.IsRegexp {
				lf.Value = escapeDots(lf.Value)
			}
		}
	}
	metricsql.VisitAll(e, escapeFilters)
	return e
}
// escapeDots returns s with every unescaped regexp dot replaced by `\.`,
// so the dot matches only the literal '.' char instead of any char.
//
// A dot is left as is in the following cases:
//   - it is already escaped, i.e. it is preceded by an odd number of `\` chars.
//     An even number of `\` chars means the backslashes escape each other,
//     so the dot following them is unescaped and must be escaped.
//   - it is followed by the '*', '+' or '{' regexp modifier, i.e. it is a part
//     of `.*`, `.+` or `.{n}`.
//
// s is returned unchanged if it contains no dots.
func escapeDots(s string) string {
	dotsCount := strings.Count(s, ".")
	if dotsCount <= 0 {
		return s
	}
	// Every escaped dot adds exactly one byte to the result.
	result := make([]byte, 0, len(s)+dotsCount)
	// backslashes is the length of the run of `\` chars immediately preceding s[i].
	backslashes := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '\\' {
			backslashes++
			result = append(result, c)
			continue
		}
		if c == '.' && backslashes%2 == 0 {
			hasModifier := i+1 < len(s) && (s[i+1] == '*' || s[i+1] == '+' || s[i+1] == '{')
			if !hasModifier {
				result = append(result, '\\')
			}
		}
		result = append(result, c)
		backslashes = 0
	}
	return string(result)
}
var parseCacheV = func() *parseCache { var parseCacheV = func() *parseCache {
pc := &parseCache{ pc := &parseCache{
m: make(map[string]*parseCacheValue), m: make(map[string]*parseCacheValue),

View file

@ -8,8 +8,46 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metricsql"
) )
// TestEscapeDots verifies escapeDots on inputs without dots, with plain dots
// and with dots followed by the `*`, `+` and `{n}` modifiers.
func TestEscapeDots(t *testing.T) {
	// Every entry is {input, expected escapeDots output}.
	testCases := [][2]string{
		{"", ""},
		{"a", "a"},
		{"foobar", "foobar"},
		{".", `\.`},
		{".*", `.*`},
		{".+", `.+`},
		{"..", `\.\.`},
		{"foo.b.{2}ar..+baz.*", `foo\.b.{2}ar\..+baz.*`},
	}
	for _, tc := range testCases {
		s, resultExpected := tc[0], tc[1]
		if result := escapeDots(s); result != resultExpected {
			t.Fatalf("unexpected result for escapeDots(%q); got\n%s\nwant\n%s", s, result, resultExpected)
		}
	}
}
// TestEscapeDotsInRegexpLabelFilters parses each query, applies
// escapeDotsInRegexpLabelFilters and compares the serialized expression
// with the expected string.
func TestEscapeDotsInRegexpLabelFilters(t *testing.T) {
	// Every entry is {query, expected serialized query after escaping}.
	testCases := [][2]string{
		{"2", "2"},
		{`foo.bar + 123`, `foo.bar + 123`},
		{`foo{bar=~"baz.xx.yyy"}`, `foo{bar=~"baz\\.xx\\.yyy"}`},
		{`foo(a.b{c="d.e",x=~"a.b.+[.a]",y!~"aaa.bb|cc.dd"}) + x.y(1,sum({x=~"aa.bb"}))`, `foo(a.b{c="d.e", x=~"a\\.b.+[\\.a]", y!~"aaa\\.bb|cc\\.dd"}) + x.y(1, sum({x=~"aa\\.bb"}))`},
	}
	for _, tc := range testCases {
		s, resultExpected := tc[0], tc[1]
		parsed, err := metricsql.Parse(s)
		if err != nil {
			t.Fatalf("unexpected error in metricsql.Parse(%q): %s", s, err)
		}
		escaped := escapeDotsInRegexpLabelFilters(parsed)
		result := string(escaped.AppendString(nil))
		if result != resultExpected {
			t.Fatalf("unexpected result for escapeDotsInRegexpLabelFilters(%q);\ngot\n%s\nwant\n%s", s, result, resultExpected)
		}
	}
}
func TestExecSuccess(t *testing.T) { func TestExecSuccess(t *testing.T) {
accountID := uint32(123) accountID := uint32(123)
projectID := uint32(567) projectID := uint32(567)

View file

@ -7,6 +7,9 @@
* FEATURE: vmagent: add `-promscrape.dropOriginalLabels` command-line option, which can be used for reducing memory usage when scraping a big number of targets. * FEATURE: vmagent: add `-promscrape.dropOriginalLabels` command-line option, which can be used for reducing memory usage when scraping a big number of targets.
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825#issuecomment-724308361 for details. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825#issuecomment-724308361 for details.
* FEATURE: vmalert: explicitly set extra labels to alert entities. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/870 * FEATURE: vmalert: explicitly set extra labels to alert entities. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/870
* FEATURE: add `-search.treatDotsAsIsInRegexps` command-line flag, which can be used for automatic escaping of dots in regexp label filters used in queries.
For example, if `-search.treatDotsAsIsInRegexps` is set, then the query `foo{bar=~"aaa.bb.cc|dd.eee"}` is automatically converted to `foo{bar=~"aaa\\.bb\\.cc|dd\\.eee"}`.
This may be useful for querying Graphite data.
* BUGFIX: do not return data points at the end of the selected time range for time series ending in the middle of the selected time range. * BUGFIX: do not return data points at the end of the selected time range for time series ending in the middle of the selected time range.
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/887 and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/845 See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/887 and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/845