From f2cf5d8e36af7f68581ad2a7972896f7eab596f3 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 31 May 2019 17:35:15 +0300 Subject: [PATCH] app/vmselect/promql: allow escaping identifiers with `\` and `\xXX` Fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/42 --- app/vmselect/promql/lexer.go | 115 ++++++++++++++++++++++++++--- app/vmselect/promql/lexer_test.go | 51 +++++++++++++ app/vmselect/promql/parser.go | 18 ++--- app/vmselect/promql/parser_test.go | 19 ++++- 4 files changed, 179 insertions(+), 24 deletions(-) diff --git a/app/vmselect/promql/lexer.go b/app/vmselect/promql/lexer.go index 4b54f23d64..6be24bec5f 100644 --- a/app/vmselect/promql/lexer.go +++ b/app/vmselect/promql/lexer.go @@ -4,6 +4,8 @@ import ( "fmt" "strconv" "strings" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) type lexer struct { @@ -85,10 +87,7 @@ again: goto tokenFoundLabel } if isIdentPrefix(s) { - token, err = scanIdent(s) - if err != nil { - return "", err - } + token = scanIdent(s) goto tokenFoundLabel } if isStringPrefix(s) { @@ -210,15 +209,103 @@ func scanPositiveNumber(s string) (string, error) { return s[:j], nil } -func scanIdent(s string) (string, error) { - if len(s) == 0 { - return "", fmt.Errorf("ident cannot be empty") - } +func scanIdent(s string) string { i := 0 - for i < len(s) && isIdentChar(s[i]) { - i++ + for i < len(s) { + if isIdentChar(s[i]) { + i++ + continue + } + if s[i] != '\\' { + break + } + + // Do not verify the next char, since it is escaped. + i += 2 + if i > len(s) { + i-- + break + } } - return s[:i], nil + if i == 0 { + logger.Panicf("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent") + } + return s[:i] +} + +func unescapeIdent(s string) string { + n := strings.IndexByte(s, '\\') + if n < 0 { + return s + } + dst := make([]byte, 0, len(s)) + for { + dst = append(dst, s[:n]...) + s = s[n+1:] + if len(s) == 0 { + return string(dst) + } + if s[0] == 'x' && len(s) >= 3 { + h1 := fromHex(s[1]) + h2 := fromHex(s[2]) + if h1 >= 0 && h2 >= 0 { + dst = append(dst, byte((h1<<4)|h2)) + s = s[3:] + } else { + dst = append(dst, s[0]) + s = s[1:] + } + } else { + dst = append(dst, s[0]) + s = s[1:] + } + n = strings.IndexByte(s, '\\') + if n < 0 { + dst = append(dst, s...) + return string(dst) + } + } +} + +func fromHex(ch byte) int { + if ch >= '0' && ch <= '9' { + return int(ch - '0') + } + if ch >= 'a' && ch <= 'f' { + return int((ch - 'a') + 10) + } + if ch >= 'A' && ch <= 'F' { + return int((ch - 'A') + 10) + } + return -1 +} + +func toHex(n byte) byte { + if n < 10 { + return '0' + n + } + return 'a' + (n - 10) +} + +func appendEscapedIdent(dst, s []byte) []byte { + for i := 0; i < len(s); i++ { + ch := s[i] + if isIdentChar(ch) { + if i == 0 && !isFirstIdentChar(ch) { + // hex-encode the first char + dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf)) + } else { + dst = append(dst, ch) + } + } else if ch >= 0x20 && ch < 0x7f { + // Leave ASCII printable chars as is + dst = append(dst, '\\', ch) + } else { + // hex-encode non-printable chars + dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf)) + } + } + return dst } func (lex *lexer) Prev() { @@ -353,6 +440,10 @@ func isIdentPrefix(s string) bool { if len(s) == 0 { return false } + if s[0] == '\\' { + // Assume this is an escape char for the next char. + return true + } return isFirstIdentChar(s[0]) } @@ -367,7 +458,7 @@ func isIdentChar(ch byte) bool { if isFirstIdentChar(ch) { return true } - return isDecimalChar(ch) || ch == ':' || ch == '.' + return isDecimalChar(ch) || ch == '.' } func isSpaceChar(ch byte) bool { diff --git a/app/vmselect/promql/lexer_test.go b/app/vmselect/promql/lexer_test.go index d6a9abd3fe..0685f797e1 100644 --- a/app/vmselect/promql/lexer_test.go +++ b/app/vmselect/promql/lexer_test.go @@ -5,6 +5,57 @@ import ( "testing" ) +func TestUnescapeIdent(t *testing.T) { + f := func(s, resultExpected string) { + t.Helper() + result := unescapeIdent(s) + if result != resultExpected { + t.Fatalf("unexpected result for unescapeIdent(%q); got %q; want %q", s, result, resultExpected) + } + } + f("", "") + f("a", "a") + f("\\", "") + f(`\\`, `\`) + f(`\foo\-bar`, `foo-bar`) + f(`a\\\\b\"c\d`, `a\\b"cd`) + f(`foo.bar:baz_123`, `foo.bar:baz_123`) + f(`foo\ bar`, `foo bar`) + f(`\x21`, `!`) + f(`\xeDfoo\x2Fbar\-\xqw\x`, "\xedfoo\x2fbar-xqwx") +} + +func TestAppendEscapedIdent(t *testing.T) { + f := func(s, resultExpected string) { + t.Helper() + result := appendEscapedIdent(nil, []byte(s)) + if string(result) != resultExpected { + t.Fatalf("unexpected result for appendEscapedIdent(%q); got %q; want %q", s, result, resultExpected) + } + } + f(`a`, `a`) + f(`a.b:c_23`, `a.b:c_23`) + f(`a b-cd+dd\`, `a\ b\-cd\+dd\\`) + f("a\x1E\x20\xee", `a\x1e\ \xee`) + f("\x2e\x2e", `\x2e.`) +} + +func TestScanIdent(t *testing.T) { + f := func(s, resultExpected string) { + t.Helper() + result := scanIdent(s) + if result != resultExpected { + t.Fatalf("unexpected result for scanIdent(%q): got %q; want %q", s, result, resultExpected) + } + } + f("a", "a") + f("foo.bar:baz_123", "foo.bar:baz_123") + f("a+b", "a") + f("foo()", "foo") + f(`a\-b+c`, `a\-b`) + f(`a\ b\\\ c\`, `a\ b\\\ c\`) +} + func TestLexerNextPrev(t *testing.T) { var lex lexer lex.Init("foo bar baz") diff --git a/app/vmselect/promql/parser.go b/app/vmselect/promql/parser.go index 07a7f86b46..7ac978efd0 100644 --- a/app/vmselect/promql/parser.go +++ b/app/vmselect/promql/parser.go @@ -6,7 +6,6 @@ import ( "strings" "sync" - "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" ) @@ -745,7 +744,7 @@ func expandWithExpr(was []*withArgExpr, e expr) (expr, error) { if !t.HasNonEmptyMetricGroup() { return t, nil } - k := bytesutil.ToUnsafeString(t.TagFilters[0].Value) + k := string(appendEscapedIdent(nil, t.TagFilters[0].Value)) wa := getWithArgExpr(was, k) if wa == nil { return t, nil @@ -1075,9 +1074,6 @@ func (p *parser) parseTagFilterExpr() (*tagFilterExpr, error) { } var tfe tagFilterExpr tfe.Key = p.lex.Token - if tfe.Key == "__name__" { - tfe.Key = "" - } if err := p.lex.Next(); err != nil { return nil, err } @@ -1126,8 +1122,12 @@ func (tfe *tagFilterExpr) toTagFilter() (*storage.TagFilter, error) { } var tf storage.TagFilter - tf.Key = []byte(tfe.Key) - tf.Value = []byte(tfe.Value.S) + tf.Key = []byte(unescapeIdent(tfe.Key)) + if len(tfe.Key) == 0 { + tf.Value = []byte(unescapeIdent(tfe.Value.S)) + } else { + tf.Value = []byte(tfe.Value.S) + } tf.IsRegexp = tfe.IsRegexp tf.IsNegative = tfe.IsNegative if !tf.IsRegexp { @@ -1586,7 +1586,7 @@ func (me *metricExpr) AppendString(dst []byte) []byte { if len(tfs) > 0 { tf := &tfs[0] if len(tf.Key) == 0 && !tf.IsNegative && !tf.IsRegexp { - dst = append(dst, tf.Value...) + dst = appendEscapedIdent(dst, tf.Value) tfs = tfs[1:] } } @@ -1628,7 +1628,7 @@ func appendStringTagFilter(dst []byte, tf *storage.TagFilter) []byte { if len(tf.Key) == 0 { dst = append(dst, "__name__"...) } else { - dst = append(dst, tf.Key...) + dst = appendEscapedIdent(dst, tf.Key) } var op string if tf.IsNegative { diff --git a/app/vmselect/promql/parser_test.go b/app/vmselect/promql/parser_test.go index 903e0389c5..0352146d71 100644 --- a/app/vmselect/promql/parser_test.go +++ b/app/vmselect/promql/parser_test.go @@ -118,6 +118,13 @@ func TestParsePromQLSuccess(t *testing.T) { same("with") same("WITH") same("With") + // identifiers with with escape chars + same(`{__name__="foo bar"}`) + same(`foo\-bar\{{baz\+bar="aa"}`) + another(`\x2E\x2ef\oo{b\xEF\ar="aa"}`, `\x2e.foo{b\xefar="aa"}`) + // Duplicate filters + same(`foo{__name__="bar"}`) + same(`foo{a="b", a="c", __name__="aaa", b="d"}`) // Metric filters ending with comma another(`m{foo="bar",}`, `m{foo="bar"}`) // String concat in tag value @@ -251,6 +258,8 @@ func TestParsePromQLSuccess(t *testing.T) { same(`rate(rate(m[5m]))`) same(`rate(rate(m[5m])[1h:])`) same(`rate(rate(m[5m])[1h:3s])`) + // funcName with escape chars + same(`foo\(ba\-r()`) // aggrFuncExpr same(`sum(http_server_request) by ()`) @@ -295,10 +304,14 @@ func TestParsePromQLSuccess(t *testing.T) { another(`with (ct={job="test", i="bar"}) ct + {ct, x="d"} + foo{ct, ct} + ctx(1)`, `(({job="test", i="bar"} + {job="test", i="bar", x="d"}) + foo{job="test", i="bar"}) + ctx(1)`) another(`with (foo = bar) {__name__=~"foo"}`, `{__name__=~"foo"}`) - another(`with (foo = bar) {__name__="foo"}`, `bar`) - another(`with (foo = bar) {__name__="foo", x="y"}`, `bar{x="y"}`) + another(`with (foo = bar) foo{__name__="foo"}`, `bar`) + another(`with (foo = bar) {__name__="foo", x="y"}`, `{__name__="foo", x="y"}`) another(`with (foo(bar) = {__name__!="bar"}) foo(x)`, `{__name__!="bar"}`) - another(`with (foo(bar) = {__name__="bar"}) foo(x)`, `x`) + another(`with (foo(bar) = bar{__name__="bar"}) foo(x)`, `x`) + another(`with (foo\-bar(baz) = baz + baz) foo\-bar((x,y))`, `(x, y) + (x, y)`) + another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x*y)`, `(x * y) + (x * y)`) + another(`with (foo\-bar(baz) = baz + baz) foo\-bar(x\*y)`, `x\*y + x\*y`) + another(`with (foo\-bar(b\ az) = b\ az + b\ az) foo\-bar(x\*y)`, `x\*y + x\*y`) // override ttf to something new. another(`with (ttf = a) ttf + b`, `a + b`) // override ttf to ru