From c86edb59e608145323fb73ad3101dc85a7aca049 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 11 Jun 2024 15:13:55 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 1 + lib/logstorage/parser.go | 11 +++++++++ lib/logstorage/parser_test.go | 7 ++++++ lib/logstorage/pipe.go | 41 ++++++++++++++++++++++++++++++++++ lib/logstorage/pipe_stats.go | 18 +++++++++++++++ 5 files changed, 78 insertions(+) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 27759b5e0e..d3f22d738b 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: do not allow starting the [filter](https://docs.victoriametrics.com/victorialogs/logsql/#filters) with [pipe names](https://docs.victoriametrics.com/victorialogs/logsql/#pipes) and [stats function names](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions). This prevents from unexpected results returned by incorrect queries, which miss mandatory [filter](https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax). * FEATURE: treat unexpected syslog message as [RFC3164](https://datatracker.ietf.org/doc/html/rfc3164) containing only the `message` field when using [`unpack_syslog` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe). * FEATURE: allow using `where` prefix instead of `filter` prefix in [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe). * FEATURE: disallow unescaped `!` char in [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) queries, since it permits writing incorrect query, which may look like correct one. For example, `foo!:bar` instead of `foo:!bar`. diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index d3122f5d83..9aaaa3b26b 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -514,6 +514,14 @@ func (q *Query) getNeededColumns() ([]string, []string) { // ParseQuery parses s. func ParseQuery(s string) (*Query, error) { lex := newLexer(s) + + // Verify the first token doesn't match pipe names. + firstToken := strings.ToLower(lex.rawToken) + if _, ok := pipeNames[firstToken]; ok { + return nil, fmt.Errorf("the query [%s] cannot start with pipe - it must start with madatory filter; see https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax; "+ + "if the filter isn't missing, then please put the first word of the filter into quotes: %q", s, firstToken) + } + q, err := parseQuery(lex) if err != nil { return nil, err @@ -1842,6 +1850,9 @@ func needQuoteToken(s string) bool { if _, ok := reservedKeywords[sLower]; ok { return true } + if _, ok := pipeNames[sLower]; ok { + return true + } for _, r := range s { if !isTokenRune(r) && r != '.' && r != '-' { return true diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 815c3ab128..af013edfbe 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -676,6 +676,8 @@ func TestParseQuerySuccess(t *testing.T) { f(`foo or bar baz or xyz`, `foo or bar baz or xyz`) f(`(foo or bar) (baz or xyz)`, `(foo or bar) (baz or xyz)`) f(`(foo OR bar) AND baz`, `(foo or bar) baz`) + f(`'stats' foo`, `"stats" foo`) + f(`"filter" bar copy fields avg baz`, `"filter" bar "copy" "fields" "avg" baz`) // parens f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`) @@ -1214,6 +1216,11 @@ func TestParseQueryFailure(t *testing.T) { f("not (abc") f("!") + // pipe names without quoutes + f(`filter foo:bar`) + f(`stats count()`) + f(`count()`) + // invalid parens f("(") f("foo (bar ") diff --git a/lib/logstorage/pipe.go b/lib/logstorage/pipe.go index abc9cd80c2..d9890f043e 100644 --- a/lib/logstorage/pipe.go +++ b/lib/logstorage/pipe.go @@ -264,3 +264,44 @@ func parsePipe(lex *lexer) (pipe, error) { return nil, fmt.Errorf("unexpected pipe %q", lex.token) } } + +var pipeNames = func() map[string]struct{} { + a := []string{ + "copy", "cp", + "delete", "del", "rm", "drop", + "drop_empty_fields", + "extract", + "extract_regexp", + "field_names", + "field_values", + "fields", "keep", + "filter", "where", + "format", + "limit", "head", + "math", "eval", + "offset", "skip", + "pack_json", + "pack_logmft", + "rename", "mv", + "replace", + "replace_regexp", + "sort", + "stats", + "uniq", + "unpack_json", + "unpack_logfmt", + "unpack_syslog", + "unroll", + } + + m := make(map[string]struct{}, len(a)) + for _, s := range a { + m[s] = struct{}{} + } + + // add stats names here, since they can be used without the initial `stats` keyword + for _, s := range statsNames { + m[s] = struct{}{} + } + return m +}() diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go index b60c51ac51..be05877849 100644 --- a/lib/logstorage/pipe_stats.go +++ b/lib/logstorage/pipe_stats.go @@ -702,6 +702,24 @@ func parseStatsFunc(lex *lexer) (statsFunc, error) { } } +var statsNames = []string{ + "avg", + "count", + "count_empty", + "count_uniq", + "max", + "median", + "min", + "quantile", + "row_any", + "row_max", + "row_min", + "sum", + "sum_len", + "uniq_values", + "values", +} + var zeroByStatsField = &byStatsField{} // byStatsField represents 'by (...)' part of the pipeStats.