Aliaksandr Valialkin 2024-05-30 14:26:05 +02:00
parent 15598986e6
commit b7c062ac61
4 changed files with 61 additions and 30 deletions


@@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: allow omitting the result name in [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe). In this case the result name is automatically set to the string representation of the corresponding [stats function expression](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions). For example, `_time:5m | count(*)` is now a valid [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/). It is equivalent to `_time:5m | stats count(*) as "count(*)"`.
* BUGFIX: properly calculate the number of matching rows in `* | field_values x | stats count() rows` and in `* | unroll (x) | stats count() rows` queries.
## [v0.14.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.14.0-victorialogs)


@@ -1885,7 +1885,7 @@ See also:
uses [`count` stats function](#count-stats) for calculating the number of logs for the last 5 minutes:
```logsql
_time:5m | stats count() logs_total
_time:5m | stats count() as logs_total
```
`| stats ...` pipe has the following basic format:
@@ -1909,12 +1909,19 @@ For example, the following query calculates the following stats for logs over th
_time:5m | stats count() logs_total, count_uniq(_stream) streams_total
```
It is allowed to omit `stats` prefix for convenience. So the following query is equivalent to the previous one:
The `stats` prefix can be omitted for convenience, so the following query is equivalent to the previous one:
```logsql
_time:5m | count() logs_total, count_uniq(_stream) streams_total
```
The result name can also be omitted. In this case the result name is set to the string representation of the used [stats function](#stats-pipe-functions).
For example, the following query returns the same stats as the previous one, but uses the `count()` and `count_uniq(_stream)` names for the returned fields:
```logsql
_time:5m | count(), count_uniq(_stream)
```
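The auto-generated result name contains parentheses, so it usually has to be quoted when it is referenced in subsequent pipes. A minimal sketch, assuming quoted field names are accepted by the `fields` pipe:
```logsql
_time:5m | count(), count_uniq(_stream) | fields "count(*)", "count_uniq(_stream)"
```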
See also:
- [stats by fields](#stats-by-fields)


@@ -909,15 +909,18 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats count(foo,*,bar) x`, `* | stats count(*) as x`)
f(`* | stats count('') foo`, `* | stats count(_msg) as foo`)
f(`* | stats count(foo) ''`, `* | stats count(foo) as _msg`)
f(`* | count()`, `* | stats count(*) as "count(*)"`)
// stats pipe count_empty
f(`* | stats count_empty() x`, `* | stats count_empty(*) as x`)
f(`* | stats by (x, y) count_empty(a,b,c) x`, `* | stats by (x, y) count_empty(a, b, c) as x`)
f(`* | stats by (x, y) count_empty(a,b,c) z`, `* | stats by (x, y) count_empty(a, b, c) as z`)
f(`* | count_empty()`, `* | stats count_empty(*) as "count_empty(*)"`)
// stats pipe sum
f(`* | stats Sum(foo) bar`, `* | stats sum(foo) as bar`)
f(`* | stats BY(x, y, ) SUM(foo,bar,) bar`, `* | stats by (x, y) sum(foo, bar) as bar`)
f(`* | stats sum() x`, `* | stats sum(*) as x`)
f(`* | sum()`, `* | stats sum(*) as "sum(*)"`)
f(`* | stats sum(*) x`, `* | stats sum(*) as x`)
f(`* | stats sum(foo,*,bar) x`, `* | stats sum(*) as x`)
@@ -925,6 +928,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats Max(foo) bar`, `* | stats max(foo) as bar`)
f(`* | stats BY(x, y, ) MAX(foo,bar,) bar`, `* | stats by (x, y) max(foo, bar) as bar`)
f(`* | stats max() x`, `* | stats max(*) as x`)
f(`* | max()`, `* | stats max(*) as "max(*)"`)
f(`* | stats max(*) x`, `* | stats max(*) as x`)
f(`* | stats max(foo,*,bar) x`, `* | stats max(*) as x`)
@@ -932,22 +936,26 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats Min(foo) bar`, `* | stats min(foo) as bar`)
f(`* | stats BY(x, y, ) MIN(foo,bar,) bar`, `* | stats by (x, y) min(foo, bar) as bar`)
f(`* | stats min() x`, `* | stats min(*) as x`)
f(`* | min()`, `* | stats min(*) as "min(*)"`)
f(`* | stats min(*) x`, `* | stats min(*) as x`)
f(`* | stats min(foo,*,bar) x`, `* | stats min(*) as x`)
// stats pipe fields_min
f(`* | stats fields_Min(foo) bar`, `* | stats fields_min(foo) as bar`)
f(`* | fields_Min(foo)`, `* | stats fields_min(foo) as "fields_min(foo)"`)
f(`* | stats BY(x, y, ) fields_MIN(foo,bar,) bar`, `* | stats by (x, y) fields_min(foo, bar) as bar`)
// stats pipe avg
f(`* | stats Avg(foo) bar`, `* | stats avg(foo) as bar`)
f(`* | stats BY(x, y, ) AVG(foo,bar,) bar`, `* | stats by (x, y) avg(foo, bar) as bar`)
f(`* | stats avg() x`, `* | stats avg(*) as x`)
f(`* | avg()`, `* | stats avg(*) as "avg(*)"`)
f(`* | stats avg(*) x`, `* | stats avg(*) as x`)
f(`* | stats avg(foo,*,bar) x`, `* | stats avg(*) as x`)
// stats pipe count_uniq
f(`* | stats count_uniq(foo) bar`, `* | stats count_uniq(foo) as bar`)
f(`* | count_uniq(foo)`, `* | stats count_uniq(foo) as "count_uniq(foo)"`)
f(`* | stats by(x, y) count_uniq(foo,bar) LiMit 10 As baz`, `* | stats by (x, y) count_uniq(foo, bar) limit 10 as baz`)
f(`* | stats by(x) count_uniq(*) z`, `* | stats by (x) count_uniq(*) as z`)
f(`* | stats by(x) count_uniq() z`, `* | stats by (x) count_uniq(*) as z`)
@@ -955,6 +963,7 @@ func TestParseQuerySuccess(t *testing.T) {
// stats pipe uniq_values
f(`* | stats uniq_values(foo) bar`, `* | stats uniq_values(foo) as bar`)
f(`* | uniq_values(foo)`, `* | stats uniq_values(foo) as "uniq_values(foo)"`)
f(`* | stats uniq_values(foo) limit 10 bar`, `* | stats uniq_values(foo) limit 10 as bar`)
f(`* | stats by(x, y) uniq_values(foo, bar) as baz`, `* | stats by (x, y) uniq_values(foo, bar) as baz`)
f(`* | stats by(x) uniq_values(*) y`, `* | stats by (x) uniq_values(*) as y`)
@@ -963,6 +972,7 @@ func TestParseQuerySuccess(t *testing.T) {
// stats pipe values
f(`* | stats values(foo) bar`, `* | stats values(foo) as bar`)
f(`* | values(foo)`, `* | stats values(foo) as "values(foo)"`)
f(`* | stats values(foo) limit 10 bar`, `* | stats values(foo) limit 10 as bar`)
f(`* | stats by(x, y) values(foo, bar) as baz`, `* | stats by (x, y) values(foo, bar) as baz`)
f(`* | stats by(x) values(*) y`, `* | stats by (x) values(*) as y`)
@@ -973,6 +983,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats Sum_len(foo) bar`, `* | stats sum_len(foo) as bar`)
f(`* | stats BY(x, y, ) SUM_Len(foo,bar,) bar`, `* | stats by (x, y) sum_len(foo, bar) as bar`)
f(`* | stats sum_len() x`, `* | stats sum_len(*) as x`)
f(`* | sum_len()`, `* | stats sum_len(*) as "sum_len(*)"`)
f(`* | stats sum_len(*) x`, `* | stats sum_len(*) as x`)
f(`* | stats sum_len(foo,*,bar) x`, `* | stats sum_len(*) as x`)
@@ -981,12 +992,14 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats quantile(1, foo) bar`, `* | stats quantile(1, foo) as bar`)
f(`* | stats quantile(0.5, a, b, c) bar`, `* | stats quantile(0.5, a, b, c) as bar`)
f(`* | stats quantile(0.99) bar`, `* | stats quantile(0.99) as bar`)
f(`* | quantile(0.99)`, `* | stats quantile(0.99) as "quantile(0.99)"`)
f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99) as bar`)
// stats pipe median
f(`* | stats Median(foo) bar`, `* | stats median(foo) as bar`)
f(`* | stats BY(x, y, ) MEDIAN(foo,bar,) bar`, `* | stats by (x, y) median(foo, bar) as bar`)
f(`* | stats median() x`, `* | stats median(*) as x`)
f(`* | median()`, `* | stats median(*) as "median(*)"`)
f(`* | stats median(*) x`, `* | stats median(*) as x`)
f(`* | stats median(foo,*,bar) x`, `* | stats median(*) as x`)
@@ -995,7 +1008,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
// stats pipe with grouping buckets
f(`* | stats by(_time:1d, response_size:1_000KiB, request_duration:5s, foo) count() as foo`, `* | stats by (_time:1d, response_size:1_000KiB, request_duration:5s, foo) count(*) as foo`)
f(`* | stats by(_time:1d, response_size:1_000KiB, request_duration:5s, foo) count() as bar`, `* | stats by (_time:1d, response_size:1_000KiB, request_duration:5s, foo) count(*) as bar`)
f(`*|stats by(client_ip:/24, server_ip:/16) count() foo`, `* | stats by (client_ip:/24, server_ip:/16) count(*) as foo`)
f(`* | stats by(_time:1d offset 2h) count() as foo`, `* | stats by (_time:1d offset 2h) count(*) as foo`)
f(`* | stats by(_time:1d offset -2.5h5m) count() as foo`, `* | stats by (_time:1d offset -2.5h5m) count(*) as foo`)
@@ -1357,7 +1370,6 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | stats count(`)
f(`foo | stats count bar`)
f(`foo | stats count(bar`)
f(`foo | stats count(bar)`)
f(`foo | stats count() as`)
f(`foo | stats count() as |`)
@@ -1368,27 +1380,21 @@ func TestParseQueryFailure(t *testing.T) {
// invalid stats sum
f(`foo | stats sum`)
f(`foo | stats sum()`)
// invalid stats max
f(`foo | stats max`)
f(`foo | stats max()`)
// invalid stats min
f(`foo | stats min`)
f(`foo | stats min()`)
// invalid stats fields_min
f(`foo | stats fields_min`)
f(`foo | stats fields_min()`)
// invalid stats avg
f(`foo | stats avg`)
f(`foo | stats avg()`)
// invalid stats count_uniq
f(`foo | stats count_uniq`)
f(`foo | stats count_uniq()`)
f(`foo | stats count_uniq() limit`)
f(`foo | stats count_uniq() limit foo`)
f(`foo | stats count_uniq() limit 0.5`)
@@ -1396,7 +1402,6 @@ func TestParseQueryFailure(t *testing.T) {
// invalid stats uniq_values
f(`foo | stats uniq_values`)
f(`foo | stats uniq_values()`)
f(`foo | stats uniq_values() limit`)
f(`foo | stats uniq_values(a) limit foo`)
f(`foo | stats uniq_values(a) limit 0.5`)
@@ -1404,7 +1409,6 @@ func TestParseQueryFailure(t *testing.T) {
// invalid stats values
f(`foo | stats values`)
f(`foo | stats values()`)
f(`foo | stats values() limit`)
f(`foo | stats values(a) limit foo`)
f(`foo | stats values(a) limit 0.5`)
@@ -1412,7 +1416,6 @@ func TestParseQueryFailure(t *testing.T) {
// invalid stats sum_len
f(`foo | stats sum_len`)
f(`foo | stats sum_len()`)
// invalid stats quantile
f(`foo | stats quantile`)
@@ -1436,6 +1439,12 @@ func TestParseQueryFailure(t *testing.T) {
f(`foo | stats by(bar,`)
f(`foo | stats by(bar)`)
// duplicate stats result names
f(`foo | stats min() x, max() x`)
// stats result names identical to by fields
f(`foo | stats by (x) count() x`)
// invalid sort pipe
f(`foo | sort bar`)
f(`foo | sort by`)


@@ -545,9 +545,17 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
ps.byFields = bfs
}
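// Remember the 'by (...)' field names, so result names colliding with them can be rejected below.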
seenByFields := make(map[string]*byStatsField, len(ps.byFields))
for _, bf := range ps.byFields {
seenByFields[bf.name] = bf
}
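// Track the result name of every parsed stats function in order to reject duplicate result names.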
seenResultNames := make(map[string]statsFunc)
var funcs []pipeStatsFunc
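// Parse the comma-separated list of stats functions together with their optional result names.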
for {
var f pipeStatsFunc
sf, err := parseStatsFunc(lex)
if err != nil {
return nil, err
@@ -557,15 +565,31 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
if lex.isKeyword("if") {
iff, err := parseIfFilter(lex)
if err != nil {
return nil, err
return nil, fmt.Errorf("cannot parse 'if' filter for [%s]: %w", sf, err)
}
f.iff = iff
}
resultName, err := parseResultName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for [%s]: %w", sf, err)
resultName := ""
if lex.isKeyword(",", "|", ")", "") {
resultName = sf.String()
} else {
if lex.isKeyword("as") {
lex.nextToken()
}
fieldName, err := parseFieldName(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse result name for [%s]: %w", sf, err)
}
resultName = fieldName
}
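// Reject result names colliding with 'by (...)' fields or with result names of previously parsed stats functions.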
if bf := seenByFields[resultName]; bf != nil {
return nil, fmt.Errorf("the %q is used as 'by' field [%s], so it cannot be used as result name for [%s]", resultName, bf, sf)
}
if sfPrev := seenResultNames[resultName]; sfPrev != nil {
return nil, fmt.Errorf("cannot use identical result name %q for [%s] and [%s]", resultName, sfPrev, sf)
}
seenResultNames[resultName] = sf
f.resultName = resultName
funcs = append(funcs, f)
@@ -575,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
return &ps, nil
}
if !lex.isKeyword(",") {
return nil, fmt.Errorf("unexpected token %q; want ',', '|' or ')'", lex.token)
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token)
}
lex.nextToken()
}
@@ -672,17 +696,6 @@ func parseStatsFunc(lex *lexer) (statsFunc, error) {
}
}
func parseResultName(lex *lexer) (string, error) {
if lex.isKeyword("as") {
lex.nextToken()
}
resultName, err := parseFieldName(lex)
if err != nil {
return "", err
}
return resultName, nil
}
var zeroByStatsField = &byStatsField{}
// byStatsField represents 'by (...)' part of the pipeStats.