diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md
index 8c37682cf..8b2d7f2ae 100644
--- a/docs/VictoriaLogs/LogsQL.md
+++ b/docs/VictoriaLogs/LogsQL.md
@@ -1230,10 +1230,10 @@ to store the result of the corresponding stats function. The `as` keyword is opt
 For example, the following query calculates the following stats for logs over the last 5 minutes:
 
 - the number of logs with the help of [`count` stats function](#count-stats);
-- the number of unique [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with the help of [`uniq_count` stats function](#uniq_count-stats):
+- the number of unique [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with the help of [`count_uniq` stats function](#count_uniq-stats):
 
 ```logsql
-_time:5m | stats count() logs_total, uniq_count(_stream) streams_total
+_time:5m | stats count() logs_total, count_uniq(_stream) streams_total
 ```
 
 See also:
@@ -1258,7 +1258,7 @@ For example, the following query calculates the number of logs and unique ip add
 grouped by `(host, path)` fields:
 
 ```logsql
-_time:5m | stats by (host, path) count() logs_total, uniq_count(ip) ips_total
+_time:5m | stats by (host, path) count() logs_total, count_uniq(ip) ips_total
 ```
 
 #### Stats by time buckets
@@ -1277,7 +1277,7 @@ The `step` can have any [duration value](#duration-values). For example, the fol
 over the last 5 minutes:
 
 ```
-_time:5m | stats by (_time:1m) count() logs_total, uniq_count(ip) ips_total
+_time:5m | stats by (_time:1m) count() logs_total, count_uniq(ip) ips_total
 ```
 
 #### Stats by time buckets with timezone offset
@@ -1323,10 +1323,10 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
 - [`avg`](#avg-stats) calculates the average value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`count`](#count-stats) calculates the number of log entries.
 - [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
+- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
-- [`uniq_count`](#uniq_count-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`uniq_values`](#uniq_values-stats) returns unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 
 ### avg stats
@@ -1349,22 +1349,6 @@ See also:
 - [`sum`](#sum-stats)
 - [`count`](#count-stats)
 
-### count_empty stats
-
-`count_empty(field1, ..., fieldN)` calculates the number of logs with empty `(field1, ..., fieldN)` tuples.
-
-For example, the following query calculates the number of logs with empty `username` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
-during the last 5 minutes:
-
-```logsql
-_time:5m | stats count_empty(username) logs_with_missing_username
-```
-
-See also:
-
-- [`count`](#count-stats)
-- [`uniq_count`](#uniq_count-stats)
-
 ### count stats
 
 `count()` calculates the number of selected logs.
@@ -1392,10 +1376,49 @@ _time:5m | stats count(username, password) logs_with_username_or_password
 
 See also:
 
-- [`uniq_count`](#uniq_count-stats)
+- [`count_uniq`](#count_uniq-stats)
 - [`sum`](#sum-stats)
 - [`avg`](#avg-stats)
 
+### count_empty stats
+
+`count_empty(field1, ..., fieldN)` calculates the number of logs with empty `(field1, ..., fieldN)` tuples.
+
+For example, the following query calculates the number of logs with empty `username` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
+during the last 5 minutes:
+
+```logsql
+_time:5m | stats count_empty(username) logs_with_missing_username
+```
+
+See also:
+
+- [`count`](#count-stats)
+- [`count_uniq`](#count_uniq-stats)
+
+### count_uniq stats
+
+`count_uniq(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the number of unique non-empty `(field1, ..., fieldN)` tuples.
+
+For example, the following query returns the number of unique non-empty values for `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
+over the last 5 minutes:
+
+```logsql
+_time:5m | stats count_uniq(ip) ips
+```
+
+The following query returns the number of unique `(host, path)` pairs for the corresponding [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
+over the last 5 minutes:
+
+```logsql
+_time:5m | stats count_uniq(host, path) unique_host_path_pairs
+```
+
+See also:
+
+- [`uniq_values`](#uniq_values-stats)
+- [`count`](#count-stats)
+
 ### max stats
 
 `max(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the maximum value across
@@ -1455,29 +1478,6 @@ See also:
 - [`max`](#max-stats)
 - [`min`](#min-stats)
 
-### uniq_count stats
-
-`uniq_count(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the number of unique non-empty `(field1, ..., fieldN)` tuples.
-
-For example, the following query returns the number of unique non-empty values for `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
-over the last 5 minutes:
-
-```logsql
-_time:5m | stats uniq_count(ip) ips
-```
-
-The following query returns the number of unique `(host, path)` pairs for the corresponding [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
-over the last 5 minutes:
-
-```logsql
-_time:5m | stats uniq_count(host, path) unique_host_path_pairs
-```
-
-See also:
-
-- [`uniq_values`](#uniq_values-stats)
-- [`count`](#count-stats)
-
 ### uniq_values stats
 
 `uniq_values(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the unique non-empty values across
@@ -1493,7 +1493,7 @@ _time:5m | stats uniq_values(ip) unique_ips
 
 See also:
 
-- [`uniq_count`](#uniq_count-stats)
+- [`count_uniq`](#count_uniq-stats)
 - [`count`](#count-stats)
 
 ## Stream context
diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go
index c35f44ecf..bc40c3326 100644
--- a/lib/logstorage/parser_test.go
+++ b/lib/logstorage/parser_test.go
@@ -904,12 +904,12 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | stats avg(*) x`, `* | stats avg(*) as x`)
 	f(`* | stats avg(foo,*,bar) x`, `* | stats avg(*) as x`)
 
-	// stats pipe uniq_count
-	f(`* | stats uniq_count(foo) bar`, `* | stats uniq_count(foo) as bar`)
-	f(`* | stats by(x, y) uniq_count(foo,bar) as baz`, `* | stats by (x, y) uniq_count(foo, bar) as baz`)
-	f(`* | stats by(x) uniq_count(*) z`, `* | stats by (x) uniq_count(*) as z`)
-	f(`* | stats by(x) uniq_count() z`, `* | stats by (x) uniq_count(*) as z`)
-	f(`* | stats by(x) uniq_count(a,*,b) z`, `* | stats by (x) uniq_count(*) as z`)
+	// stats pipe count_uniq
+	f(`* | stats count_uniq(foo) bar`, `* | stats count_uniq(foo) as bar`)
+	f(`* | stats by(x, y) count_uniq(foo,bar) as baz`, `* | stats by (x, y) count_uniq(foo, bar) as baz`)
+	f(`* | stats by(x) count_uniq(*) z`, `* | stats by (x) count_uniq(*) as z`)
+	f(`* | stats by(x) count_uniq() z`, `* | stats by (x) count_uniq(*) as z`)
+	f(`* | stats by(x) count_uniq(a,*,b) z`, `* | stats by (x) count_uniq(*) as z`)
 
 	// stats pipe uniq_values
 	f(`* | stats uniq_values(foo) bar`, `* | stats uniq_values(foo) as bar`)
@@ -919,8 +919,8 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | stats by(x) uniq_values(a,*,b) y`, `* | stats by (x) uniq_values(*) as y`)
 
 	// stats pipe multiple funcs
-	f(`* | stats count() "foo.bar:baz", uniq_count(a) bar`, `* | stats count(*) as "foo.bar:baz", uniq_count(a) as bar`)
-	f(`* | stats by (x, y) count(*) foo, uniq_count(a,b) bar`, `* | stats by (x, y) count(*) as foo, uniq_count(a, b) as bar`)
+	f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
+	f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
 
 	// stats pipe with grouping buckets
 	f(`* | stats by(_time:1d, response_size:1_000KiB, request_duration:5s, foo) count() as foo`, `* | stats by (_time:1d, response_size:1_000KiB, request_duration:5s, foo) count(*) as foo`)
@@ -1224,9 +1224,9 @@ func TestParseQueryFailure(t *testing.T) {
 	f(`foo | stats avg`)
 	f(`foo | stats avg()`)
 
-	// invalid stats uniq_count
-	f(`foo | stats uniq_count`)
-	f(`foo | stats uniq_count()`)
+	// invalid stats count_uniq
+	f(`foo | stats count_uniq`)
+	f(`foo | stats count_uniq()`)
 
 	// invalid stats uniq_values
 	f(`foo | stats uniq_values`)
diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go
index 3b48af883..78c56e782 100644
--- a/lib/logstorage/pipe_stats.go
+++ b/lib/logstorage/pipe_stats.go
@@ -476,10 +476,10 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
 			return nil, "", fmt.Errorf("cannot parse 'count_empty' func: %w", err)
 		}
 		sf = scs
-	case lex.isKeyword("uniq_count"):
-		sus, err := parseStatsUniqCount(lex)
+	case lex.isKeyword("count_uniq"):
+		sus, err := parseStatsCountUniq(lex)
 		if err != nil {
-			return nil, "", fmt.Errorf("cannot parse 'uniq_count' func: %w", err)
+			return nil, "", fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
 		}
 		sf = sus
 	case lex.isKeyword("sum"):
diff --git a/lib/logstorage/stats_uniq_count.go b/lib/logstorage/stats_count_uniq.go
similarity index 91%
rename from lib/logstorage/stats_uniq_count.go
rename to lib/logstorage/stats_count_uniq.go
index 1889d11d0..ca5f9b0cf 100644
--- a/lib/logstorage/stats_uniq_count.go
+++ b/lib/logstorage/stats_count_uniq.go
@@ -9,21 +9,21 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 )
 
-type statsUniqCount struct {
+type statsCountUniq struct {
 	fields       []string
 	containsStar bool
 }
 
-func (su *statsUniqCount) String() string {
-	return "uniq_count(" + fieldNamesString(su.fields) + ")"
+func (su *statsCountUniq) String() string {
+	return "count_uniq(" + fieldNamesString(su.fields) + ")"
 }
 
-func (su *statsUniqCount) neededFields() []string {
+func (su *statsCountUniq) neededFields() []string {
 	return su.fields
 }
 
-func (su *statsUniqCount) newStatsProcessor() (statsProcessor, int) {
-	sup := &statsUniqCountProcessor{
+func (su *statsCountUniq) newStatsProcessor() (statsProcessor, int) {
+	sup := &statsCountUniqProcessor{
 		su: su,
 
 		m: make(map[string]struct{}),
@@ -31,8 +31,8 @@ func (su *statsUniqCount) newStatsProcessor() (statsProcessor, int) {
 	return sup, int(unsafe.Sizeof(*sup))
 }
 
-type statsUniqCountProcessor struct {
-	su *statsUniqCount
+type statsCountUniqProcessor struct {
+	su *statsCountUniq
 
 	m map[string]struct{}
 
@@ -40,7 +40,7 @@ type statsUniqCountProcessor struct {
 	keyBuf []byte
 }
 
-func (sup *statsUniqCountProcessor) updateStatsForAllRows(br *blockResult) int {
+func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
 	fields := sup.su.fields
 	m := sup.m
 
@@ -215,7 +215,7 @@ func (sup *statsUniqCountProcessor) updateStatsForAllRows(br *blockResult) int {
 	return stateSizeIncrease
 }
 
-func (sup *statsUniqCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
+func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
 	fields := sup.su.fields
 	m := sup.m
 
@@ -339,8 +339,8 @@ func (sup *statsUniqCountProcessor) updateStatsForRow(br *blockResult, rowIdx in
 	return stateSizeIncrease
 }
 
-func (sup *statsUniqCountProcessor) mergeState(sfp statsProcessor) {
-	src := sfp.(*statsUniqCountProcessor)
+func (sup *statsCountUniqProcessor) mergeState(sfp statsProcessor) {
+	src := sfp.(*statsCountUniqProcessor)
 	m := sup.m
 	for k := range src.m {
 		if _, ok := m[k]; !ok {
@@ -349,17 +349,17 @@ func (sup *statsUniqCountProcessor) mergeState(sfp statsProcessor) {
 	}
 }
 
-func (sup *statsUniqCountProcessor) finalizeStats() string {
+func (sup *statsCountUniqProcessor) finalizeStats() string {
 	n := uint64(len(sup.m))
 	return strconv.FormatUint(n, 10)
 }
 
-func parseStatsUniqCount(lex *lexer) (*statsUniqCount, error) {
-	fields, err := parseFieldNamesForStatsFunc(lex, "uniq_count")
+func parseStatsCountUniq(lex *lexer) (*statsCountUniq, error) {
+	fields, err := parseFieldNamesForStatsFunc(lex, "count_uniq")
 	if err != nil {
 		return nil, err
 	}
-	su := &statsUniqCount{
+	su := &statsCountUniq{
 		fields:       fields,
 		containsStar: slices.Contains(fields, "*"),
 	}
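
The rename is mechanical and does not change how the aggregation works: `statsCountUniqProcessor` still collapses each selected `(field1, ..., fieldN)` tuple into a key kept in a `map[string]struct{}`, merges per-worker maps in `mergeState`, and reports the map size from `finalizeStats`. Below is a minimal standalone Go sketch of that dedup-and-merge idea; it is an illustration only, not the VictoriaLogs code, and the `countUniqSketch` type and its length-prefixed key encoding are invented for the example:

```go
package main

import (
	"fmt"
	"strconv"
)

// countUniqSketch mimics the core idea behind a count_uniq-style stats
// processor: every tuple of field values is flattened into a single key
// and deduplicated via a map used as a set.
type countUniqSketch struct {
	m map[string]struct{}
}

func newCountUniqSketch() *countUniqSketch {
	return &countUniqSketch{m: make(map[string]struct{})}
}

// update registers one row's tuple of field values. Length-prefixing each
// value keeps ("ab", "c") and ("a", "bc") from colliding into the same key.
func (s *countUniqSketch) update(values ...string) {
	key := ""
	for _, v := range values {
		key += strconv.Itoa(len(v)) + ":" + v + ";"
	}
	s.m[key] = struct{}{}
}

// merge folds another partial state into this one, similar in spirit to mergeState.
func (s *countUniqSketch) merge(src *countUniqSketch) {
	for k := range src.m {
		s.m[k] = struct{}{}
	}
}

// finalize reports the number of unique tuples, similar in spirit to finalizeStats.
func (s *countUniqSketch) finalize() string {
	return strconv.Itoa(len(s.m))
}

func main() {
	a := newCountUniqSketch()
	a.update("example.com", "/index.html")
	a.update("example.com", "/index.html") // duplicate tuple, counted once

	b := newCountUniqSketch()
	b.update("example.com", "/about.html")

	a.merge(b)                // combine per-worker partial states
	fmt.Println(a.finalize()) // prints "2"
}
```

For those three rows the merged set holds two distinct `(host, path)` tuples, which matches what a query such as `_time:5m | stats count_uniq(host, path) unique_host_path_pairs` would report for the same data.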