From b91501cfa86a01a9e3b87e74db63f63755144922 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Tue, 7 May 2024 23:44:12 +0200
Subject: [PATCH] wip

---
 docs/VictoriaLogs/LogsQL.md                 | 24 +++++++-------
 lib/logstorage/parser_test.go               | 22 ++++++-------
 lib/logstorage/pipe_stats.go                |  6 ++--
 .../{stats_uniq.go => stats_uniq_count.go}  | 32 +++++++++----------
 4 files changed, 42 insertions(+), 42 deletions(-)
 rename lib/logstorage/{stats_uniq.go => stats_uniq_count.go} (91%)

diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md
index af800a98b..8c37682cf 100644
--- a/docs/VictoriaLogs/LogsQL.md
+++ b/docs/VictoriaLogs/LogsQL.md
@@ -1230,10 +1230,10 @@ to store the result of the corresponding stats function. The `as` keyword is opt
 For example, the following query calculates the following stats for logs over the last 5 minutes:
 
 - the number of logs with the help of [`count` stats function](#count-stats);
-- the number of unique [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with the help of [`uniq` stats function](#uniq-stats):
+- the number of unique [log streams](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) with the help of [`uniq_count` stats function](#uniq_count-stats):
 
 ```logsql
-_time:5m | stats count() logs_total, uniq(_stream) streams_total
+_time:5m | stats count() logs_total, uniq_count(_stream) streams_total
 ```
 
 See also:
@@ -1258,7 +1258,7 @@ For example, the following query calculates the number of logs and unique ip add
 grouped by `(host, path)` fields:
 
 ```logsql
-_time:5m | stats by (host, path) count() logs_total, uniq(ip) ips_total
+_time:5m | stats by (host, path) count() logs_total, uniq_count(ip) ips_total
 ```
 
 #### Stats by time buckets
@@ -1277,7 +1277,7 @@ The `step` can have any [duration value](#duration-values). For example, the fol
 over the last 5 minutes:
 
 ```
-_time:5m | stats by (_time:1m) count() logs_total, uniq(ip) ips_total
+_time:5m | stats by (_time:1m) count() logs_total, uniq_count(ip) ips_total
 ```
 
 #### Stats by time buckets with timezone offset
@@ -1326,7 +1326,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
 - [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
-- [`uniq`](#uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
+- [`uniq_count`](#uniq_count-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 - [`uniq_values`](#uniq_values-stats) returns unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
 
 ### avg stats
@@ -1363,7 +1363,7 @@ _time:5m | stats count_empty(username) logs_with_missing_username
 See also:
 
 - [`count`](#count-stats)
-- [`uniq`](#uniq-stats)
+- [`uniq_count`](#uniq_count-stats)
 
 ### count stats
 
@@ -1392,7 +1392,7 @@ _time:5m | stats count(username, password) logs_with_username_or_password
 
 See also:
 
-- [`uniq`](#uniq-stats)
+- [`uniq_count`](#uniq_count-stats)
 - [`sum`](#sum-stats)
 - [`avg`](#avg-stats)
 
@@ -1455,22 +1455,22 @@ See also:
 - [`max`](#max-stats)
 - [`min`](#min-stats)
 
-### uniq stats
+### uniq_count stats
 
-`uniq(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the number of unique non-empty `(field1, ..., fieldN)` tuples.
+`uniq_count(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the number of unique non-empty `(field1, ..., fieldN)` tuples.
 
 For example, the following query returns the number of unique non-empty values for `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
 over the last 5 minutes:
 
 ```logsql
-_time:5m | stats uniq(ip) ips
+_time:5m | stats uniq_count(ip) ips
 ```
 
 The following query returns the number of unique `(host, path)` pairs for the corresponding [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
 over the last 5 minutes:
 
 ```logsql
-_time:5m | stats uniq(host, path) unique_host_path_pairs
+_time:5m | stats uniq_count(host, path) unique_host_path_pairs
 ```
 
 See also:
@@ -1493,7 +1493,7 @@ _time:5m | stats uniq_values(ip) unique_ips
 
 See also:
 
-- [`uniq`](#uniq-stats)
+- [`uniq_count`](#uniq_count-stats)
 - [`count`](#count-stats)
 
 ## Stream context
diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go
index 06658344a..c35f44ecf 100644
--- a/lib/logstorage/parser_test.go
+++ b/lib/logstorage/parser_test.go
@@ -904,12 +904,12 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | stats avg(*) x`, `* | stats avg(*) as x`)
 	f(`* | stats avg(foo,*,bar) x`, `* | stats avg(*) as x`)
 
-	// stats pipe uniq
-	f(`* | stats uniq(foo) bar`, `* | stats uniq(foo) as bar`)
-	f(`* | stats by(x, y) uniq(foo,bar) as baz`, `* | stats by (x, y) uniq(foo, bar) as baz`)
-	f(`* | stats by(x) uniq(*) z`, `* | stats by (x) uniq(*) as z`)
-	f(`* | stats by(x) uniq() z`, `* | stats by (x) uniq(*) as z`)
-	f(`* | stats by(x) uniq(a,*,b) z`, `* | stats by (x) uniq(*) as z`)
+	// stats pipe uniq_count
+	f(`* | stats uniq_count(foo) bar`, `* | stats uniq_count(foo) as bar`)
+	f(`* | stats by(x, y) uniq_count(foo,bar) as baz`, `* | stats by (x, y) uniq_count(foo, bar) as baz`)
+	f(`* | stats by(x) uniq_count(*) z`, `* | stats by (x) uniq_count(*) as z`)
+	f(`* | stats by(x) uniq_count() z`, `* | stats by (x) uniq_count(*) as z`)
+	f(`* | stats by(x) uniq_count(a,*,b) z`, `* | stats by (x) uniq_count(*) as z`)
 
 	// stats pipe uniq_values
 	f(`* | stats uniq_values(foo) bar`, `* | stats uniq_values(foo) as bar`)
@@ -919,8 +919,8 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | stats by(x) uniq_values(a,*,b) y`, `* | stats by (x) uniq_values(*) as y`)
 
 	// stats pipe multiple funcs
-	f(`* | stats count() "foo.bar:baz", uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", uniq(a) as bar`)
-	f(`* | stats by (x, y) count(*) foo, uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, uniq(a, b) as bar`)
+	f(`* | stats count() "foo.bar:baz", uniq_count(a) bar`, `* | stats count(*) as "foo.bar:baz", uniq_count(a) as bar`)
+	f(`* | stats by (x, y) count(*) foo, uniq_count(a,b) bar`, `* | stats by (x, y) count(*) as foo, uniq_count(a, b) as bar`)
 
 	// stats pipe with grouping buckets
 	f(`* | stats by(_time:1d, response_size:1_000KiB, request_duration:5s, foo) count() as foo`, `* | stats by (_time:1d, response_size:1_000KiB, request_duration:5s, foo) count(*) as foo`)
@@ -1224,9 +1224,9 @@ func TestParseQueryFailure(t *testing.T) {
 	f(`foo | stats avg`)
 	f(`foo | stats avg()`)
 
-	// invalid stats uniq
-	f(`foo | stats uniq`)
-	f(`foo | stats uniq()`)
+	// invalid stats uniq_count
+	f(`foo | stats uniq_count`)
+	f(`foo | stats uniq_count()`)
 
 	// invalid stats uniq_values
 	f(`foo | stats uniq_values`)
diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go
index 0288ac2ac..3b48af883 100644
--- a/lib/logstorage/pipe_stats.go
+++ b/lib/logstorage/pipe_stats.go
@@ -476,10 +476,10 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
 			return nil, "", fmt.Errorf("cannot parse 'count_empty' func: %w", err)
 		}
 		sf = scs
-	case lex.isKeyword("uniq"):
-		sus, err := parseStatsUniq(lex)
+	case lex.isKeyword("uniq_count"):
+		sus, err := parseStatsUniqCount(lex)
 		if err != nil {
-			return nil, "", fmt.Errorf("cannot parse 'uniq' func: %w", err)
+			return nil, "", fmt.Errorf("cannot parse 'uniq_count' func: %w", err)
 		}
 		sf = sus
 	case lex.isKeyword("sum"):
diff --git a/lib/logstorage/stats_uniq.go b/lib/logstorage/stats_uniq_count.go
similarity index 91%
rename from lib/logstorage/stats_uniq.go
rename to lib/logstorage/stats_uniq_count.go
index 83d0e500e..1889d11d0 100644
--- a/lib/logstorage/stats_uniq.go
+++ b/lib/logstorage/stats_uniq_count.go
@@ -9,21 +9,21 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 )
 
-type statsUniq struct {
+type statsUniqCount struct {
 	fields []string
 	containsStar bool
 }
 
-func (su *statsUniq) String() string {
-	return "uniq(" + fieldNamesString(su.fields) + ")"
+func (su *statsUniqCount) String() string {
+	return "uniq_count(" + fieldNamesString(su.fields) + ")"
 }
 
-func (su *statsUniq) neededFields() []string {
+func (su *statsUniqCount) neededFields() []string {
 	return su.fields
 }
 
-func (su *statsUniq) newStatsProcessor() (statsProcessor, int) {
-	sup := &statsUniqProcessor{
+func (su *statsUniqCount) newStatsProcessor() (statsProcessor, int) {
+	sup := &statsUniqCountProcessor{
 		su: su,
 
 		m: make(map[string]struct{}),
@@ -31,8 +31,8 @@ func (su *statsUniq) newStatsProcessor() (statsProcessor, int) {
 	return sup, int(unsafe.Sizeof(*sup))
 }
 
-type statsUniqProcessor struct {
-	su *statsUniq
+type statsUniqCountProcessor struct {
+	su *statsUniqCount
 
 	m map[string]struct{}
 
@@ -40,7 +40,7 @@ type statsUniqProcessor struct {
 	keyBuf []byte
 }
 
-func (sup *statsUniqProcessor) updateStatsForAllRows(br *blockResult) int {
+func (sup *statsUniqCountProcessor) updateStatsForAllRows(br *blockResult) int {
 	fields := sup.su.fields
 	m := sup.m
 
@@ -215,7 +215,7 @@ func (sup *statsUniqProcessor) updateStatsForAllRows(br *blockResult) int {
 	return stateSizeIncrease
 }
 
-func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
+func (sup *statsUniqCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
 	fields := sup.su.fields
 	m := sup.m
 
@@ -339,8 +339,8 @@ func (sup *statsUniqProcessor) updateStatsForRow(br *blockResult, rowIdx int) in
 	return stateSizeIncrease
 }
 
-func (sup *statsUniqProcessor) mergeState(sfp statsProcessor) {
-	src := sfp.(*statsUniqProcessor)
+func (sup *statsUniqCountProcessor) mergeState(sfp statsProcessor) {
+	src := sfp.(*statsUniqCountProcessor)
 	m := sup.m
 	for k := range src.m {
 		if _, ok := m[k]; !ok {
 			m[k] = struct{}{}
 		}
 	}
 }
 
-func (sup *statsUniqProcessor) finalizeStats() string {
+func (sup *statsUniqCountProcessor) finalizeStats() string {
 	n := uint64(len(sup.m))
 	return strconv.FormatUint(n, 10)
 }
 
-func parseStatsUniq(lex *lexer) (*statsUniq, error) {
-	fields, err := parseFieldNamesForStatsFunc(lex, "uniq")
+func parseStatsUniqCount(lex *lexer) (*statsUniqCount, error) {
+	fields, err := parseFieldNamesForStatsFunc(lex, "uniq_count")
 	if err != nil {
 		return nil, err
 	}
-	su := &statsUniq{
+	su := &statsUniqCount{
 		fields: fields,
 		containsStar: slices.Contains(fields, "*"),
 	}
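For quick reference, a minimal LogsQL sketch of how a query reads once this rename lands: the aggregation previously written as `uniq(...)` is spelled `uniq_count(...)`, while `by (...)` grouping and the optional `as` alias keep working as before. The `host` and `ip` field names below are illustrative placeholders mirroring the examples in the updated docs, not required fields:

```logsql
_time:5m | stats by (host) count() logs_total, uniq_count(ip) as unique_ips
```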