From a42a87319dfadf0c900c6f403ba8b559bcbc1d87 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 21 May 2024 10:53:32 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/LogsQL.md | 6 +- lib/logstorage/pipe_stats_test.go | 88 +++++++++++++++++++++++++++++ lib/logstorage/stats_uniq_values.go | 13 +++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index d9d6ad219..fda35eca6 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -1865,7 +1865,7 @@ See also: `uniq_values(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the unique non-empty values across the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). -The returned values are encoded in JSON array. The order of the returned values is arbitrary. +The returned values are encoded in a sorted JSON array. For example, the following query returns unique non-empty values for the `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) over logs for the last 5 minutes: @@ -1878,12 +1878,14 @@ Every unique value is stored in memory during query execution. Big number of uni only a subset of unique values. In this case add `limit N` after `uniq_values(...)` in order to limit the number of returned unique values to `N`, while limiting the maximum memory usage. For example, the following query returns up to `100` unique values for the `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) -over the logs for the last 5 minutes. Note that arbitrary subset of unique `ip` values is returned every time: +over the logs for the last 5 minutes: ```logsql _time:5m | stats uniq_values(ip) limit 100 as unique_ips_100 ``` +An arbitrary subset of unique `ip` values is returned every time if the `limit` is reached. 
+ See also: - [`uniq` pipe](#uniq-pipe) diff --git a/lib/logstorage/pipe_stats_test.go b/lib/logstorage/pipe_stats_test.go index d2adf9603..8b57bf878 100644 --- a/lib/logstorage/pipe_stats_test.go +++ b/lib/logstorage/pipe_stats_test.go @@ -81,6 +81,94 @@ func TestPipeStats(t *testing.T) { }, }) + f("stats uniq_values(*) as values", [][]Field{ + { + {"_msg", `abc`}, + {"a", `2`}, + {"b", `3`}, + }, + { + {"_msg", `def`}, + {"a", `1`}, + }, + {}, + { + {"a", `2`}, + {"b", `54`}, + }, + {}, + }, [][]Field{ + { + {"values", `["1","2","3","54","abc","def"]`}, + }, + }) + + f("stats uniq_values(*) limit 6 as values", [][]Field{ + { + {"_msg", `abc`}, + {"a", `2`}, + {"b", `3`}, + }, + { + {"_msg", `def`}, + {"a", `1`}, + }, + {}, + { + {"a", `2`}, + {"b", `54`}, + }, + {}, + }, [][]Field{ + { + {"values", `["1","2","3","54","abc","def"]`}, + }, + }) + + f("stats uniq_values(a) as values", [][]Field{ + { + {"_msg", `abc`}, + {"a", `2`}, + {"b", `3`}, + }, + { + {"_msg", `def`}, + {"a", `1`}, + }, + {}, + { + {"a", `2`}, + {"b", `54`}, + }, + {}, + }, [][]Field{ + { + {"values", `["1","2"]`}, + }, + }) + + f("stats uniq_values(a, b, c) as values", [][]Field{ + { + {"_msg", `abc`}, + {"a", `2`}, + {"b", `3`}, + }, + { + {"_msg", `def`}, + {"a", `1`}, + }, + {}, + { + {"a", `2`}, + {"b", `54`}, + }, + {}, + }, [][]Field{ + { + {"values", `["1","2","3","54"]`}, + }, + }) + f("stats count_empty(*) as rows", [][]Field{ { {"_msg", `abc`}, diff --git a/lib/logstorage/stats_uniq_values.go b/lib/logstorage/stats_uniq_values.go index 4942f1213..0d2778798 100644 --- a/lib/logstorage/stats_uniq_values.go +++ b/lib/logstorage/stats_uniq_values.go @@ -207,6 +207,7 @@ func (sup *statsUniqValuesProcessor) finalizeStats() string { for k := range sup.m { items = append(items, k) } + sortStrings(items) if limit := sup.su.limit; limit > 0 && uint64(len(items)) > limit { items = items[:limit] @@ -215,6 +216,18 @@ func (sup *statsUniqValuesProcessor) finalizeStats() string { return 
marshalJSONArray(items) } +func sortStrings(a []string) { + slices.SortFunc(a, func(x, y string) int { + if x == y { + return 0 + } + if lessString(x, y) { + return -1 + } + return 1 + }) +} + func (sup *statsUniqValuesProcessor) limitReached() bool { limit := sup.su.limit return limit > 0 && uint64(len(sup.m)) >= limit