This commit is contained in:
Aliaksandr Valialkin 2024-05-21 10:53:32 +02:00
parent b593065865
commit a42a87319d
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 105 additions and 2 deletions

View file

@ -1865,7 +1865,7 @@ See also:
`uniq_values(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the unique non-empty values across
the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
The returned values are encoded in JSON array. The order of the returned values is arbitrary.
The returned values are encoded in a sorted JSON array.
For example, the following query returns unique non-empty values for the `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
@ -1878,12 +1878,14 @@ Every unique value is stored in memory during query execution. Big number of uni
only a subset of unique values. In this case add `limit N` after `uniq_values(...)` in order to limit the number of returned unique values to `N`,
while limiting the maximum memory usage.
For example, the following query returns up to `100` unique values for the `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over the logs for the last 5 minutes. Note that arbitrary subset of unique `ip` values is returned every time:
over the logs for the last 5 minutes:
```logsql
_time:5m | stats uniq_values(ip) limit 100 as unique_ips_100
```
An arbitrary subset of unique `ip` values is returned on every query execution if the `limit` is reached.
See also:
- [`uniq` pipe](#uniq-pipe)

View file

@ -81,6 +81,94 @@ func TestPipeStats(t *testing.T) {
},
})
f("stats uniq_values(*) as values", [][]Field{
{
{"_msg", `abc`},
{"a", `2`},
{"b", `3`},
},
{
{"_msg", `def`},
{"a", `1`},
},
{},
{
{"a", `2`},
{"b", `54`},
},
{},
}, [][]Field{
{
{"values", `["1","2","3","54","abc","def"]`},
},
})
f("stats uniq_values(*) limit 6 as values", [][]Field{
{
{"_msg", `abc`},
{"a", `2`},
{"b", `3`},
},
{
{"_msg", `def`},
{"a", `1`},
},
{},
{
{"a", `2`},
{"b", `54`},
},
{},
}, [][]Field{
{
{"values", `["1","2","3","54","abc","def"]`},
},
})
f("stats uniq_values(a) as values", [][]Field{
{
{"_msg", `abc`},
{"a", `2`},
{"b", `3`},
},
{
{"_msg", `def`},
{"a", `1`},
},
{},
{
{"a", `2`},
{"b", `54`},
},
{},
}, [][]Field{
{
{"values", `["1","2"]`},
},
})
f("stats uniq_values(a, b, c) as values", [][]Field{
{
{"_msg", `abc`},
{"a", `2`},
{"b", `3`},
},
{
{"_msg", `def`},
{"a", `1`},
},
{},
{
{"a", `2`},
{"b", `54`},
},
{},
}, [][]Field{
{
{"values", `["1","2","3","54"]`},
},
})
f("stats count_empty(*) as rows", [][]Field{
{
{"_msg", `abc`},

View file

@ -207,6 +207,7 @@ func (sup *statsUniqValuesProcessor) finalizeStats() string {
for k := range sup.m {
items = append(items, k)
}
sortStrings(items)
if limit := sup.su.limit; limit > 0 && uint64(len(items)) > limit {
items = items[:limit]
@ -215,6 +216,18 @@ func (sup *statsUniqValuesProcessor) finalizeStats() string {
return marshalJSONArray(items)
}
// sortStrings sorts a in place.
//
// The order is determined by lessString: identical strings compare as equal,
// otherwise x goes before y when lessString(x, y) reports true.
func sortStrings(a []string) {
	compare := func(lhs, rhs string) int {
		switch {
		case lhs == rhs:
			return 0
		case lessString(lhs, rhs):
			return -1
		default:
			return 1
		}
	}
	slices.SortFunc(a, compare)
}
func (sup *statsUniqValuesProcessor) limitReached() bool {
limit := sup.su.limit
return limit > 0 && uint64(len(sup.m)) >= limit