From c2050495c4383b6a34f0c511bf7924df652a197e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 May 2024 10:51:44 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/LogsQL.md | 4 ++-- lib/logstorage/pipe_uniq.go | 8 +++++++- lib/logstorage/pipe_uniq_test.go | 11 +++++++---- lib/logstorage/storage_search.go | 2 +- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 508b0b221..dfaee181e 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -1720,10 +1720,10 @@ _time:5m | uniq by (host, path) The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed. -Add `hits` after `uniq by (...)` in order to return the number of matching logs per each field value: +Add `with hits` after `uniq by (...)` in order to return the number of matching logs per each field value: ```logsql -_time:5m | uniq by (host) hits +_time:5m | uniq by (host) with hits ``` Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory. diff --git a/lib/logstorage/pipe_uniq.go b/lib/logstorage/pipe_uniq.go index ab8584d57..794a25d8e 100644 --- a/lib/logstorage/pipe_uniq.go +++ b/lib/logstorage/pipe_uniq.go @@ -32,7 +32,7 @@ func (pu *pipeUniq) String() string { s += " by (" + fieldNamesString(pu.byFields) + ")" } if pu.hitsFieldName != "" { - s += " hits" + s += " with hits" } if pu.limit > 0 { s += fmt.Sprintf(" limit %d", pu.limit) @@ -477,6 +477,12 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) { pu.byFields = bfs } + if lex.isKeyword("with") { + lex.nextToken() + if !lex.isKeyword("hits") { + return nil, fmt.Errorf("missing 'hits' after 'with'") + } + } if lex.isKeyword("hits") { lex.nextToken() hitsFieldName := "hits" diff --git a/lib/logstorage/pipe_uniq_test.go b/lib/logstorage/pipe_uniq_test.go index 68e8f0042..3c1cb7372 100644 --- a/lib/logstorage/pipe_uniq_test.go +++ b/lib/logstorage/pipe_uniq_test.go @@ -11,15 +11,15 @@ func TestParsePipeUniqSuccess(t *testing.T) { } f(`uniq`) - f(`uniq hits`) + f(`uniq with hits`) f(`uniq limit 10`) - f(`uniq hits limit 10`) + f(`uniq with hits limit 10`) f(`uniq by (x)`) f(`uniq by (x) limit 10`) f(`uniq by (x, y)`) - f(`uniq by (x, y) hits`) + f(`uniq by (x, y) with hits`) f(`uniq by (x, y) limit 10`) - f(`uniq by (x, y) hits limit 10`) + f(`uniq by (x, y) with hits limit 10`) } func TestParsePipeUniqFailure(t *testing.T) { @@ -33,6 +33,7 @@ func TestParsePipeUniqFailure(t *testing.T) { f(`uniq by hits`) f(`uniq by(x) limit`) f(`uniq by(x) limit foo`) + f(`uniq by (x) with`) } func TestPipeUniq(t *testing.T) { @@ -365,10 +366,12 @@ func TestPipeUniqUpdateNeededFields(t *testing.T) { f("uniq by()", "*", "", "*", "") f("uniq by(*)", "*", "", "*", "") f("uniq by(f1,f2)", "*", "", "f1,f2", "") + f("uniq by(f1,f2) with hits", "*", "", "f1,f2", "") // all the needed fields, unneeded fields do not intersect with src f("uniq by(s1, s2)", "*", "f1,f2", "s1,s2", "") f("uniq", "*", "f1,f2", "*", "") + f("uniq with hits", "*", "f1,f2", "*", "") // all the needed fields, unneeded fields intersect with src f("uniq by(s1, s2)", "*", "s1,f1,f2", "s1,s2", "") diff --git a/lib/logstorage/storage_search.go b/lib/logstorage/storage_search.go index de754582d..95ea35af6 100644 --- a/lib/logstorage/storage_search.go +++ b/lib/logstorage/storage_search.go @@ -229,7 +229,7 @@ func (s *Storage) getFieldValuesNoHits(ctx context.Context, tenantIDs []TenantID func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) { pipes := append([]pipe{}, q.pipes...) quotedFieldName := quoteTokenIfNeeded(fieldName) - pipeStr := fmt.Sprintf("uniq by (%s) hits limit %d", quotedFieldName, limit) + pipeStr := fmt.Sprintf("uniq by (%s) with hits limit %d", quotedFieldName, limit) lex := newLexer(pipeStr) pu, err := parsePipeUniq(lex)