From 12fe2b265ca1af18cf070b4acc0d0df43a48a47d Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Wed, 15 May 2024 03:55:46 +0200
Subject: [PATCH] wip

---
 docs/VictoriaLogs/LogsQL.md   | 10 ++++++
 lib/logstorage/parser_test.go | 11 +++++++
 lib/logstorage/pipe_sort.go   | 57 +++++++++++++++++++++++++++++++++--------
 lib/logstorage/pipe_topk.go   | 16 +++++++---
 4 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md
index fdded45c8..62b6a4979 100644
--- a/docs/VictoriaLogs/LogsQL.md
+++ b/docs/VictoriaLogs/LogsQL.md
@@ -1128,6 +1128,7 @@ By default rows are selected in arbitrary order because of performance reasons,
 
 See also:
 
+- [`sort` pipe](#sort-pipe)
 - [`offset` pipe](#offset-pipe)
 
 ### offset pipe
@@ -1147,6 +1148,7 @@ Rows can be sorted with [`sort` pipe](#sort-pipe).
 See also:
 
 - [`limit` pipe](#limit-pipe)
+- [`sort` pipe](#sort-pipe)
 
 ### rename pipe
 
@@ -1208,6 +1210,14 @@ for the `request_duration` [field](https://docs.victoriametrics.com/VictoriaLogs
 _time:1h | sort by (request_duration desc) limit 10
 ```
 
+If the first `N` sorted results must be skipped, then `offset N` can be added to the `sort` pipe. For example,
+the following query skips the first 10 logs with the biggest `request_duration` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model),
+and then returns the next 20 sorted logs for the last hour:
+
+```logsql
+_time:1h | sort by (request_duration desc) offset 10 limit 20
+```
+
 Note that sorting of big number of logs can be slow and can consume a lot of additional memory.
 It is recommended limiting the number of logs before sorting with the following approaches:
 
diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go
index 5e7a15c61..1f0289e58 100644
--- a/lib/logstorage/parser_test.go
+++ b/lib/logstorage/parser_test.go
@@ -975,9 +975,14 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | sORt bY (_time, _stream DEsc, host)`, `* | sort by (_time, _stream desc, host)`)
 	f(`* | sort bY (foo desc, bar,) desc`, `* | sort by (foo desc, bar) desc`)
 	f(`* | sort limit 10`, `* | sort limit 10`)
+	f(`* | sort offset 20 limit 10`, `* | sort offset 20 limit 10`)
 	f(`* | sort desc limit 10`, `* | sort desc limit 10`)
+	f(`* | sort desc offset 20 limit 10`, `* | sort desc offset 20 limit 10`)
 	f(`* | sort by (foo desc, bar) limit 10`, `* | sort by (foo desc, bar) limit 10`)
+	f(`* | sort by (foo desc, bar) oFFset 20 limit 10`, `* | sort by (foo desc, bar) offset 20 limit 10`)
 	f(`* | sort by (foo desc, bar) desc limit 10`, `* | sort by (foo desc, bar) desc limit 10`)
+	f(`* | sort by (foo desc, bar) desc OFFSET 30 limit 10`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
+	f(`* | sort by (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
 
 	// uniq pipe
 	f(`* | uniq`, `* | uniq`)
@@ -1338,6 +1343,12 @@ func TestParseQueryFailure(t *testing.T) {
 	f(`foo | sort by(bar) limit foo`)
 	f(`foo | sort by(bar) limit -1234`)
 	f(`foo | sort by(bar) limit 12.34`)
+	f(`foo | sort by(bar) limit 10 limit 20`)
+	f(`foo | sort by(bar) offset`)
+	f(`foo | sort by(bar) offset limit`)
+	f(`foo | sort by(bar) offset -1234`)
+	f(`foo | sort by(bar) offset 12.34`)
+	f(`foo | sort by(bar) offset 10 offset 20`)
 
 	// invalid uniq pipe
 	f(`foo | uniq bar`)
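[Review note] The success cases above pin both the parsing and the canonical formatting of `offset`: keywords are case-insensitive, and `limit 10 OFFSET 30` normalizes to `offset 30 limit 10`, i.e. `String()` always prints `offset` before `limit`. A minimal sketch of such a round-trip check, assuming the exported `logstorage.ParseQuery`; the helper name and failure messages below are illustrative, not taken from the test file:

```go
package logstorage

import "testing"

// checkSortRoundTrip is an illustrative stand-in for the f() helper in
// TestParseQuerySuccess: parse the query, then compare its canonical
// String() form against the expected normalized text.
func checkSortRoundTrip(t *testing.T, s, resultExpected string) {
	t.Helper()

	q, err := ParseQuery(s)
	if err != nil {
		t.Fatalf("cannot parse query %q: %s", s, err)
	}
	// Keywords are lowercased and `offset` is printed before `limit`,
	// regardless of the order and the case used in the input.
	if result := q.String(); result != resultExpected {
		t.Fatalf("unexpected canonical form of %q; got %q; want %q", s, result, resultExpected)
	}
}
```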
diff --git a/lib/logstorage/pipe_sort.go b/lib/logstorage/pipe_sort.go
index 16aded0da..4787b56bb 100644
--- a/lib/logstorage/pipe_sort.go
+++ b/lib/logstorage/pipe_sort.go
@@ -26,6 +26,9 @@ type pipeSort struct {
 	// whether to apply descending order
 	isDesc bool
 
+	// how many results to skip
+	offset uint64
+
 	// how many results to return
 	//
 	// if zero, then all the results are returned
@@ -44,6 +47,9 @@ func (ps *pipeSort) String() string {
 	if ps.isDesc {
 		s += " desc"
 	}
+	if ps.offset > 0 {
+		s += fmt.Sprintf(" offset %d", ps.offset)
+	}
 	if ps.limit > 0 {
 		s += fmt.Sprintf(" limit %d", ps.limit)
 	}
@@ -470,16 +476,25 @@ type pipeSortWriteContext struct {
 	rcs []resultColumn
 	br  blockResult
 
-	valuesLen int
+	rowsWritten uint64
+	valuesLen   int
 }
 
 func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
+	ps := shard.ps
+
 	rowIdx := shard.rowRefNext
 	shard.rowRefNext++
+
+	wctx.rowsWritten++
+	if wctx.rowsWritten <= ps.offset {
+		return
+	}
+
 	rr := shard.rowRefs[rowIdx]
 	b := &shard.blocks[rr.blockIdx]
 
-	byFields := shard.ps.byFields
+	byFields := ps.byFields
 	rcs := wctx.rcs
 
 	areEqualColumns := len(rcs) == len(byFields)+len(b.otherColumns)
@@ -688,18 +703,36 @@ func parsePipeSort(lex *lexer) (*pipeSort, error) {
 		ps.isDesc = true
 	}
 
-	switch {
-	case lex.isKeyword("limit"):
-		lex.nextToken()
-		n, ok := tryParseUint64(lex.token)
-		lex.nextToken()
-		if !ok {
-			return nil, fmt.Errorf("cannot parse 'limit %s'", lex.token)
+	for {
+		switch {
+		case lex.isKeyword("offset"):
+			lex.nextToken()
+			s := lex.token
+			n, ok := tryParseUint64(s)
+			lex.nextToken()
+			if !ok {
+				return nil, fmt.Errorf("cannot parse 'offset %s'", s)
+			}
+			if ps.offset > 0 {
+				return nil, fmt.Errorf("duplicate 'offset'; the previous one is %d; the new one is %s", ps.offset, s)
+			}
+			ps.offset = n
+		case lex.isKeyword("limit"):
+			lex.nextToken()
+			s := lex.token
+			n, ok := tryParseUint64(s)
+			lex.nextToken()
+			if !ok {
+				return nil, fmt.Errorf("cannot parse 'limit %s'", s)
+			}
+			if ps.limit > 0 {
+				return nil, fmt.Errorf("duplicate 'limit'; the previous one is %d; the new one is %s", ps.limit, s)
+			}
+			ps.limit = n
+		default:
+			return &ps, nil
 		}
-		ps.limit = n
 	}
-
-	return &ps, nil
 }
 
 // bySortField represents 'by (...)' part of the pipeSort.
diff --git a/lib/logstorage/pipe_topk.go b/lib/logstorage/pipe_topk.go
index 1b2018c4e..42b18df6a 100644
--- a/lib/logstorage/pipe_topk.go
+++ b/lib/logstorage/pipe_topk.go
@@ -410,16 +410,22 @@ type pipeTopkWriteContext struct {
 }
 
 func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bool {
-	if wctx.rowsWritten >= wctx.ptp.ps.limit {
-		return false
-	}
-	wctx.rowsWritten++
+	ps := shard.ps
 
 	rowIdx := shard.rowNext
 	shard.rowNext++
+
+	wctx.rowsWritten++
+	if wctx.rowsWritten <= ps.offset {
+		return true
+	}
+	if wctx.rowsWritten > ps.offset+ps.limit {
+		return false
+	}
+
 	r := shard.rows[rowIdx]
 
-	byFields := shard.ps.byFields
+	byFields := ps.byFields
 	rcs := wctx.rcs
 
 	areEqualColumns := len(rcs) == len(byFields)+len(r.otherColumns)
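[Review note] Both `writeNextRow` implementations now count every produced row in `rowsWritten` and window the output: rows with `rowsWritten <= offset` are skipped, and the topk writer additionally stops once `rowsWritten > offset+limit`. The plain sort writer checks only the offset; judging from the diff, queries with a `limit` are handled by the topk writer, which enforces both bounds. A standalone sketch of that windowing, with illustrative names that are not from the patch:

```go
package main

import "fmt"

// emitWindow mirrors the offset/limit handling added above: skip the
// first `offset` rows, emit the next `limit` rows, then stop. A zero
// limit means "no limit", matching the pipeSort.limit convention.
func emitWindow(rows []string, offset, limit uint64, emit func(string)) {
	var rowsWritten uint64
	for _, row := range rows {
		rowsWritten++
		if rowsWritten <= offset {
			continue // still inside the skipped prefix
		}
		if limit > 0 && rowsWritten > offset+limit {
			return // past the requested window
		}
		emit(row)
	}
}

func main() {
	sorted := []string{"a", "b", "c", "d", "e", "f"}
	// Equivalent of `... | sort offset 2 limit 3`: emits "c", "d", "e".
	emitWindow(sorted, 2, 3, func(row string) { fmt.Println(row) })
}
```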