From 16a91539bda57e018d400215009574371f8234a7 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 27 Apr 2024 03:14:00 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 2 +- docs/VictoriaLogs/LogsQL.md | 6 ++- lib/logstorage/parser_test.go | 14 +++++++ lib/logstorage/pipes.go | 75 +++++++++++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 4 deletions(-) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 5b0ea5ae5..fd50f0c71 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -22,7 +22,7 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta * FEATURE: return all the log fields by default in query results. Previously only [`_stream`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields), [`_time`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) and [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) fields were returned by default. * FEATURE: add support for returning only the requested log [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#querying-specific-fields). * FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details. -* FEATURE: add support for returning the first `N` results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters). +* FEATURE: add support for limiting the number of returned results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters). 
* FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed. * BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query. diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index a3671e7af..985bf1ebf 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -1092,11 +1092,13 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo ## Limiters -LogsQL provides the following functionality for limiting the amounts of returned log entries: +LogsQL provides the following functionality for limiting the number of returned log entries: - `error | head 10` - returns up to 10 log entries with the `error` [word](#word). +- `error | skip 10` - skips the first 10 log entries with the `error` [word](#word). -LogsQL will support the ability to page the returned results. +It is recommended to [sort](#sorting) entries before limiting the number of returned log entries, +in order to get consistent results. It is possible to limit the returned results with `head`, `tail`, `less`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line). 
diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index c4fbea08e..b3bca1482 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -820,6 +820,12 @@ func TestParseQuerySuccess(t *testing.T) { // multiple head pipes f(`foo | head 100 | head 10 | head 234`, `foo | head 100 | head 10 | head 234`) + // skip pipe + f(`foo | skip 10`, `foo | skip 10`) + + // multiple skip pipes + f(`foo | skip 10 | skip 100`, `foo | skip 10 | skip 100`) + // stats count pipe f(`* | Stats count() AS foo`, `* | stats count() as foo`) f(`* | STATS bY (foo, b.a/r, "b az") count(*) as XYz`, `* | stats by (foo, "b.a/r", "b az") count(*) as XYz`) @@ -827,6 +833,7 @@ func TestParseQuerySuccess(t *testing.T) { // multiple different pipes f(`* | fields foo, bar | head 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | head 100 | stats by (foo, bar) count(baz) as qwert`) + f(`* | skip 100 | head 20 | skip 10`, `* | skip 100 | head 20 | skip 10`) } func TestParseQueryFailure(t *testing.T) { @@ -1046,6 +1053,13 @@ func TestParseQueryFailure(t *testing.T) { f(`foo | head bar`) f(`foo | head -123`) + // missing skip pipe value + f(`foo | skip`) + + // invalid skip pipe value + f(`foo | skip bar`) + f(`foo | skip -10`) + // missing stats f(`foo | stats`) diff --git a/lib/logstorage/pipes.go b/lib/logstorage/pipes.go index d757016bb..db24673e1 100644 --- a/lib/logstorage/pipes.go +++ b/lib/logstorage/pipes.go @@ -86,6 +86,12 @@ func parsePipes(lex *lexer) ([]pipe, error) { return nil, fmt.Errorf("cannot parse 'head' pipe: %w", err) } pipes = append(pipes, hp) + case lex.isKeyword("skip"): + sp, err := parseSkipPipe(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse 'skip' pipe: %w", err) + } + pipes = append(pipes, sp) default: return nil, fmt.Errorf("unexpected pipe %q", lex.token) } @@ -647,7 +653,7 @@ func parseHeadPipe(lex *lexer) (*headPipe, error) { } n, err := strconv.ParseUint(lex.token, 10, 64) if err != nil { - 
return nil, fmt.Errorf("cannot parse %q: %w", lex.token, err) + return nil, fmt.Errorf("cannot parse the number of head rows to return %q: %w", lex.token, err) } lex.nextToken() hp := &headPipe{ @@ -656,6 +662,73 @@ func parseHeadPipe(lex *lexer) (*headPipe, error) { return hp, nil } +type skipPipe struct { + n uint64 +} + +func (sp *skipPipe) String() string { + return fmt.Sprintf("skip %d", sp.n) +} + +func (sp *skipPipe) newPipeProcessor(workersCount int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor { + return &skipPipeProcessor{ + sp: sp, + cancel: cancel, + ppBase: ppBase, + } +} + +type skipPipeProcessor struct { + sp *skipPipe + cancel func() + ppBase pipeProcessor + + rowsSkipped atomic.Uint64 +} + +func (spp *skipPipeProcessor) writeBlock(workerID uint, timestamps []int64, columns []BlockColumn) { + rowsSkipped := spp.rowsSkipped.Add(uint64(len(timestamps))) + if rowsSkipped <= spp.sp.n { + return + } + + rowsSkipped -= uint64(len(timestamps)) + if rowsSkipped >= spp.sp.n { + spp.ppBase.writeBlock(workerID, timestamps, columns) + return + } + + rowsRemaining := spp.sp.n - rowsSkipped + cs := make([]BlockColumn, len(columns)) + for i, c := range columns { + cDst := &cs[i] + cDst.Name = c.Name + cDst.Values = c.Values[rowsRemaining:] + } + timestamps = timestamps[rowsRemaining:] + spp.ppBase.writeBlock(workerID, timestamps, cs) +} + +func (spp *skipPipeProcessor) flush() { + spp.cancel() + spp.ppBase.flush() +} + +func parseSkipPipe(lex *lexer) (*skipPipe, error) { + if !lex.mustNextToken() { + return nil, fmt.Errorf("missing the number of rows to skip") + } + n, err := strconv.ParseUint(lex.token, 10, 64) + if err != nil { + return nil, fmt.Errorf("cannot parse the number of rows to skip %q: %w", lex.token, err) + } + lex.nextToken() + sp := &skipPipe{ + n: n, + } + return sp, nil +} + func parseFieldNamesInParens(lex *lexer) ([]string, error) { if !lex.isKeyword("(") { return nil, fmt.Errorf("missing `(`")