app/vlselect: add start_offset query arg for /select/logsql/tail endpoint for returning historical logs before live tailing

(cherry picked from commit 3a5f1019ba)
2025-01-10 15:14:09 +00:00 · 2024-11-06 12:51:47 +01:00 · 2024-11-06 12:51:47 +01:00 · ab3b4020b4
commit ab3b4020b4
parent 98fe1950a1
3 changed files with 22 additions and 4 deletions
--- a/app/vlselect/logsql/logsql.go
+++ b/app/vlselect/logsql/logsql.go
@ -407,6 +407,13 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
 	}
 	refreshInterval := time.Millisecond * time.Duration(refreshIntervalMsecs)

+	startOffsetMsecs, err := httputils.GetDuration(r, "start_offset", 5*1000)
+	if err != nil {
+		httpserver.Errorf(w, r, "%s", err)
+		return
+	}
+	startOffset := startOffsetMsecs * 1e6
+
 	ctxWithCancel, cancel := context.WithCancel(ctx)
 	tp := newTailProcessor(cancel)

@ -414,6 +421,7 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
 	defer ticker.Stop()

 	end := time.Now().UnixNano()
+	start := end - startOffset
 	doneCh := ctxWithCancel.Done()
 	flusher, ok := w.(http.Flusher)
 	if !ok {
@ -421,14 +429,12 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
 	}
 	qOrig := q
 	for {
-		start := end - tailOffsetNsecs
-		end = time.Now().UnixNano()
-
 		q = qOrig.Clone(end)
 		q.AddTimeFilter(start, end)
 		// q.Optimize() call is needed for converting '*' into filterNoop.
 		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785#issuecomment-2358547733
 		q.Optimize()
+
 		if err := vlstorage.RunQuery(ctxWithCancel, tenantIDs, q, tp.writeBlock); err != nil {
 			httpserver.Errorf(w, r, "cannot execute tail query [%s]: %s", q, err)
 			return
@ -447,6 +453,8 @@ func ProcessLiveTailRequest(ctx context.Context, w http.ResponseWriter, r *http.
 		case <-doneCh:
 			return
 		case <-ticker.C:
+			start = end - tailOffsetNsecs
+			end = time.Now().UnixNano()
 		}
 	}
 }
--- a/docs/VictoriaLogs/CHANGELOG.md
+++ b/docs/VictoriaLogs/CHANGELOG.md
@ -15,6 +15,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta

 ## tip

+* FEATURE: support returning historical logs from [live tailing API](https://docs.victoriametrics.com/victorialogs/querying/#live-tailing) via `start_offset` query arg. For example, a request to `/select/logsql/tail?query=*&start_offset=5m` returns logs for the last 5 minutes before it starts returning live tailing logs for the given `query`.
 * FEATURE: add an ability to specify extra fields for logs ingested via [HTTP-based data ingestion protocols](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-apis). See `extra_fields` query arg and `VL-Extra-Fields` HTTP header in [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
 * BUGFIX: Properly parse structured metadata when ingesting logs with Loki ingestion protocol. An issue has been introduced in [v0.3.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.3.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7431) for the details.

--- a/docs/VictoriaLogs/querying/README.md
+++ b/docs/VictoriaLogs/querying/README.md
@ -130,7 +130,8 @@ curl -N http://localhost:9428/select/logsql/tail -d 'query=error'
 ```

 The `-N` command-line flag is essential to pass to `curl` during live tailing, since otherwise curl may delay displaying matching logs
-because of internal response buffering.
+because of internal response buffering. It is recommended to use [vlogscli](https://docs.victoriametrics.com/victorialogs/querying/vlogscli/) for live tailing -
+see [these docs](https://docs.victoriametrics.com/victorialogs/querying/vlogscli/#live-tailing).

The `<query>` must conform to the following rules:

@ -146,6 +147,14 @@ The `<query>` must conform the following rules:
 - It is recommended to return [`_stream_id`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields) field for more accurate live tailing
  across multiple streams.

+Live tailing supports returning historical logs, which were ingested into VictoriaLogs before the start of live tailing. Pass `start_offset=<d>` query
+arg to `/select/logsql/tail` where `<d>` is the duration for returning historical logs. For example, the following request returns historical logs
+which were ingested into VictoriaLogs during the last hour, before starting live tailing:
+
+```sh
+curl -N http://localhost:9428/select/logsql/tail -d 'query=*' -d 'start_offset=1h'
+```
+
**Performance tip**: live tailing works best if it matches newly ingested logs at a relatively slow rate (e.g. up to 1K matching logs per second),
i.e. it is optimized for the case when real humans inspect the output of live tailing in real time. If live tailing returns logs at too high a rate,
then it is recommended to add more specific [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) to the `<query>`, so it matches fewer logs.