Enable the limit query param for the /select/logsql/query (#5778)

* app/vlselect: add limit for logs query * app/vlselect: CHANGELOG.md * app/vlselect: stop search process if limit is reached, update logic, remove default limit * app/vlselect: fix tests * app/vlselect: fix filter tests * app/vlselect: fix tests
2024-11-21 14:44:00 +00:00 · 2024-02-18 21:58:47 +01:00 · 2024-02-18 21:58:47 +01:00 · 451d2abf50
commit 451d2abf50
parent c42ddce159
6 changed files with 88 additions and 40 deletions
--- a/app/vlselect/logsql/logsql.go
+++ b/app/vlselect/logsql/logsql.go
@ -2,11 +2,13 @@ package logsql

 import (
 	"net/http"
+	"sync"

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
 )

@ -31,23 +33,36 @@ func ProcessQueryRequest(w http.ResponseWriter, r *http.Request, stopCh <-chan s
 		httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
 		return
 	}
+	limit, err := httputils.GetInt(r, "limit")
+	if err != nil {
+		httpserver.Errorf(w, r, "cannot parse limit from the request: %s", err)
+		return
+	}
 	w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")
-
 	sw := getSortWriter()
 	sw.Init(w, maxSortBufferSize.IntN())
 	tenantIDs := []logstorage.TenantID{tenantID}
-	vlstorage.RunQuery(tenantIDs, q, stopCh, func(columns []logstorage.BlockColumn) {
+
+	var mx sync.Mutex
+	vlstorage.RunQuery(tenantIDs, q, stopCh, func(columns []logstorage.BlockColumn) bool {
 		if len(columns) == 0 {
-			return
+			return true
 		}
 		rowsCount := len(columns[0].Values)
-
+		mx.Lock()
+		if rowsCount > limit {
+			rowsCount = limit
+		}
+		limit = limit - rowsCount
+		mx.Unlock()
 		bb := blockResultPool.Get()
 		for rowIdx := 0; rowIdx < rowsCount; rowIdx++ {
 			WriteJSONRow(bb, columns, rowIdx)
 		}
 		sw.MustWrite(bb.B)
 		blockResultPool.Put(bb)
+
+		return limit == 0
 	})
 	sw.FinalFlush()
 	putSortWriter(sw)
--- a/app/vlstorage/main.go
+++ b/app/vlstorage/main.go
@ -100,7 +100,7 @@ func MustAddRows(lr *logstorage.LogRows) {
 }

 // RunQuery runs the given q and calls processBlock for the returned data blocks
-func RunQuery(tenantIDs []logstorage.TenantID, q *logstorage.Query, stopCh <-chan struct{}, processBlock func(columns []logstorage.BlockColumn)) {
+func RunQuery(tenantIDs []logstorage.TenantID, q *logstorage.Query, stopCh <-chan struct{}, processBlock func(columns []logstorage.BlockColumn) bool) {
 	strg.RunQuery(tenantIDs, q, stopCh, processBlock)
 }

--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -63,6 +63,7 @@ Released at 2024-02-14
 * FEATURE: [dashboards/all](https://grafana.com/orgs/victoriametrics): add new panel `CPU spent on GC`. It should help identifying cases when too much CPU is spent on garbage collection, and advice users on how this can be addressed.
 * FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules) for `/api/v1/rules` API. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos.
 * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738).
+* FEATURE: enable `limit` query param for `/select/logsql/query` VictoriaLogs API endpoint. By default this limit sets to 1000 lines, but it can be changed via query param. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5674).

 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce CPU usage when `-promscrape.dropOriginalLabels` command-line flag is set. This issue has been introduced in [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0) when addressing [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5389).
 * BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): properly release memory during config reload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4690).
--- a/lib/logstorage/filters_test.go
+++ b/lib/logstorage/filters_test.go
@ -9226,7 +9226,7 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
 		resultColumnNames: []string{resultColumnName},
 	}
 	workersCount := 3
-	s.search(workersCount, so, nil, func(workerID uint, br *blockResult) {
+	s.search(workersCount, so, nil, func(workerID uint, br *blockResult) bool {
 		// Verify tenantID
 		if !br.streamID.tenantID.equal(&tenantID) {
 			t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID)
@ -9248,6 +9248,7 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
 		if !reflect.DeepEqual(br.timestamps, expectedTimestamps) {
 			t.Fatalf("unexpected timestamps;\ngot\n%d\nwant\n%d", br.timestamps, expectedTimestamps)
 		}
+		return false
 	})
 }

--- a/lib/logstorage/storage_search.go
+++ b/lib/logstorage/storage_search.go
@ -1,6 +1,7 @@
 package logstorage

 import (
+	"context"
 	"math"
 	"sort"
 	"sync"
@ -43,7 +44,7 @@ type searchOptions struct {
 }

 // RunQuery runs the given q and calls processBlock for results
-func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{}, processBlock func(columns []BlockColumn)) {
+func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{}, processBlock func(columns []BlockColumn) bool) {
 	resultColumnNames := q.getResultColumnNames()
 	so := &genericSearchOptions{
 		tenantIDs:         tenantIDs,
@ -51,7 +52,7 @@ func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{
 		resultColumnNames: resultColumnNames,
 	}
 	workersCount := cgroup.AvailableCPUs()
-	s.search(workersCount, so, stopCh, func(workerID uint, br *blockResult) {
+	s.search(workersCount, so, stopCh, func(workerID uint, br *blockResult) bool {
 		brs := getBlockRows()
 		cs := brs.cs

@ -61,10 +62,11 @@ func (s *Storage) RunQuery(tenantIDs []TenantID, q *Query, stopCh <-chan struct{
 				Values: br.getColumnValues(i),
 			})
 		}
-		processBlock(cs)
+		limitReached := processBlock(cs)

 		brs.cs = cs
 		putBlockRows(brs)
+		return limitReached
 	})
 }

@ -118,7 +120,7 @@ const blockSearchWorksPerBatch = 64
 // searchResultFunc must process sr.
 //
 // The callback is called at the worker with the given workerID.
-type searchResultFunc func(workerID uint, br *blockResult)
+type searchResultFunc func(workerID uint, br *blockResult) bool

 // search searches for the matching rows according to so.
 //
@ -128,19 +130,34 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
 	var wg sync.WaitGroup
 	workCh := make(chan []*blockSearchWork, workersCount)
 	wg.Add(workersCount)
+
+	ctx, cancelFn := context.WithCancel(context.Background())
+
 	for i := 0; i < workersCount; i++ {
 		go func(workerID uint) {
+			defer wg.Done()
 			bs := getBlockSearch()
-			for bsws := range workCh {
+			defer putBlockSearch(bs)
+			for {
+				select {
+				case bsws, ok := <-workCh:
+					if !ok {
+						return
+					}
 					for _, bsw := range bsws {
 						bs.search(bsw)
 						if bs.br.RowsCount() > 0 {
-						processBlockResult(workerID, &bs.br)
+							limitReached := processBlockResult(workerID, &bs.br)
+							if limitReached {
+								cancelFn()
+								return
 							}
 						}
 					}
-			putBlockSearch(bs)
-			wg.Done()
+				case <-ctx.Done():
+					return
+				}
+			}
 		}(uint(i))
 	}

@ -178,6 +195,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
 	// Wait until workers finish their work
 	close(workCh)
 	wg.Wait()
+	cancelFn()

 	// Decrement references to parts
 	for _, pw := range pws {
--- a/lib/logstorage/storage_search_test.go
+++ b/lib/logstorage/storage_search_test.go
@ -84,7 +84,7 @@ func TestStorageRunQuery(t *testing.T) {
 			AccountID: 0,
 			ProjectID: 0,
 		}
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		tenantIDs := []TenantID{tenantID}
@ -96,7 +96,7 @@ func TestStorageRunQuery(t *testing.T) {
 			AccountID: 1,
 			ProjectID: 11,
 		}
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		tenantIDs := []TenantID{tenantID}
@ -111,7 +111,7 @@ func TestStorageRunQuery(t *testing.T) {
 			}
 			expectedTenantID := tenantID.String()
 			rowsCount := uint32(0)
-			processBlock := func(columns []BlockColumn) {
+			processBlock := func(columns []BlockColumn) bool {
 				hasTenantIDColumn := false
 				var columnNames []string
 				for _, c := range columns {
@ -132,6 +132,7 @@ func TestStorageRunQuery(t *testing.T) {
 					panic(fmt.Errorf("missing tenant.id column among columns: %q", columnNames))
 				}
 				atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+				return false
 			}
 			tenantIDs := []TenantID{tenantID}
 			s.RunQuery(tenantIDs, q, nil, processBlock)
@ -145,8 +146,9 @@ func TestStorageRunQuery(t *testing.T) {
 	t.Run("matching-multiple-tenant-ids", func(t *testing.T) {
 		q := mustParseQuery(`"log message"`)
 		rowsCount := uint32(0)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+			return false
 		}
 		s.RunQuery(allTenantIDs, q, nil, processBlock)

@ -158,8 +160,9 @@ func TestStorageRunQuery(t *testing.T) {
 	t.Run("matching-in-filter", func(t *testing.T) {
 		q := mustParseQuery(`source-file:in(foobar,/foo/bar/baz)`)
 		rowsCount := uint32(0)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+			return false
 		}
 		s.RunQuery(allTenantIDs, q, nil, processBlock)

@ -170,7 +173,7 @@ func TestStorageRunQuery(t *testing.T) {
 	})
 	t.Run("stream-filter-mismatch", func(t *testing.T) {
 		q := mustParseQuery(`_stream:{job="foobar",instance=~"host-.+:2345"} log`)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.RunQuery(allTenantIDs, q, nil, processBlock)
@ -184,7 +187,7 @@ func TestStorageRunQuery(t *testing.T) {
 			}
 			expectedStreamID := fmt.Sprintf("stream_id=%d", i)
 			rowsCount := uint32(0)
-			processBlock := func(columns []BlockColumn) {
+			processBlock := func(columns []BlockColumn) bool {
 				hasStreamIDColumn := false
 				var columnNames []string
 				for _, c := range columns {
@ -205,6 +208,7 @@ func TestStorageRunQuery(t *testing.T) {
 					panic(fmt.Errorf("missing stream-id column among columns: %q", columnNames))
 				}
 				atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+				return false
 			}
 			tenantIDs := []TenantID{tenantID}
 			s.RunQuery(tenantIDs, q, nil, processBlock)
@ -222,8 +226,9 @@ func TestStorageRunQuery(t *testing.T) {
 			ProjectID: 11,
 		}
 		rowsCount := uint32(0)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+			return false
 		}
 		tenantIDs := []TenantID{tenantID}
 		s.RunQuery(tenantIDs, q, nil, processBlock)
@ -242,8 +247,9 @@ func TestStorageRunQuery(t *testing.T) {
 			ProjectID: 11,
 		}
 		rowsCount := uint32(0)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+			return false
 		}
 		tenantIDs := []TenantID{tenantID}
 		s.RunQuery(tenantIDs, q, nil, processBlock)
@ -262,8 +268,9 @@ func TestStorageRunQuery(t *testing.T) {
 			ProjectID: 11,
 		}
 		rowsCount := uint32(0)
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			atomic.AddUint32(&rowsCount, uint32(len(columns[0].Values)))
+			return false
 		}
 		tenantIDs := []TenantID{tenantID}
 		s.RunQuery(tenantIDs, q, nil, processBlock)
@ -281,7 +288,7 @@ func TestStorageRunQuery(t *testing.T) {
 			AccountID: 1,
 			ProjectID: 11,
 		}
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		tenantIDs := []TenantID{tenantID}
@ -295,7 +302,7 @@ func TestStorageRunQuery(t *testing.T) {
 			AccountID: 1,
 			ProjectID: 11,
 		}
-		processBlock := func(columns []BlockColumn) {
+		processBlock := func(columns []BlockColumn) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		tenantIDs := []TenantID{tenantID}
@ -405,7 +412,7 @@ func TestStorageSearch(t *testing.T) {
 			filter:            f,
 			resultColumnNames: []string{"_msg"},
 		}
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.search(workersCount, so, nil, processBlock)
@ -423,7 +430,7 @@ func TestStorageSearch(t *testing.T) {
 			filter:            f,
 			resultColumnNames: []string{"_msg"},
 		}
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.search(workersCount, so, nil, processBlock)
@ -441,7 +448,7 @@ func TestStorageSearch(t *testing.T) {
 			filter:            f,
 			resultColumnNames: []string{"_msg"},
 		}
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.search(workersCount, so, nil, processBlock)
@ -461,11 +468,12 @@ func TestStorageSearch(t *testing.T) {
 				resultColumnNames: []string{"_msg"},
 			}
 			rowsCount := uint32(0)
-			processBlock := func(workerID uint, br *blockResult) {
+			processBlock := func(workerID uint, br *blockResult) bool {
 				if !br.streamID.tenantID.equal(&tenantID) {
 					panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
 				}
 				atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+				return false
 			}
 			s.search(workersCount, so, nil, processBlock)

@ -485,8 +493,9 @@ func TestStorageSearch(t *testing.T) {
 			resultColumnNames: []string{"_msg"},
 		}
 		rowsCount := uint32(0)
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+			return false
 		}
 		s.search(workersCount, so, nil, processBlock)

@ -505,7 +514,7 @@ func TestStorageSearch(t *testing.T) {
 			filter:            f,
 			resultColumnNames: []string{"_msg"},
 		}
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.search(workersCount, so, nil, processBlock)
@ -526,11 +535,12 @@ func TestStorageSearch(t *testing.T) {
 				resultColumnNames: []string{"_msg"},
 			}
 			rowsCount := uint32(0)
-			processBlock := func(workerID uint, br *blockResult) {
+			processBlock := func(workerID uint, br *blockResult) bool {
 				if !br.streamID.tenantID.equal(&tenantID) {
 					panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
 				}
 				atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+				return false
 			}
 			s.search(workersCount, so, nil, processBlock)

@ -555,11 +565,12 @@ func TestStorageSearch(t *testing.T) {
 			resultColumnNames: []string{"_msg"},
 		}
 		rowsCount := uint32(0)
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			if !br.streamID.tenantID.equal(&tenantID) {
 				panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
 			}
 			atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+			return false
 		}
 		s.search(workersCount, so, nil, processBlock)

@ -592,11 +603,12 @@ func TestStorageSearch(t *testing.T) {
 			resultColumnNames: []string{"_msg"},
 		}
 		rowsCount := uint32(0)
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			if !br.streamID.tenantID.equal(&tenantID) {
 				panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
 			}
 			atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+			return false
 		}
 		s.search(workersCount, so, nil, processBlock)

@ -620,8 +632,9 @@ func TestStorageSearch(t *testing.T) {
 			resultColumnNames: []string{"_msg"},
 		}
 		rowsCount := uint32(0)
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			atomic.AddUint32(&rowsCount, uint32(br.RowsCount()))
+			return false
 		}
 		s.search(workersCount, so, nil, processBlock)

@ -644,7 +657,7 @@ func TestStorageSearch(t *testing.T) {
 			filter:            f,
 			resultColumnNames: []string{"_msg"},
 		}
-		processBlock := func(workerID uint, br *blockResult) {
+		processBlock := func(workerID uint, br *blockResult) bool {
 			panic(fmt.Errorf("unexpected match"))
 		}
 		s.search(workersCount, so, nil, processBlock)