VictoriaMetrics/lib/logstorage/pipe_limit.go
Aliaksandr Valialkin fea934936b
lib/logstorage: properly propagate extra filters to all the subqueries
The purpose of extra filters ( https://docs.victoriametrics.com/victorialogs/querying/#extra-filters )
is to limit the subset of logs that can be queried. For example, it is expected that all queries
with `extra_filters={tenant=123}` can access only logs that contain the value `123` in the `tenant` field.

Previously this wasn't the case, since the provided extra filters weren't applied to subqueries.
For example, the following query could be used to select all the logs outside `tenant=123`, regardless of the `extra_filters` arg:

    * | union({tenant!=123})

This commit fixes this by propagating extra filters to all the subqueries.
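For illustration, here is a minimal Go sketch of how a client passes `extra_filters` alongside the query to the HTTP querying API. The `/select/logsql/query` endpoint and the `query`/`extra_filters` args follow the linked querying docs; the localhost address and default port are assumptions about a locally running VictoriaLogs instance:

    package main

    import (
        "fmt"
        "io"
        "net/http"
        "net/url"
    )

    func main() {
        params := url.Values{}
        // The union() subquery now inherits the extra filter too, so this query
        // can no longer return logs outside tenant=123.
        params.Set("query", `* | union({tenant!=123})`)
        params.Set("extra_filters", `{tenant=123}`)

        resp, err := http.PostForm("http://localhost:9428/select/logsql/query", params)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        body, _ := io.ReadAll(resp.Body)
        fmt.Println(string(body))
    }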

While at it, this commit also properly propagates the [start, end] time range filter from HTTP querying APIs
into all the subqueries, since this is what most users expect. This behaviour can be overridden on a per-subquery
basis with the `options(ignore_global_time_filter=true)` option - see https://docs.victoriametrics.com/victorialogs/logsql/#query-options
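A fragment of the same sketch as above shows where the global time range enters the picture; the `start` and `end` HTTP args are documented in the same querying docs, while the timestamps here are arbitrary examples:

    // Reuses the params pattern from the sketch above.
    params := url.Values{}
    params.Set("query", `* | union({tenant!=123})`)
    params.Set("extra_filters", `{tenant=123}`)
    // The [start, end] range below now also constrains the union() subquery,
    // unless that subquery sets options(ignore_global_time_filter=true)
    // as described in the linked query-options docs.
    params.Set("start", "2025-01-26T00:00:00Z")
    params.Set("end", "2025-01-27T00:00:00Z")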

Optimizations are now also properly applied across all the subqueries. Previously, the optimizations in the Query.optimize()
function were applied only to the top-level query.
2025-01-26 22:05:05 +01:00

package logstorage

import (
	"fmt"
	"sync/atomic"
)

// pipeLimit implements '| limit ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
type pipeLimit struct {
	limit uint64
}

func (pl *pipeLimit) String() string {
	return fmt.Sprintf("limit %d", pl.limit)
}

func (pl *pipeLimit) canLiveTail() bool {
	return false
}

func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
	// nothing to do
}

func (pl *pipeLimit) hasFilterInWithQuery() bool {
	return false
}

func (pl *pipeLimit) initFilterInValues(_ *inValuesCache, _ getFieldValuesFunc) (pipe, error) {
	return pl, nil
}

func (pl *pipeLimit) visitSubqueries(_ func(q *Query)) {
	// nothing to do
}

func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
	if pl.limit == 0 {
		// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
		cancel()
	}
	return &pipeLimitProcessor{
		pl:     pl,
		cancel: cancel,
		ppNext: ppNext,
	}
}

type pipeLimitProcessor struct {
	pl     *pipeLimit
	cancel func()
	ppNext pipeProcessor

	rowsProcessed atomic.Uint64
}

func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
	if br.rowsLen == 0 {
		return
	}

	rowsProcessed := plp.rowsProcessed.Add(uint64(br.rowsLen))
	limit := plp.pl.limit
	if rowsProcessed <= limit {
		// Fast path - write all the rows to ppNext.
		plp.ppNext.writeBlock(workerID, br)
		if rowsProcessed == limit {
			plp.cancel()
		}
		return
	}

	// Slow path - overflow. Write the remaining rows if needed.
	rowsProcessed -= uint64(br.rowsLen)
	if rowsProcessed >= limit {
		// Nothing to write. There is no need to call cancel(), since it has already been called by another goroutine.
		return
	}

	// Write the remaining rows.
	keepRows := limit - rowsProcessed
	br.truncateRows(int(keepRows))
	plp.ppNext.writeBlock(workerID, br)

	// Notify the caller that it should stop passing more data to writeBlock().
	plp.cancel()
}

func (plp *pipeLimitProcessor) flush() error {
	return nil
}

func parsePipeLimit(lex *lexer) (pipe, error) {
	if !lex.isKeyword("limit", "head") {
		return nil, fmt.Errorf("expecting 'limit' or 'head'; got %q", lex.token)
	}
	lex.nextToken()

	limit := uint64(10)
	if !lex.isKeyword("|", ")", "") {
		n, err := parseUint(lex.token)
		if err != nil {
			return nil, fmt.Errorf("cannot parse rows limit from %q: %w", lex.token, err)
		}
		lex.nextToken()
		limit = n
	}

	pl := &pipeLimit{
		limit: limit,
	}
	return pl, nil
}
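
As a usage reference, the following sketch (assuming the package's exported logstorage.ParseQuery API) shows that `limit` and `head` parse interchangeably and that the row count defaults to 10 when omitted, matching parsePipeLimit above:

    package main

    import (
        "fmt"

        "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
    )

    func main() {
        for _, s := range []string{
            "error | limit 20", // explicit limit
            "error | head",     // `head` is an alias; the limit defaults to 10
        } {
            q, err := logstorage.ParseQuery(s)
            if err != nil {
                panic(err)
            }
            // `head` parses into the same pipeLimit pipe, so String() renders it as `limit 10`.
            fmt.Printf("%q parsed as %q\n", s, q.String())
        }
    }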