VictoriaMetrics/lib/logstorage/pipe_offset.go
Aliaksandr Valialkin ad6c587494
lib/logstorage: properly propagate extra filters to all the subqueries
The purpose of extra filters ( https://docs.victoriametrics.com/victorialogs/querying/#extra-filters )
is to limit the subset of logs, which can be queried. For example, it is expected that all the queries
with `extra_filters={tenant=123}` can access only logs, which contain `123` value for the `tenant` field.

Previously this wasn't the case, since the provided extra filters weren't applied to subqueries.
For example, the following query could be used to select all the logs outside `tenant=123`, for any `extra_filters` arg:

    * | union({tenant!=123})

This commit fixes this by propagating extra filters to all the subqueries.

While at it, this commit also properly propagates [start, end] time range filter from HTTP querying APIs
into all the subqueries, since this is what most users expect. This behaviour can be overridden on a per-subquery
basis with the `options(ignore_global_time_filter=true)` option - see https://docs.victoriametrics.com/victorialogs/logsql/#query-options

Also properly apply optimizations across all the subqueries. Previously the optimizations at Query.optimize()
function were applied only to the top-level query.
2025-01-24 18:49:25 +01:00

93 lines
1.9 KiB
Go

package logstorage
import (
"fmt"
"sync/atomic"
)
// pipeOffset implements '| offset ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#offset-pipe
type pipeOffset struct {
	// offset is the number of leading result rows to skip before
	// passing the remaining rows to the next pipe.
	offset uint64
}
// String returns the LogsQL representation of the pipe, e.g. "offset 10".
func (po *pipeOffset) String() string {
	return fmt.Sprintf("offset %d", po.offset)
}
// canLiveTail returns false, so '| offset ...' cannot be used in live tailing
// queries (skipping a fixed prefix of rows presumably needs a well-defined
// start of the result stream, which live tailing lacks).
func (po *pipeOffset) canLiveTail() bool {
	return false
}
// updateNeededFields is a no-op - the pipe neither reads nor modifies any fields.
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
	// nothing to do
}
// hasFilterInWithQuery returns false - the pipe contains no in(<subquery>) filters.
func (po *pipeOffset) hasFilterInWithQuery() bool {
	return false
}
// initFilterInValues returns the pipe unchanged - there are no in(<subquery>)
// filters to initialize.
func (po *pipeOffset) initFilterInValues(_ *inValuesCache, _ getFieldValuesFunc) (pipe, error) {
	return po, nil
}
// visitSubqueries is a no-op - the pipe contains no subqueries to visit.
func (po *pipeOffset) visitSubqueries(_ func(q *Query)) {
	// nothing to do
}
// newPipeProcessor returns a processor, which drops the first po.offset rows
// and forwards the rest to ppNext. The concurrency, stop-channel and cancel
// arguments are unused, since the processor needs no background work.
func (po *pipeOffset) newPipeProcessor(_ int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
	return &pipeOffsetProcessor{
		po:     po,
		ppNext: ppNext,
	}
}
// pipeOffsetProcessor executes pipeOffset.
type pipeOffsetProcessor struct {
	po     *pipeOffset
	ppNext pipeProcessor

	// rowsProcessed is the total number of rows seen so far.
	// It is atomic, since writeBlock may be called concurrently by multiple workers.
	rowsProcessed atomic.Uint64
}
// writeBlock drops rows from br while fewer than po.offset rows have been seen,
// and forwards the remaining rows to ppNext.
//
// The atomic counter is bumped exactly once per block, so the method is safe
// to call concurrently from multiple workers.
func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
	if br.rowsLen == 0 {
		return
	}

	offset := pop.po.offset
	totalRows := pop.rowsProcessed.Add(uint64(br.rowsLen))
	if totalRows <= offset {
		// The whole block lies inside the skipped prefix.
		return
	}

	rowsBefore := totalRows - uint64(br.rowsLen)
	if rowsBefore < offset {
		// The block straddles the offset boundary - drop its leading rows.
		br.skipRows(int(offset - rowsBefore))
	}
	pop.ppNext.writeBlock(workerID, br)
}
// flush is a no-op - the processor buffers no rows and cannot fail.
func (pop *pipeOffsetProcessor) flush() error {
	return nil
}
// parsePipeOffset parses '| offset N' (aka '| skip N') pipe from lex.
//
// lex must point at the 'offset' or 'skip' keyword; on success the lexer is
// positioned at the token following the row count.
func parsePipeOffset(lex *lexer) (pipe, error) {
	if !lex.isKeyword("offset", "skip") {
		return nil, fmt.Errorf("expecting 'offset' or 'skip'; got %q", lex.token)
	}
	lex.nextToken()

	rowsToSkip, err := parseUint(lex.token)
	if err != nil {
		return nil, fmt.Errorf("cannot parse the number of rows to skip from %q: %w", lex.token, err)
	}
	lex.nextToken()

	return &pipeOffset{offset: rowsToSkip}, nil
}