VictoriaMetrics/lib/logstorage/pipe_field_values.go
Aliaksandr Valialkin ad6c587494
lib/logstorage: properly propagate extra filters to all the subqueries
The purpose of extra filters (https://docs.victoriametrics.com/victorialogs/querying/#extra-filters)
is to limit the subset of logs that can be queried. For example, it is expected that every query issued
with `extra_filters={tenant=123}` can access only logs containing the value `123` in the `tenant` field.

Previously this wasn't the case, since the provided extra filters weren't applied to subqueries.
For example, the following query could be used to select all the logs outside `tenant=123`, regardless of the `extra_filters` arg:

    * | union({tenant!=123})

This commit fixes the issue by propagating the extra filters to all the subqueries.
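
The idea can be illustrated with a minimal, self-contained Go sketch (a hypothetical `Query` type and
helpers, not the actual VictoriaLogs API): walk the query tree with a visitor, similar in spirit to the
per-pipe `visitSubqueries` hooks, and AND the extra filter into every node:

    package main

    import "fmt"

    // Query is a hypothetical, simplified model of a parsed LogsQL query:
    // a filter expression plus the subqueries referenced by pipes such as union(...).
    type Query struct {
        filter     string
        subqueries []*Query
    }

    // visitSubqueries calls f for q and, recursively, for every subquery of q.
    func visitSubqueries(q *Query, f func(q *Query)) {
        f(q)
        for _, sq := range q.subqueries {
            visitSubqueries(sq, f)
        }
    }

    // addExtraFilter ANDs the extra filter into the query and all its subqueries,
    // so no subquery can escape the restriction.
    func addExtraFilter(q *Query, extraFilter string) {
        visitSubqueries(q, func(q *Query) {
            q.filter = "(" + q.filter + ") AND (" + extraFilter + ")"
        })
    }

    func main() {
        // Models `* | union({tenant!=123})` from the example above.
        q := &Query{
            filter:     "*",
            subqueries: []*Query{{filter: "tenant!=123"}},
        }
        addExtraFilter(q, "tenant=123")
        fmt.Println(q.filter)               // (*) AND (tenant=123)
        fmt.Println(q.subqueries[0].filter) // (tenant!=123) AND (tenant=123)
    }

The same traversal makes it easy to apply Query.optimize() (or any other per-query transformation)
to every subquery as well.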

While at it, this commit also properly propagates the [start, end] time range filter from the HTTP querying APIs
into all the subqueries, since this is what most users expect. This behaviour can be overridden on a per-subquery
basis with the `options(ignore_global_time_filter=true)` option; see https://docs.victoriametrics.com/victorialogs/logsql/#query-options
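
The time range propagation can be sketched similarly (again with hypothetical types, not the actual
implementation): the global [start, end] range coming from the HTTP API is intersected with each
subquery's own time range, unless the subquery opts out via the option above:

    package main

    import (
        "fmt"
        "time"
    )

    // subquery is a hypothetical model of a subquery's time bounds and options.
    type subquery struct {
        start, end             time.Time
        ignoreGlobalTimeFilter bool // set via options(ignore_global_time_filter=true)
    }

    // applyGlobalTimeFilter narrows sq's time range to the global [start, end] range,
    // unless the per-subquery option disables it.
    func applyGlobalTimeFilter(sq *subquery, start, end time.Time) {
        if sq.ignoreGlobalTimeFilter {
            return
        }
        if sq.start.Before(start) {
            sq.start = start
        }
        if sq.end.After(end) {
            sq.end = end
        }
    }

    func main() {
        globalStart := time.Date(2025, 1, 24, 0, 0, 0, 0, time.UTC)
        globalEnd := time.Date(2025, 1, 24, 12, 0, 0, 0, time.UTC)

        sq := &subquery{
            start: time.Date(2025, 1, 23, 0, 0, 0, 0, time.UTC),
            end:   time.Date(2025, 1, 25, 0, 0, 0, 0, time.UTC),
        }
        applyGlobalTimeFilter(sq, globalStart, globalEnd)
        fmt.Println(sq.start, sq.end) // narrowed to the global [start, end] range
    }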

This commit also properly applies optimizations across all the subqueries. Previously the optimizations
in the Query.optimize() function were applied only to the top-level query.
2025-01-24 18:49:25 +01:00

package logstorage

import (
	"fmt"
)

// pipeFieldValues processes '| field_values ...' queries.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe
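//
// For example, the following query (a usage sketch based on the parser below) returns
// the unique values of the `level` field over the last 5 minutes, together with the
// number of hits per value, limiting the result to at most 10 values:
//
//	_time:5m | field_values level limit 10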
type pipeFieldValues struct {
	field string
	limit uint64
}

func (pf *pipeFieldValues) String() string {
	s := "field_values " + quoteTokenIfNeeded(pf.field)
	if pf.limit > 0 {
		s += fmt.Sprintf(" limit %d", pf.limit)
	}
	return s
}

func (pf *pipeFieldValues) canLiveTail() bool {
	return false
}

func (pf *pipeFieldValues) updateNeededFields(neededFields, unneededFields fieldsSet) {
	if neededFields.isEmpty() {
		neededFields.add(pf.field)
		return
	}
	if neededFields.contains("*") {
		neededFields.reset()
		if !unneededFields.contains(pf.field) {
			neededFields.add(pf.field)
		}
		unneededFields.reset()
	} else {
		neededFieldsOrig := neededFields.clone()
		neededFields.reset()
		if neededFieldsOrig.contains(pf.field) {
			neededFields.add(pf.field)
		}
	}
}

func (pf *pipeFieldValues) hasFilterInWithQuery() bool {
	return false
}

func (pf *pipeFieldValues) initFilterInValues(_ *inValuesCache, _ getFieldValuesFunc) (pipe, error) {
	return pf, nil
}

func (pf *pipeFieldValues) visitSubqueries(_ func(q *Query)) {
	// nothing to do
}

func (pf *pipeFieldValues) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
	// field_values is implemented on top of the uniq pipe: it deduplicates values
	// of pf.field and reports per-value occurrence counts in the hits column.
	// If the tracked field is itself named "hits", pick a non-clashing name
	// for the counters column.
	hitsFieldName := "hits"
	if hitsFieldName == pf.field {
		hitsFieldName = "hitss"
	}
	pu := &pipeUniq{
		byFields:      []string{pf.field},
		hitsFieldName: hitsFieldName,
		limit:         pf.limit,
	}
	return pu.newPipeProcessor(workersCount, stopCh, cancel, ppNext)
}

func parsePipeFieldValues(lex *lexer) (pipe, error) {
	if !lex.isKeyword("field_values") {
		return nil, fmt.Errorf("expecting 'field_values'; got %q", lex.token)
	}
	lex.nextToken()

	field, err := parseFieldNameWithOptionalParens(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot parse field name for 'field_values': %w", err)
	}

	limit := uint64(0)
	if lex.isKeyword("limit") {
		lex.nextToken()
		n, ok := tryParseUint64(lex.token)
		if !ok {
			return nil, fmt.Errorf("cannot parse 'limit %s'", lex.token)
		}
		lex.nextToken()
		limit = n
	}

	pf := &pipeFieldValues{
		field: field,
		limit: limit,
	}
	return pf, nil
}