lib/logstorage: optimize query immediately after its parsing

This eliminates possible bugs related to forgotten Query.Optimize() calls.

This also allows removing optimize() function from pipe interface.

While at it, drop filterNoop inside filterAnd.
This commit is contained in:
Aliaksandr Valialkin 2024-11-08 14:23:02 +01:00
parent 0550093802
commit 66b2987f49
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
39 changed files with 88 additions and 193 deletions

View file

@ -982,7 +982,6 @@ func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID,
if err != nil {
return nil, nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err)
}
q.Optimize()
// Parse optional start and end args
start, okStart, err := getTimeNsec(r, "start")

View file

@ -49,6 +49,17 @@ func visitFilters(filters []filter, visitFunc func(f filter) bool) bool {
//
// It doesn't copy other filters by returning them as is.
func copyFilter(f filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) (filter, error) {
	fCopied, err := copyFilterInternal(f, visitFunc, copyFunc)
	if err != nil {
		return nil, err
	}
	// Apply copyFunc to the top-level result too when visitFunc matches it;
	// otherwise return the internal copy as is.
	if visitFunc(fCopied) {
		return copyFunc(fCopied)
	}
	return fCopied, nil
}
func copyFilterInternal(f filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) (filter, error) {
switch t := f.(type) {
case *filterAnd:
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
@ -78,11 +89,7 @@ func copyFilter(f filter, visitFunc func(f filter) bool, copyFunc func(f filter)
}
return fn, nil
default:
if !visitFunc(t) {
// Nothing to copy
return t, nil
}
return copyFunc(t)
return f, nil
}
}

View file

@ -85,6 +85,13 @@ func TestGetCommonTokensForAndFilters(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error in ParseQuery: %s", err)
}
if _, ok := q.f.(*filterNoop); ok {
if len(tokensExpected) != 0 {
t.Fatalf("expecting non-empty tokens %q", tokensExpected)
}
return
}
fa, ok := q.f.(*filterAnd)
if !ok {
t.Fatalf("unexpected filter type: %T; want *filterAnd", q.f)
@ -134,6 +141,7 @@ func TestGetCommonTokensForAndFilters(t *testing.T) {
tokens: []string{"foo", "bar"},
},
})
f(`*`, nil)
f(`* *`, nil)
// empty filter must be skipped

View file

@ -50,26 +50,3 @@ func parseIfFilter(lex *lexer) (*ifFilter, error) {
return iff, nil
}
// optimizeFilterIn optimizes 'in(query)' filters inside the `if (...)` filter.
// Calling it on a nil *ifFilter is a no-op.
func (iff *ifFilter) optimizeFilterIn() {
	if iff != nil {
		optimizeFilterIn(iff.f)
	}
}
// optimizeFilterIn calls Query.Optimize for the subquery of every filterIn
// found inside f (recursively). A nil filter is a no-op.
func optimizeFilterIn(f filter) {
	if f == nil {
		return
	}
	_ = visitFilter(f, func(f filter) bool {
		if fi, ok := f.(*filterIn); ok && fi.q != nil {
			fi.q.Optimize()
		}
		return false
	})
}

View file

@ -349,9 +349,6 @@ func (q *Query) Clone(timestamp int64) *Query {
// CloneWithTimeFilter returns a copy of q bound to the given timestamp, with
// an additional filter limiting results to the [start ... end] time range.
func (q *Query) CloneWithTimeFilter(timestamp, start, end int64) *Query {
q = q.Clone(timestamp)
q.AddTimeFilter(start, end)
// q.Optimize() call is needed for converting '*' into filterNoop.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785#issuecomment-2358547733
q.Optimize()
return q
}
@ -534,8 +531,8 @@ func (q *Query) AddPipeLimit(n uint64) {
})
}
// Optimize tries optimizing the query.
func (q *Query) Optimize() {
// optimize tries optimizing the query.
func (q *Query) optimize() {
q.pipes = optimizeSortOffsetPipes(q.pipes)
q.pipes = optimizeSortLimitPipes(q.pipes)
q.pipes = optimizeUniqLimitPipes(q.pipes)
@ -560,14 +557,6 @@ func (q *Query) Optimize() {
// Substitute '*' prefixFilter with filterNoop in order to avoid reading _msg data.
q.f = removeStarFilters(q.f)
// Call Optimize for queries from 'in(query)' filters.
optimizeFilterIn(q.f)
// Optimize individual pipes.
for _, p := range q.pipes {
p.optimize()
}
}
// GetStatsByFields returns `by (...)` fields from the last `stats` pipe at q.
@ -746,6 +735,7 @@ func addByTimeField(byFields []*byStatsField, step int64) []*byStatsField {
}
func removeStarFilters(f filter) filter {
// Substitute `*` filterPrefix with filterNoop
visitFunc := func(f filter) bool {
fp, ok := f.(*filterPrefix)
return ok && isMsgFieldName(fp.fieldName) && fp.prefix == ""
@ -758,6 +748,43 @@ func removeStarFilters(f filter) filter {
if err != nil {
logger.Fatalf("BUG: unexpected error: %s", err)
}
// Drop filterNoop inside filterAnd
visitFunc = func(f filter) bool {
fa, ok := f.(*filterAnd)
if !ok {
return false
}
for _, f := range fa.filters {
if _, ok := f.(*filterNoop); ok {
return true
}
}
return false
}
copyFunc = func(f filter) (filter, error) {
fa := f.(*filterAnd)
var resultFilters []filter
for _, f := range fa.filters {
if _, ok := f.(*filterNoop); !ok {
resultFilters = append(resultFilters, f)
}
}
if len(resultFilters) == 0 {
return &filterNoop{}, nil
}
if len(resultFilters) == 1 {
return resultFilters[0], nil
}
return &filterAnd{
filters: resultFilters,
}, nil
}
f, err = copyFilter(f, visitFunc, copyFunc)
if err != nil {
logger.Fatalf("BUG: unexpected error: %s", err)
}
return f
}
@ -946,6 +973,7 @@ func ParseQueryAtTimestamp(s string, timestamp int64) (*Query, error) {
return nil, fmt.Errorf("unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]", q, lex.context(), lex.s)
}
q.timestamp = timestamp
q.optimize()
return q, nil
}

View file

@ -534,19 +534,28 @@ func TestParseFilterPrefix(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
fp, ok := q.f.(*filterPrefix)
if !ok {
t.Fatalf("unexpected filter type; got %T; want *filterPrefix; filter: %s", q.f, q.f)
switch f := q.f.(type) {
case *filterPrefix:
if f.fieldName != fieldNameExpected {
t.Fatalf("unexpected fieldName; got %q; want %q", f.fieldName, fieldNameExpected)
}
if fp.fieldName != fieldNameExpected {
t.Fatalf("unexpected fieldName; got %q; want %q", fp.fieldName, fieldNameExpected)
if f.prefix != prefixExpected {
t.Fatalf("unexpected prefix; got %q; want %q", f.prefix, prefixExpected)
}
if fp.prefix != prefixExpected {
t.Fatalf("unexpected prefix; got %q; want %q", fp.prefix, prefixExpected)
case *filterNoop:
if fieldNameExpected != "" {
t.Fatalf("expecting non-empty fieldName %q", fieldNameExpected)
}
if prefixExpected != "" {
t.Fatalf("expecting non-empty prefix %q", prefixExpected)
}
default:
t.Fatalf("unexpected filter type; got %T; want *filterPrefix or *filterNoop; filter: %s", q.f, q.f)
}
}
f(`*`, ``, ``)
f(`f:*`, `f`, ``)
f(`""*`, ``, ``)
f(`foo*`, ``, `foo`)
f(`abc-de.fg:foo-bar+baz*`, `abc-de.fg`, `foo-bar+baz`)
@ -695,8 +704,8 @@ func TestParseQuerySuccess(t *testing.T) {
f(`'foo'* and (a:x* and x:* or y:i(""*)) and i("abc def"*)`, `foo* (a:x* x:* or y:i(*)) i("abc def"*)`)
// This isn't a prefix search - it equals to `foo AND *`
f(`foo *`, `foo *`)
f(`"foo" *`, `foo *`)
f(`foo *`, `foo`)
f(`"foo" *`, `foo`)
// empty filter
f(`"" or foo:"" and not bar:""`, `"" or foo:"" !bar:""`)
@ -1197,13 +1206,13 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | uniq limit 10`, `* | uniq limit 10`)
// filter pipe
f(`* | filter error ip:12.3.4.5 or warn`, `* | filter error ip:12.3.4.5 or warn`)
f(`foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10`, `foo | stats by (host) count(*) as logs | filter logs:>50 | sort by (logs desc) | limit 10`)
f(`* | error`, `* | filter error`)
f(`* | "by"`, `* | filter "by"`)
f(`* | "stats"`, `* | filter "stats"`)
f(`* | "count"`, `* | filter "count"`)
f(`* | foo:bar AND baz:<10`, `* | filter foo:bar baz:<10`)
f(`* | filter error ip:12.3.4.5 or warn`, `error ip:12.3.4.5 or warn`)
f(`foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10`, `foo | stats by (host) count(*) as logs | filter logs:>50 | sort by (logs desc) limit 10`)
f(`* | error`, `error`)
f(`* | "by"`, `"by"`)
f(`* | "stats" *`, `"stats"`)
f(`* | * "count"`, `"count"`)
f(`* | foo:bar AND baz:<10`, `foo:bar baz:<10`)
// extract pipe
f(`* | extract "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
@ -1734,7 +1743,6 @@ func TestQueryGetNeededColumns(t *testing.T) {
if err != nil {
t.Fatalf("cannot parse query [%s]: %s", s, err)
}
q.Optimize()
needed, unneeded := q.getNeededColumns()
neededColumns := strings.Join(needed, ",")
@ -2200,7 +2208,6 @@ func TestQueryDropAllPipes(t *testing.T) {
if err != nil {
t.Fatalf("cannot parse [%s]: %s", qStr, err)
}
q.Optimize()
q.DropAllPipes()
result := q.String()
if result != resultExpected {

View file

@ -26,9 +26,6 @@ type pipe interface {
// The returned pipeProcessor may call cancel() at any time in order to notify the caller to stop sending new data to it.
newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor
// optimize must optimize the pipe
optimize()
// hasFilterInWithQuery must return true of pipe contains 'in(subquery)' filter (recursively).
hasFilterInWithQuery() bool

View file

@ -22,10 +22,6 @@ func (ps *pipeBlockStats) canLiveTail() bool {
return false
}
// optimize implements the pipe interface; pipeBlockStats has nothing to optimize.
func (ps *pipeBlockStats) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (ps *pipeBlockStats) hasFilterInWithQuery() bool {
return false
}

View file

@ -31,10 +31,6 @@ func (pc *pipeBlocksCount) updateNeededFields(neededFields, unneededFields field
unneededFields.reset()
}
// optimize implements the pipe interface; pipeBlocksCount has nothing to optimize.
func (pc *pipeBlocksCount) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pc *pipeBlocksCount) hasFilterInWithQuery() bool {
return false
}

View file

@ -54,10 +54,6 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeCopy has nothing to optimize.
func (pc *pipeCopy) optimize() {
// Nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pc *pipeCopy) hasFilterInWithQuery() bool {
return false
}

View file

@ -34,10 +34,6 @@ func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
// optimize implements the pipe interface; pipeDelete has nothing to optimize.
func (pd *pipeDelete) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pd *pipeDelete) hasFilterInWithQuery() bool {
return false
}

View file

@ -21,10 +21,6 @@ func (pd *pipeDropEmptyFields) canLiveTail() bool {
return true
}
// optimize implements the pipe interface; pipeDropEmptyFields has nothing to optimize.
func (pd *pipeDropEmptyFields) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pd *pipeDropEmptyFields) hasFilterInWithQuery() bool {
return false
}

View file

@ -45,10 +45,6 @@ func (pe *pipeExtract) canLiveTail() bool {
return true
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pe *pipeExtract) optimize() {
pe.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pe *pipeExtract) hasFilterInWithQuery() bool {
return pe.iff.hasFilterInWithQuery()
}

View file

@ -47,10 +47,6 @@ func (pe *pipeExtractRegexp) canLiveTail() bool {
return true
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pe *pipeExtractRegexp) optimize() {
pe.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pe *pipeExtractRegexp) hasFilterInWithQuery() bool {
return pe.iff.hasFilterInWithQuery()
}

View file

@ -39,10 +39,6 @@ func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fields
unneededFields.reset()
}
// optimize implements the pipe interface; pipeFieldNames has nothing to optimize.
func (pf *pipeFieldNames) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pf *pipeFieldNames) hasFilterInWithQuery() bool {
return false
}

View file

@ -46,10 +46,6 @@ func (pf *pipeFieldValues) updateNeededFields(neededFields, unneededFields field
}
}
// optimize implements the pipe interface; pipeFieldValues has nothing to optimize.
func (pf *pipeFieldValues) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pf *pipeFieldValues) hasFilterInWithQuery() bool {
return false
}

View file

@ -54,10 +54,6 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset()
}
// optimize implements the pipe interface; pipeFields has nothing to optimize.
func (pf *pipeFields) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pf *pipeFields) hasFilterInWithQuery() bool {
return false
}

View file

@ -33,10 +33,6 @@ func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
// optimize optimizes 'in(query)' filters inside the pipe's filter.
func (pf *pipeFilter) optimize() {
optimizeFilterIn(pf.f)
}
// hasFilterInWithQuery returns true if pf.f contains an 'in(subquery)' filter (recursively).
func (pf *pipeFilter) hasFilterInWithQuery() bool {
return hasFilterInWithQueryForFilter(pf.f)
}

View file

@ -87,10 +87,6 @@ func (pf *pipeFormat) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pf *pipeFormat) optimize() {
pf.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pf *pipeFormat) hasFilterInWithQuery() bool {
return pf.iff.hasFilterInWithQuery()
}

View file

@ -37,10 +37,6 @@ func (pj *pipeJoin) canLiveTail() bool {
return true
}
// optimize optimizes the subquery executed by the join pipe.
func (pj *pipeJoin) optimize() {
pj.q.Optimize()
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pj *pipeJoin) hasFilterInWithQuery() bool {
return false
}

View file

@ -41,10 +41,6 @@ func (pl *pipeLen) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeLen has nothing to optimize.
func (pl *pipeLen) optimize() {
// Nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pl *pipeLen) hasFilterInWithQuery() bool {
return false
}

View file

@ -24,10 +24,6 @@ func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
// nothing to do
}
// optimize implements the pipe interface; pipeLimit has nothing to optimize.
func (pl *pipeLimit) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pl *pipeLimit) hasFilterInWithQuery() bool {
return false
}

View file

@ -221,10 +221,6 @@ func (me *mathExpr) updateNeededFields(neededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeMath has nothing to optimize.
func (pm *pipeMath) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pm *pipeMath) hasFilterInWithQuery() bool {
return false
}

View file

@ -24,10 +24,6 @@ func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
// nothing to do
}
// optimize implements the pipe interface; pipeOffset has nothing to optimize.
func (po *pipeOffset) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (po *pipeOffset) hasFilterInWithQuery() bool {
return false
}

View file

@ -33,10 +33,6 @@ func (pp *pipePackJSON) updateNeededFields(neededFields, unneededFields fieldsSe
updateNeededFieldsForPipePack(neededFields, unneededFields, pp.resultField, pp.fields)
}
// optimize implements the pipe interface; pipePackJSON has nothing to optimize.
func (pp *pipePackJSON) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pp *pipePackJSON) hasFilterInWithQuery() bool {
return false
}

View file

@ -33,10 +33,6 @@ func (pp *pipePackLogfmt) updateNeededFields(neededFields, unneededFields fields
updateNeededFieldsForPipePack(neededFields, unneededFields, pp.resultField, pp.fields)
}
// optimize implements the pipe interface; pipePackLogfmt has nothing to optimize.
func (pp *pipePackLogfmt) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pp *pipePackLogfmt) hasFilterInWithQuery() bool {
return false
}

View file

@ -58,10 +58,6 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
}
}
// optimize implements the pipe interface; pipeRename has nothing to optimize.
func (pr *pipeRename) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pr *pipeRename) hasFilterInWithQuery() bool {
return false
}

View file

@ -45,10 +45,6 @@ func (pr *pipeReplace) updateNeededFields(neededFields, unneededFields fieldsSet
updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pr *pipeReplace) optimize() {
pr.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pr *pipeReplace) hasFilterInWithQuery() bool {
return pr.iff.hasFilterInWithQuery()
}

View file

@ -45,10 +45,6 @@ func (pr *pipeReplaceRegexp) updateNeededFields(neededFields, unneededFields fie
updateNeededFieldsForUpdatePipe(neededFields, unneededFields, pr.field, pr.iff)
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pr *pipeReplaceRegexp) optimize() {
pr.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pr *pipeReplaceRegexp) hasFilterInWithQuery() bool {
return pr.iff.hasFilterInWithQuery()
}

View file

@ -89,10 +89,6 @@ func (ps *pipeSort) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeSort has nothing to optimize.
func (ps *pipeSort) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (ps *pipeSort) hasFilterInWithQuery() bool {
return false
}

View file

@ -123,12 +123,6 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
unneededFields.reset()
}
// optimize propagates optimization into the optional `if (...)` filters
// of all the configured stats functions.
func (ps *pipeStats) optimize() {
	for i := range ps.funcs {
		ps.funcs[i].iff.optimizeFilterIn()
	}
}
func (ps *pipeStats) hasFilterInWithQuery() bool {
for _, f := range ps.funcs {
if f.iff.hasFilterInWithQuery() {

View file

@ -55,10 +55,6 @@ func (pc *pipeStreamContext) updateNeededFields(neededFields, unneededFields fie
unneededFields.removeFields(neededFieldsForStreamContext)
}
// optimize implements the pipe interface; pipeStreamContext has nothing to optimize.
func (pc *pipeStreamContext) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pc *pipeStreamContext) hasFilterInWithQuery() bool {
return false
}

View file

@ -71,10 +71,6 @@ func (pt *pipeTop) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeTop has nothing to optimize.
func (pt *pipeTop) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pt *pipeTop) hasFilterInWithQuery() bool {
return false
}

View file

@ -58,10 +58,6 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
}
}
// optimize implements the pipe interface; pipeUniq has nothing to optimize.
func (pu *pipeUniq) optimize() {
// nothing to do
}
// hasFilterInWithQuery reports whether the pipe contains an 'in(subquery)' filter; it always returns false here.
func (pu *pipeUniq) hasFilterInWithQuery() bool {
return false
}

View file

@ -60,10 +60,6 @@ func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fields
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pu *pipeUnpackJSON) optimize() {
pu.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pu *pipeUnpackJSON) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}

View file

@ -58,10 +58,6 @@ func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fiel
updateNeededFieldsForUnpackPipe(pu.fromField, pu.fields, pu.keepOriginalFields, pu.skipEmptyResults, pu.iff, neededFields, unneededFields)
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pu *pipeUnpackLogfmt) optimize() {
pu.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pu *pipeUnpackLogfmt) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}

View file

@ -54,10 +54,6 @@ func (pu *pipeUnpackSyslog) updateNeededFields(neededFields, unneededFields fiel
updateNeededFieldsForUnpackPipe(pu.fromField, nil, pu.keepOriginalFields, false, pu.iff, neededFields, unneededFields)
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pu *pipeUnpackSyslog) optimize() {
pu.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pu *pipeUnpackSyslog) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}

View file

@ -36,10 +36,6 @@ func (pu *pipeUnroll) canLiveTail() bool {
return true
}
// optimize optimizes 'in(query)' filters inside the optional `if (...)` filter.
func (pu *pipeUnroll) optimize() {
pu.iff.optimizeFilterIn()
}
// hasFilterInWithQuery returns true if the optional `if (...)` filter contains an 'in(subquery)' filter.
func (pu *pipeUnroll) hasFilterInWithQuery() bool {
return pu.iff.hasFilterInWithQuery()
}

View file

@ -220,8 +220,6 @@ func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Qu
func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query, byFields []string, prefix string) (map[string][][]Field, error) {
// TODO: track memory usage
logger.Infof("DEBUG: byFields=%q, prefix=%q", byFields, prefix)
m := make(map[string][][]Field)
var mLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {