lib/logstorage: properly extract common tokens from unsupported OR filters

Previously the following query could miss rows matching !bar if these rows do not contain foo:

   foo OR !bar

This is because of incorrect detection of common tokens for OR filters - all the unsupported filters
were skipped (including the NOT filter (aka `!`)), while in this case zero common tokens must be returned.

While at it, move repetiteve code in TestFilterAnd and TestFilterOr into f function.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556
This commit is contained in:
Aliaksandr Valialkin 2024-09-08 11:14:49 +02:00
parent c448189f69
commit edb1afe804
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 74 additions and 616 deletions

View file

@ -114,6 +114,10 @@ func (fa *filterAnd) getByFieldTokens() []fieldTokens {
} }
func (fa *filterAnd) initByFieldTokens() { func (fa *filterAnd) initByFieldTokens() {
fa.byFieldTokens = getCommonTokensForAndFilters(fa.filters)
}
func getCommonTokensForAndFilters(filters []filter) []fieldTokens {
m := make(map[string]map[string]struct{}) m := make(map[string]map[string]struct{})
var fieldNames []string var fieldNames []string
@ -134,7 +138,7 @@ func (fa *filterAnd) initByFieldTokens() {
} }
} }
for _, f := range fa.filters { for _, f := range filters {
switch t := f.(type) { switch t := f.(type) {
case *filterExact: case *filterExact:
tokens := t.getTokens() tokens := t.getTokens()
@ -177,7 +181,7 @@ func (fa *filterAnd) initByFieldTokens() {
}) })
} }
fa.byFieldTokens = byFieldTokens return byFieldTokens
} }
func matchStringByAllTokens(v string, tokens []string) bool { func matchStringByAllTokens(v string, tokens []string) bool {

View file

@ -25,350 +25,53 @@ func TestFilterAnd(t *testing.T) {
}, },
} }
// non-empty intersection f := func(qStr string, expectedRowIdxs []int) {
// foo:a AND foo:abc* t.Helper()
fa := &filterAnd{
filters: []filter{ q, err := ParseQuery(qStr)
&filterPhrase{ if err != nil {
fieldName: "foo", t.Fatalf("unexpected error in ParseQuery: %s", err)
phrase: "a", }
}, testFilterMatchForColumns(t, columns, q.f, "foo", expectedRowIdxs)
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
} }
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
// non-empty intersection
f(`foo:a AND foo:abc*`, []int{2, 6})
// reverse non-empty intersection // reverse non-empty intersection
// foo:abc* AND foo:a f(`foo:abc* AND foo:a`, []int{2, 6})
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
// the first filter mismatch // the first filter mismatch
// foo:bc* AND foo:a f(`foo:bc* AND foo:a`, nil)
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "bc",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// the last filter mismatch // the last filter mismatch
// foo:abc AND foo:foo* f(`foo:abc AND foo:foo*`, nil)
fa = &filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "abc",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// empty intersection // empty intersection
// foo:foo AND foo:abc* f(`foo:foo AND foo:abc*`, nil)
fa = &filterAnd{ f(`foo:abc* AND foo:foo`, nil)
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse empty intersection
// foo:abc* AND foo:foo
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// empty value // empty value
// foo:"" AND bar:"" f(`foo:"" AND bar:""`, []int{5})
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "foo",
value: "",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{5})
// non-existing field with empty value // non-existing field with empty value
// foo:foo* AND bar:"" f(`foo:foo* AND bar:""`, []int{0, 1, 3, 4, 6})
fa = &filterAnd{ f(`bar:"" AND foo:foo*`, []int{0, 1, 3, 4, 6})
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// reverse non-existing field with empty value
// bar:"" AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// non-existing field with non-empty value // non-existing field with non-empty value
// foo:foo* AND bar:* f(`foo:foo* AND bar:*`, nil)
fa = &filterAnd{ f(`bar:* AND foo:foo*`, nil)
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse non-existing field with non-empty value
// bar:* AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554 // https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
// foo:"a foo"* AND (foo:="a foobar" OR boo:bbbbbbb) f(`foo:"a foo"* AND (foo:="a foobar" OR boo:bbbbbbb)`, []int{1})
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "a foo",
},
&filterOr{
filters: []filter{
&filterExact{
fieldName: "foo",
value: "a foobar",
},
&filterExact{
fieldName: "boo",
value: "bbbbbbb",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1})
// foo:"a foo"* AND (foo:"abcd foobar" OR foo:foobar) f(`foo:"a foo"* AND (foo:"abcd foobar" OR foo:foobar)`, []int{1, 6})
fa = &filterAnd{ f(`(foo:foo* OR bar:baz) AND (bar:x OR foo:a)`, []int{0, 1, 3, 4, 6})
filters: []filter{ f(`(foo:foo* OR bar:baz) AND (bar:x OR foo:xyz)`, nil)
&filterPrefix{ f(`(foo:foo* OR bar:baz) AND (bar:* OR foo:xyz)`, nil)
fieldName: "foo", f(`(foo:foo* OR bar:baz) AND (bar:"" OR foo:xyz)`, []int{0, 1, 3, 4, 6})
prefix: "a foo",
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "abcd foobar",
},
&filterPhrase{
fieldName: "foo",
phrase: "foobar",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:a) // negative filters
fa = &filterAnd{ f(`foo:foo* AND !foo:~bar`, []int{0})
filters: []filter{ f(`foo:foo* AND foo:!~bar`, []int{0})
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:* OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:"" OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
} }

View file

@ -126,6 +126,10 @@ func (fo *filterOr) getByFieldTokens() []fieldTokens {
} }
func (fo *filterOr) initByFieldTokens() { func (fo *filterOr) initByFieldTokens() {
fo.byFieldTokens = getCommonTokensForOrFilters(fo.filters)
}
func getCommonTokensForOrFilters(filters []filter) []fieldTokens {
m := make(map[string][][]string) m := make(map[string][][]string)
var fieldNames []string var fieldNames []string
@ -141,7 +145,7 @@ func (fo *filterOr) initByFieldTokens() {
m[fieldName] = append(m[fieldName], tokens) m[fieldName] = append(m[fieldName], tokens)
} }
for _, f := range fo.filters { for _, f := range filters {
switch t := f.(type) { switch t := f.(type) {
case *filterExact: case *filterExact:
tokens := t.getTokens() tokens := t.getTokens()
@ -166,28 +170,29 @@ func (fo *filterOr) initByFieldTokens() {
for _, bft := range bfts { for _, bft := range bfts {
mergeFieldTokens(bft.field, bft.tokens) mergeFieldTokens(bft.field, bft.tokens)
} }
default:
// Cannot extract tokens from this filter. This means that it is impossible to extract common tokens from OR filters.
return nil
} }
} }
var byFieldTokens []fieldTokens var byFieldTokens []fieldTokens
for _, fieldName := range fieldNames { for _, fieldName := range fieldNames {
tokenss := m[fieldName] tokenss := m[fieldName]
if len(tokenss) != len(fo.filters) { if len(tokenss) != len(filters) {
// The filter for the given fieldName is missing in some OR filters, // The filter for the given fieldName is missing in some OR filters,
// so it is impossible to extract common tokens from these filters. // so it is impossible to extract common tokens from these filters.
// Give up extracting common tokens from the remaining filters, // Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters. // since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554 // This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil return nil
break
} }
commonTokens := getCommonTokens(tokenss) commonTokens := getCommonTokens(tokenss)
if len(commonTokens) == 0 { if len(commonTokens) == 0 {
// Give up extracting common tokens from the remaining filters, // Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters. // since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554 // This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil return nil
break
} }
byFieldTokens = append(byFieldTokens, fieldTokens{ byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName, field: fieldName,
@ -196,5 +201,5 @@ func (fo *filterOr) initByFieldTokens() {
}) })
} }
fo.byFieldTokens = byFieldTokens return byFieldTokens
} }

View file

@ -25,302 +25,48 @@ func TestFilterOr(t *testing.T) {
}, },
} }
// non-empty union f := func(qStr string, expectedRowIdxs []int) {
// foo:23 OR foo:abc t.Helper()
fo := &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
// reverse non-empty union q, err := ParseQuery(qStr)
// foo:abc OR foo:23 if err != nil {
fo = &filterOr{ t.Fatalf("unexpected error in ParseQuery: %s", err)
filters: []filter{ }
&filterPrefix{ testFilterMatchForColumns(t, columns, q.f, "foo", expectedRowIdxs)
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
},
} }
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
// non-empty union
f(`foo:23 OR foo:abc*`, []int{2, 6, 9})
f(`foo:abc* OR foo:23`, []int{2, 6, 9})
// first empty result, second non-empty result // first empty result, second non-empty result
// foo:xabc* OR foo:23 f(`foo:xabc* OR foo:23`, []int{9})
fo = &filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// first non-empty result, second empty result // first non-empty result, second empty result
// foo:23 OR foo:xabc* f(`foo:23 OR foo:xabc*`, []int{9})
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// first match all // first match all
// foo:a OR foo:23 f(`foo:a OR foo:23`, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
&filterPrefix{
fieldName: "foo",
prefix: "23",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// second match all // second match all
// foo:23 OR foo:a f(`foo:23 OR foo:a`, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
fo = &filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "23",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// both empty results // both empty results
// foo:x23 OR foo:xabc f(`foo:x23 OR foo:xabc`, nil)
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "x23",
},
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
// non-existing column (last) // non-existing column (last)
// foo:23 OR bar:xabc* f(`foo:23 OR bar:xabc*`, []int{9})
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// non-existing column (first) // non-existing column (first)
// bar:xabc* OR foo:23 f(`bar:xabc* OR foo:23`, []int{9})
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// (foo:23 AND bar:"") OR (foo:foo AND bar:*) f(`(foo:23 AND bar:"") OR (foo:foo AND bar:*)`, []int{9})
fo = &filterOr{ f(`(foo:23 AND bar:"") OR (foo:foo AND bar:"")`, []int{0, 9})
filters: []filter{ f(`(foo:23 AND bar:"") OR (foo:foo AND baz:"")`, []int{0, 9})
&filterAnd{ f(`(foo:23 AND bar:abc) OR (foo:foo AND bar:"")`, []int{0})
filters: []filter{ f(`(foo:23 AND bar:abc) OR (foo:foo AND bar:*)`, nil)
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// (foo:23 AND bar:"") OR (foo:foo AND bar:"") // negative filter
fo = &filterOr{ f(`foo:baz or !foo:~foo`, []int{2, 3, 5, 7, 8, 9})
filters: []filter{ f(`foo:baz or foo:!~foo`, []int{2, 3, 5, 7, 8, 9})
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:"") OR (foo:foo AND baz:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "baz",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:*)
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
} }