lib/logstorage: properly extract common tokens from unsupported OR filters

Previously the following query could miss rows matching !bar if these rows do not contain foo:

   foo OR !bar

This is because of incorrect detection of common tokens for OR filters - all the unsupported filters
were skipped (including the NOT filter (aka `!`)), while in this case zero common tokens must be returned.

While at it, move repetiteve code in TestFilterAnd and TestFilterOr into f function.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556
This commit is contained in:
Aliaksandr Valialkin 2024-09-08 11:14:49 +02:00
parent c448189f69
commit edb1afe804
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 74 additions and 616 deletions

View file

@ -114,6 +114,10 @@ func (fa *filterAnd) getByFieldTokens() []fieldTokens {
}
func (fa *filterAnd) initByFieldTokens() {
fa.byFieldTokens = getCommonTokensForAndFilters(fa.filters)
}
func getCommonTokensForAndFilters(filters []filter) []fieldTokens {
m := make(map[string]map[string]struct{})
var fieldNames []string
@ -134,7 +138,7 @@ func (fa *filterAnd) initByFieldTokens() {
}
}
for _, f := range fa.filters {
for _, f := range filters {
switch t := f.(type) {
case *filterExact:
tokens := t.getTokens()
@ -177,7 +181,7 @@ func (fa *filterAnd) initByFieldTokens() {
})
}
fa.byFieldTokens = byFieldTokens
return byFieldTokens
}
func matchStringByAllTokens(v string, tokens []string) bool {

View file

@ -25,350 +25,53 @@ func TestFilterAnd(t *testing.T) {
},
}
// non-empty intersection
// foo:a AND foo:abc*
fa := &filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
f := func(qStr string, expectedRowIdxs []int) {
t.Helper()
q, err := ParseQuery(qStr)
if err != nil {
t.Fatalf("unexpected error in ParseQuery: %s", err)
}
testFilterMatchForColumns(t, columns, q.f, "foo", expectedRowIdxs)
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
// non-empty intersection
f(`foo:a AND foo:abc*`, []int{2, 6})
// reverse non-empty intersection
// foo:abc* AND foo:a
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
f(`foo:abc* AND foo:a`, []int{2, 6})
// the first filter mismatch
// foo:bc* AND foo:a
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "bc",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
f(`foo:bc* AND foo:a`, nil)
// the last filter mismatch
// foo:abc AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "abc",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
f(`foo:abc AND foo:foo*`, nil)
// empty intersection
// foo:foo AND foo:abc*
fa = &filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse empty intersection
// foo:abc* AND foo:foo
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
f(`foo:foo AND foo:abc*`, nil)
f(`foo:abc* AND foo:foo`, nil)
// empty value
// foo:"" AND bar:""
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "foo",
value: "",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{5})
f(`foo:"" AND bar:""`, []int{5})
// non-existing field with empty value
// foo:foo* AND bar:""
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// reverse non-existing field with empty value
// bar:"" AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
f(`foo:foo* AND bar:""`, []int{0, 1, 3, 4, 6})
f(`bar:"" AND foo:foo*`, []int{0, 1, 3, 4, 6})
// non-existing field with non-empty value
// foo:foo* AND bar:*
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse non-existing field with non-empty value
// bar:* AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
f(`foo:foo* AND bar:*`, nil)
f(`bar:* AND foo:foo*`, nil)
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
// foo:"a foo"* AND (foo:="a foobar" OR boo:bbbbbbb)
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "a foo",
},
&filterOr{
filters: []filter{
&filterExact{
fieldName: "foo",
value: "a foobar",
},
&filterExact{
fieldName: "boo",
value: "bbbbbbb",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1})
f(`foo:"a foo"* AND (foo:="a foobar" OR boo:bbbbbbb)`, []int{1})
// foo:"a foo"* AND (foo:"abcd foobar" OR foo:foobar)
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "a foo",
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "abcd foobar",
},
&filterPhrase{
fieldName: "foo",
phrase: "foobar",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1, 6})
f(`foo:"a foo"* AND (foo:"abcd foobar" OR foo:foobar)`, []int{1, 6})
f(`(foo:foo* OR bar:baz) AND (bar:x OR foo:a)`, []int{0, 1, 3, 4, 6})
f(`(foo:foo* OR bar:baz) AND (bar:x OR foo:xyz)`, nil)
f(`(foo:foo* OR bar:baz) AND (bar:* OR foo:xyz)`, nil)
f(`(foo:foo* OR bar:baz) AND (bar:"" OR foo:xyz)`, []int{0, 1, 3, 4, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:a)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:* OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:"" OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// negative filters
f(`foo:foo* AND !foo:~bar`, []int{0})
f(`foo:foo* AND foo:!~bar`, []int{0})
}

View file

@ -126,6 +126,10 @@ func (fo *filterOr) getByFieldTokens() []fieldTokens {
}
func (fo *filterOr) initByFieldTokens() {
fo.byFieldTokens = getCommonTokensForOrFilters(fo.filters)
}
func getCommonTokensForOrFilters(filters []filter) []fieldTokens {
m := make(map[string][][]string)
var fieldNames []string
@ -141,7 +145,7 @@ func (fo *filterOr) initByFieldTokens() {
m[fieldName] = append(m[fieldName], tokens)
}
for _, f := range fo.filters {
for _, f := range filters {
switch t := f.(type) {
case *filterExact:
tokens := t.getTokens()
@ -166,28 +170,29 @@ func (fo *filterOr) initByFieldTokens() {
for _, bft := range bfts {
mergeFieldTokens(bft.field, bft.tokens)
}
default:
// Cannot extract tokens from this filter. This means that it is impossible to extract common tokens from OR filters.
return nil
}
}
var byFieldTokens []fieldTokens
for _, fieldName := range fieldNames {
tokenss := m[fieldName]
if len(tokenss) != len(fo.filters) {
if len(tokenss) != len(filters) {
// The filter for the given fieldName is missing in some OR filters,
// so it is impossible to extract common tokens from these filters.
// Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil
break
return nil
}
commonTokens := getCommonTokens(tokenss)
if len(commonTokens) == 0 {
// Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil
break
return nil
}
byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName,
@ -196,5 +201,5 @@ func (fo *filterOr) initByFieldTokens() {
})
}
fo.byFieldTokens = byFieldTokens
return byFieldTokens
}

View file

@ -25,302 +25,48 @@ func TestFilterOr(t *testing.T) {
},
}
// non-empty union
// foo:23 OR foo:abc
fo := &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
f := func(qStr string, expectedRowIdxs []int) {
t.Helper()
// reverse non-empty union
// foo:abc OR foo:23
fo = &filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "abc",
},
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
},
q, err := ParseQuery(qStr)
if err != nil {
t.Fatalf("unexpected error in ParseQuery: %s", err)
}
testFilterMatchForColumns(t, columns, q.f, "foo", expectedRowIdxs)
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
// non-empty union
f(`foo:23 OR foo:abc*`, []int{2, 6, 9})
f(`foo:abc* OR foo:23`, []int{2, 6, 9})
// first empty result, second non-empty result
// foo:xabc* OR foo:23
fo = &filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
f(`foo:xabc* OR foo:23`, []int{9})
// first non-empty result, second empty result
// foo:23 OR foo:xabc*
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
f(`foo:23 OR foo:xabc*`, []int{9})
// first match all
// foo:a OR foo:23
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
&filterPrefix{
fieldName: "foo",
prefix: "23",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
f(`foo:a OR foo:23`, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// second match all
// foo:23 OR foo:a
fo = &filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "23",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
f(`foo:23 OR foo:a`, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// both empty results
// foo:x23 OR foo:xabc
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "x23",
},
&filterPrefix{
fieldName: "foo",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
f(`foo:x23 OR foo:xabc`, nil)
// non-existing column (last)
// foo:23 OR bar:xabc*
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
f(`foo:23 OR bar:xabc*`, []int{9})
// non-existing column (first)
// bar:xabc* OR foo:23
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
f(`bar:xabc* OR foo:23`, []int{9})
// (foo:23 AND bar:"") OR (foo:foo AND bar:*)
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
f(`(foo:23 AND bar:"") OR (foo:foo AND bar:*)`, []int{9})
f(`(foo:23 AND bar:"") OR (foo:foo AND bar:"")`, []int{0, 9})
f(`(foo:23 AND bar:"") OR (foo:foo AND baz:"")`, []int{0, 9})
f(`(foo:23 AND bar:abc) OR (foo:foo AND bar:"")`, []int{0})
f(`(foo:23 AND bar:abc) OR (foo:foo AND bar:*)`, nil)
// (foo:23 AND bar:"") OR (foo:foo AND bar:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:"") OR (foo:foo AND baz:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "baz",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:*)
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
// negative filter
f(`foo:baz or !foo:~foo`, []int{2, 3, 5, 7, 8, 9})
f(`foo:baz or foo:!~foo`, []int{2, 3, 5, 7, 8, 9})
}