lib/logstorage: properly fix incorrect extraction of common tokens for OR filters at distinct log fields

Previously (f1:foo OR f2:bar) was incorrectly returning `foo` token for `f1` and `bar` token for `f2`.
These tokens were used for checking against bloom filter for every data block, so the data block,
which didn't contain simultaneously `foo` token for `f1` field and `bar` token for `f2` field, was skipped.
This was incorrect, since such a block may contain logs matching the original OR filter.

The fix is to return common tokens from `OR`-delimted filters only if these tokens exist at EVERY such filter
for the given field name. If some `OR`-delimited filter misses the given field name, then `OR`-delimited filters
do not contain common tokens, which could be used for checking against bloom filter.

While at it, add more tests covering various edge cases for filters delimited by AND and OR.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556
This commit is contained in:
Aliaksandr Valialkin 2024-09-05 14:14:57 +02:00
parent cf7c7bc7fa
commit 2dd845fa53
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 457 additions and 12 deletions

View file

@ -29,7 +29,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* BUGFIX: properly handle Logstash requests for Elasticsearch configuration when using `outputs.elasticsearch` in Logstash pipelines. Previously, the requests could be rejected with `400 Bad Request` response. Updates [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4750).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix `not found index.js` error when loading vmui in VictoriaLogs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6764). Thanks to @yincongcyincong for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6770).
* BUGFIX: fix filtering when logical operators `AND` and `OR` are used in query expression. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554) for details. Thanks to @yincongcyincong for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556).
* BUGFIX: properly execute queries with `OR` [filters](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter) for distinct [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). For example, `field1:foo OR field2:bar`. Previously logs matching these filters may be skipped during querying. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554) for details. Thanks to @yincongcyincong for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556).
## [v0.28.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.28.0-victorialogs)

View file

@ -112,10 +112,6 @@ func (fa *filterAnd) getByFieldTokens() []fieldTokens {
return fa.byFieldTokens
}
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
// and filter shouldn't return or filter which result in
// bloom filter execute error interception.
// detail see: https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6556#issuecomment-2323643507
func (fa *filterAnd) initByFieldTokens() {
m := make(map[string]map[string]struct{})
var fieldNames []string
@ -157,6 +153,11 @@ func (fa *filterAnd) initByFieldTokens() {
case *filterSequence:
tokens := t.getTokens()
mergeFieldTokens(t.fieldName, tokens)
case *filterOr:
bfts := t.getByFieldTokens()
for _, bft := range bfts {
mergeFieldTokens(bft.field, bft.tokens)
}
}
}

View file

@ -26,6 +26,7 @@ func TestFilterAnd(t *testing.T) {
}
// non-empty intersection
// foo:a AND foo:abc*
fa := &filterAnd{
filters: []filter{
&filterPhrase{
@ -41,6 +42,7 @@ func TestFilterAnd(t *testing.T) {
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
// reverse non-empty intersection
// foo:abc* AND foo:a
fa = &filterAnd{
filters: []filter{
&filterPrefix{
@ -56,6 +58,7 @@ func TestFilterAnd(t *testing.T) {
testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6})
// the first filter mismatch
// foo:bc* AND foo:a
fa = &filterAnd{
filters: []filter{
&filterPrefix{
@ -71,6 +74,7 @@ func TestFilterAnd(t *testing.T) {
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// the last filter mismatch
// foo:abc AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterPhrase{
@ -86,6 +90,7 @@ func TestFilterAnd(t *testing.T) {
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// empty intersection
// foo:foo AND foo:abc*
fa = &filterAnd{
filters: []filter{
&filterPhrase{
@ -101,6 +106,7 @@ func TestFilterAnd(t *testing.T) {
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse empty intersection
// foo:abc* AND foo:foo
fa = &filterAnd{
filters: []filter{
&filterPrefix{
@ -115,6 +121,88 @@ func TestFilterAnd(t *testing.T) {
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// empty value
// foo:"" AND bar:""
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "foo",
value: "",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{5})
// non-existing field with empty value
// foo:foo* AND bar:""
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// reverse non-existing field with empty value
// bar:"" AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// non-existing field with non-empty value
// foo:foo* AND bar:*
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// reverse non-existing field with non-empty value
// bar:* AND foo:foo*
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
// foo:"a foo"* AND (foo:="a foobar" OR boo:bbbbbbb)
fa = &filterAnd{
filters: []filter{
&filterPrefix{
@ -136,4 +224,151 @@ func TestFilterAnd(t *testing.T) {
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1})
// foo:"a foo"* AND (foo:"abcd foobar" OR foo:foobar)
fa = &filterAnd{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "a foo",
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "abcd foobar",
},
&filterPhrase{
fieldName: "foo",
phrase: "foobar",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{1, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:a)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "a",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
// (foo:foo* OR bar:baz) AND (bar:x OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPhrase{
fieldName: "bar",
phrase: "x",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:* OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "bar",
prefix: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", nil)
// (foo:foo* OR bar:baz) AND (bar:"" OR foo:xyz)
fa = &filterAnd{
filters: []filter{
&filterOr{
filters: []filter{
&filterPrefix{
fieldName: "foo",
prefix: "foo",
},
&filterPhrase{
fieldName: "bar",
phrase: "baz",
},
},
},
&filterOr{
filters: []filter{
&filterExact{
fieldName: "bar",
value: "",
},
&filterPhrase{
fieldName: "foo",
phrase: "xyz",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fa, "foo", []int{0, 1, 3, 4, 6})
}

View file

@ -171,14 +171,29 @@ func (fo *filterOr) initByFieldTokens() {
var byFieldTokens []fieldTokens
for _, fieldName := range fieldNames {
commonTokens := getCommonTokens(m[fieldName])
if len(commonTokens) > 0 {
tokenss := m[fieldName]
if len(tokenss) != len(fo.filters) {
// The filter for the given fieldName is missing in some OR filters,
// so it is impossible to extract common tokens from these filters.
// Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil
break
}
commonTokens := getCommonTokens(tokenss)
if len(commonTokens) == 0 {
// Give up extracting common tokens from the remaining filters,
// since they may not cover log entries matching fieldName filters.
// This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554
byFieldTokens = nil
break
}
byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName,
tokens: commonTokens,
})
}
}
fo.byFieldTokens = byFieldTokens
}

View file

@ -26,6 +26,7 @@ func TestFilterOr(t *testing.T) {
}
// non-empty union
// foo:23 OR foo:abc
fo := &filterOr{
filters: []filter{
&filterPhrase{
@ -41,6 +42,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
// reverse non-empty union
// foo:abc OR foo:23
fo = &filterOr{
filters: []filter{
&filterPrefix{
@ -56,6 +58,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9})
// first empty result, second non-empty result
// foo:xabc* OR foo:23
fo = &filterOr{
filters: []filter{
&filterPrefix{
@ -71,6 +74,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// first non-empty result, second empty result
// foo:23 OR foo:xabc*
fo = &filterOr{
filters: []filter{
&filterPhrase{
@ -86,6 +90,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// first match all
// foo:a OR foo:23
fo = &filterOr{
filters: []filter{
&filterPhrase{
@ -101,6 +106,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// second match all
// foo:23 OR foo:a
fo = &filterOr{
filters: []filter{
&filterPrefix{
@ -116,6 +122,7 @@ func TestFilterOr(t *testing.T) {
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
// both empty results
// foo:x23 OR foo:xabc
fo = &filterOr{
filters: []filter{
&filterPhrase{
@ -129,4 +136,191 @@ func TestFilterOr(t *testing.T) {
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
// non-existing column (last)
// foo:23 OR bar:xabc*
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// non-existing column (first)
// bar:xabc* OR foo:23
fo = &filterOr{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPrefix{
fieldName: "bar",
prefix: "xabc",
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// (foo:23 AND bar:"") OR (foo:foo AND bar:*)
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{9})
// (foo:23 AND bar:"") OR (foo:foo AND bar:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:"") OR (foo:foo AND baz:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "baz",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 9})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:"")
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterExact{
fieldName: "bar",
value: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", []int{0})
// (foo:23 AND bar:abc) OR (foo:foo AND bar:*)
fo = &filterOr{
filters: []filter{
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "23",
},
&filterPhrase{
fieldName: "bar",
phrase: "abc",
},
},
},
&filterAnd{
filters: []filter{
&filterPhrase{
fieldName: "foo",
phrase: "foo",
},
&filterPrefix{
fieldName: "bar",
prefix: "",
},
},
},
},
}
testFilterMatchForColumns(t, columns, fo, "foo", nil)
}