diff --git a/lib/logstorage/filter_and.go b/lib/logstorage/filter_and.go index 131050ed3..cf0fa8db2 100644 --- a/lib/logstorage/filter_and.go +++ b/lib/logstorage/filter_and.go @@ -118,7 +118,7 @@ func (fa *filterAnd) initByFieldTokens() { } func getCommonTokensForAndFilters(filters []filter) []fieldTokens { - m := make(map[string]map[string]struct{}) + m := make(map[string][]string) var fieldNames []string mergeFieldTokens := func(fieldName string, tokens []string) { @@ -127,15 +127,10 @@ func getCommonTokensForAndFilters(filters []filter) []fieldTokens { } fieldName = getCanonicalColumnName(fieldName) - mTokens, ok := m[fieldName] - if !ok { + if _, ok := m[fieldName]; !ok { fieldNames = append(fieldNames, fieldName) - mTokens = make(map[string]struct{}) - m[fieldName] = mTokens - } - for _, token := range tokens { - mTokens[token] = struct{}{} } + m[fieldName] = append(m[fieldName], tokens...) } for _, f := range filters { @@ -169,8 +164,13 @@ func getCommonTokensForAndFilters(filters []filter) []fieldTokens { var byFieldTokens []fieldTokens for _, fieldName := range fieldNames { mTokens := m[fieldName] + seenTokens := make(map[string]struct{}) tokens := make([]string, 0, len(mTokens)) - for token := range mTokens { + for _, token := range mTokens { + if _, ok := seenTokens[token]; ok { + continue + } + seenTokens[token] = struct{}{} tokens = append(tokens, token) } diff --git a/lib/logstorage/filter_and_test.go b/lib/logstorage/filter_and_test.go index 41dd2d19f..223fffb7a 100644 --- a/lib/logstorage/filter_and_test.go +++ b/lib/logstorage/filter_and_test.go @@ -1,6 +1,7 @@ package logstorage import ( + "reflect" "testing" ) @@ -75,3 +76,92 @@ func TestFilterAnd(t *testing.T) { f(`foo:foo* AND !foo:~bar`, []int{0}) f(`foo:foo* AND foo:!~bar`, []int{0}) } + +func TestGetCommonTokensForAndFilters(t *testing.T) { + f := func(qStr string, tokensExpected []fieldTokens) { + t.Helper() + + q, err := ParseQuery(qStr) + if err != nil { + t.Fatalf("unexpected error in ParseQuery: %s", err) + } + fa, ok := q.f.(*filterAnd) + if !ok { + t.Fatalf("unexpected filter type: %T; want *filterAnd", q.f) + } + tokens := getCommonTokensForAndFilters(fa.filters) + + if len(tokens) != len(tokensExpected) { + t.Fatalf("unexpected len(tokens); got %d; want %d\ntokens\n%#v\ntokensExpected\n%#v", len(tokens), len(tokensExpected), tokens, tokensExpected) + } + for i, ft := range tokens { + ftExpected := tokensExpected[i] + if ft.field != ftExpected.field { + t.Fatalf("unexpected field; got %q; want %q\ntokens\n%q\ntokensExpected\n%q", ft.field, ftExpected.field, ft.tokens, ftExpected.tokens) + } + if !reflect.DeepEqual(ft.tokens, ftExpected.tokens) { + t.Fatalf("unexpected tokens for field %q; got %q; want %q", ft.field, ft.tokens, ftExpected.tokens) + } + } + } + + f(`foo AND bar`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo", "bar"}, + }, + }) + + f(`="foo bar" AND ="a foo"* AND "bar foo" AND "foo bar"* AND ~"foo qwe bar.+" AND seq(x, bar, "foo qwe")`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo", "bar", "a", "qwe", "x"}, + }, + }) + + // extract common tokens from OR filters + f(`foo AND (bar OR ~"x bar baz")`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo", "bar"}, + }, + }) + + // star matches any non-empty token, so it is skipped + f(`foo bar *`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo", "bar"}, + }, + }) + f(`* *`, nil) + + // empty filter must be skipped + f(`foo "" bar`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo", "bar"}, + }, + }) + f(`"" ""`, nil) + + // unknown filters must be skipped + f(`_time:5m !foo "bar baz" x`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"bar", "baz", "x"}, + }, + }) + + // distinct field names + f(`foo:x bar:"a bc" (foo:y OR (bar:qwe AND foo:"z y a"))`, []fieldTokens{ + { + field: "foo", + tokens: []string{"x", "y"}, + }, + { + field: "bar", + tokens: []string{"a", "bc"}, + }, + }) +} diff --git a/lib/logstorage/filter_or.go b/lib/logstorage/filter_or.go index ab268f745..39e49a0d6 100644 --- a/lib/logstorage/filter_or.go +++ b/lib/logstorage/filter_or.go @@ -182,17 +182,11 @@ func getCommonTokensForOrFilters(filters []filter) []fieldTokens { if len(tokenss) != len(filters) { // The filter for the given fieldName is missing in some OR filters, // so it is impossible to extract common tokens from these filters. - // Give up extracting common tokens from the remaining filters, - // since they may not cover log entries matching fieldName filters. - // This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554 - return nil + continue } commonTokens := getCommonTokens(tokenss) if len(commonTokens) == 0 { - // Give up extracting common tokens from the remaining filters, - // since they may not cover log entries matching fieldName filters. - // This fixes https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6554 - return nil + continue } byFieldTokens = append(byFieldTokens, fieldTokens{ field: fieldName, diff --git a/lib/logstorage/filter_or_test.go b/lib/logstorage/filter_or_test.go index c7bc860ee..371941a5c 100644 --- a/lib/logstorage/filter_or_test.go +++ b/lib/logstorage/filter_or_test.go @@ -1,6 +1,7 @@ package logstorage import ( + "reflect" "testing" ) @@ -70,3 +71,100 @@ func TestFilterOr(t *testing.T) { f(`foo:baz or !foo:~foo`, []int{2, 3, 5, 7, 8, 9}) f(`foo:baz or foo:!~foo`, []int{2, 3, 5, 7, 8, 9}) } + +func TestGetCommonTokensForOrFilters(t *testing.T) { + f := func(qStr string, tokensExpected []fieldTokens) { + t.Helper() + + q, err := ParseQuery(qStr) + if err != nil { + t.Fatalf("unexpected error in ParseQuery: %s", err) + } + fo, ok := q.f.(*filterOr) + if !ok { + t.Fatalf("unexpected filter type: %T; want *filterOr", q.f) + } + tokens := getCommonTokensForOrFilters(fo.filters) + + if len(tokens) != len(tokensExpected) { + t.Fatalf("unexpected len(tokens); got %d; want %d\ntokens\n%#v\ntokensExpected\n%#v", len(tokens), len(tokensExpected), tokens, tokensExpected) + } + for i, ft := range tokens { + ftExpected := tokensExpected[i] + if ft.field != ftExpected.field { + t.Fatalf("unexpected field; got %q; want %q\ntokens\n%q\ntokensExpected\n%q", ft.field, ftExpected.field, ft.tokens, ftExpected.tokens) + } + if !reflect.DeepEqual(ft.tokens, ftExpected.tokens) { + t.Fatalf("unexpected tokens for field %q; got %q; want %q", ft.field, ft.tokens, ftExpected.tokens) + } + } + } + + // no common tokens + f(`foo OR bar`, nil) + + // star filter matches non-empty value; it is skipped, since one of OR filters may contain an empty filter + f(`* OR foo`, nil) + f(`foo or *`, nil) + f(`* or *`, nil) + f(`"" or * or foo`, nil) + + // empty filter suppresses all the common tokens. + f(`"" or foo or "foo bar"`, nil) + f(`foo or ""`, nil) + f(`"" or ""`, nil) + + // common foo token + f(`foo OR "foo bar" OR ="a foo" OR ="foo bar"* OR "bar foo "* OR seq("bar foo", "baz") OR ~"a.+ foo bar"`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo"}, + }, + }) + + // regexp ending on foo token doesn't contain foo token, since it may match foobar. + f(`foo OR "foo bar" OR ="a foo" OR ="foo bar"* OR "bar foo "* OR seq("bar foo", "baz") OR ~"a.+ foo"`, nil) + + // regexp starting from foo token doesn't contain foo token, since it may match barfoo. + f(`foo OR "foo bar" OR ="a foo" OR ="foo bar"* OR "bar foo "* OR seq("bar foo", "baz") OR ~"foo bar"`, nil) + + // prefix filter ending on foo doesn't contain foo token, since it may match foobar. + f(`foo OR "foo bar" OR ="a foo" OR ="foo bar"* OR "bar foo"* OR seq("bar foo", "baz") OR ~"a.+ foo bar"`, nil) + + // bar and foo are common tokens + f(`"bar foo baz" OR (foo AND "bar x" AND foobar)`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"bar", "foo"}, + }, + }) + + // bar and foo are common tokens, x:foobar should be ignored, since it doesn't present in every OR filter + f(`"bar foo baz" OR (foo AND "bar x" AND x:foobar)`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"bar", "foo"}, + }, + }) + + // common tokens for distinct fields + f(`(foo AND x:a) OR (x:"b a c" AND ~"aaa foo ")`, []fieldTokens{ + { + field: "_msg", + tokens: []string{"foo"}, + }, + { + field: "x", + tokens: []string{"a"}, + }, + }) + + // zero common tokens for distinct fields + f(`(foo AND x:a) OR (x:"b c" AND ~"aaa foo" AND y:z)`, nil) + + // negative filter removes all the matching + f(`foo OR !"foo bar"`, nil) + + // time filter removes all the matching + f(`_time:5m or foo`, nil) +}