diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index ae15dad64..ecee1de7a 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -24,143 +24,6 @@ type filter interface { apply(bs *blockSearch, bm *bitmap) } -// filterNoop does nothing -type filterNoop struct { -} - -func (fn *filterNoop) String() string { - return "" -} - -func (fn *filterNoop) apply(_ *blockSearch, _ *bitmap) { - // nothing to do -} - -// filterOr contains filters joined by OR operator. -// -// It is epxressed as `f1 OR f2 ... OR fN` in LogsQL. -type filterOr struct { - filters []filter -} - -func (fo *filterOr) String() string { - filters := fo.filters - a := make([]string, len(filters)) - for i, f := range filters { - s := f.String() - a[i] = s - } - return strings.Join(a, " or ") -} - -func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) { - bmResult := getBitmap(bm.bitsLen) - bmTmp := getBitmap(bm.bitsLen) - for _, f := range fo.filters { - // Minimize the number of rows to check by the filter by checking only - // the rows, which may change the output bm: - // - bm matches them, e.g. the caller wants to get them - // - bmResult doesn't match them, e.g. all the previous OR filters didn't match them - bmTmp.copyFrom(bm) - bmTmp.andNot(bmResult) - if bmTmp.isZero() { - // Shortcut - there is no need in applying the remaining filters, - // since the result already matches all the values from the block. - break - } - f.apply(bs, bmTmp) - bmResult.or(bmTmp) - } - putBitmap(bmTmp) - bm.copyFrom(bmResult) - putBitmap(bmResult) -} - -// filterAnd contains filters joined by AND opertor. -// -// It is expressed as `f1 AND f2 ... AND fN` in LogsQL. 
-type filterAnd struct { - filters []filter - - msgTokensOnce sync.Once - msgTokens []string -} - -func (fa *filterAnd) String() string { - filters := fa.filters - a := make([]string, len(filters)) - for i, f := range filters { - s := f.String() - switch f.(type) { - case *filterOr: - s = "(" + s + ")" - } - a[i] = s - } - return strings.Join(a, " ") -} - -func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) { - if tokens := fa.getMsgTokens(); len(tokens) > 0 { - // Verify whether fa tokens for the _msg field match bloom filter. - ch := bs.csh.getColumnHeader("_msg") - if ch == nil { - // Fast path - there is no _msg field in the block. - bm.resetBits() - return - } - if !matchBloomFilterAllTokens(bs, ch, tokens) { - // Fast path - fa tokens for the _msg field do not match bloom filter. - bm.resetBits() - return - } - } - - // Slow path - verify every filter separately. - for _, f := range fa.filters { - f.apply(bs, bm) - if bm.isZero() { - // Shortcut - there is no need in applying the remaining filters, - // since the result will be zero anyway. - return - } - } -} - -func (fa *filterAnd) getMsgTokens() []string { - fa.msgTokensOnce.Do(fa.initMsgTokens) - return fa.msgTokens -} - -func (fa *filterAnd) initMsgTokens() { - var a []string - for _, f := range fa.filters { - switch t := f.(type) { - case *phraseFilter: - if isMsgFieldName(t.fieldName) { - a = append(a, t.getTokens()...) - } - case *sequenceFilter: - if isMsgFieldName(t.fieldName) { - a = append(a, t.getTokens()...) - } - case *exactFilter: - if isMsgFieldName(t.fieldName) { - a = append(a, t.getTokens()...) - } - case *exactPrefixFilter: - if isMsgFieldName(t.fieldName) { - a = append(a, t.getTokens()...) - } - case *prefixFilter: - if isMsgFieldName(t.fieldName) { - a = append(a, t.getTokens()...) - } - } - } - fa.msgTokens = a -} - // notFilter negates the filter. // // It is expressed as `NOT f` or `!f` in LogsQL. 
diff --git a/lib/logstorage/filter_and.go b/lib/logstorage/filter_and.go new file mode 100644 index 000000000..3b639303e --- /dev/null +++ b/lib/logstorage/filter_and.go @@ -0,0 +1,91 @@ +package logstorage + +import ( + "strings" + "sync" +) + +// filterAnd contains filters joined by AND operator. +// +// It is expressed as `f1 AND f2 ... AND fN` in LogsQL. +type filterAnd struct { + filters []filter + + msgTokensOnce sync.Once + msgTokens []string +} + +func (fa *filterAnd) String() string { + filters := fa.filters + a := make([]string, len(filters)) + for i, f := range filters { + s := f.String() + switch f.(type) { + case *filterOr: + s = "(" + s + ")" + } + a[i] = s + } + return strings.Join(a, " ") +} + +func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) { + if tokens := fa.getMsgTokens(); len(tokens) > 0 { + // Verify whether fa tokens for the _msg field match bloom filter. + ch := bs.csh.getColumnHeader("_msg") + if ch == nil { + // Fast path - there is no _msg field in the block. + bm.resetBits() + return + } + if !matchBloomFilterAllTokens(bs, ch, tokens) { + // Fast path - fa tokens for the _msg field do not match bloom filter. + bm.resetBits() + return + } + } + + // Slow path - verify every filter separately. + for _, f := range fa.filters { + f.apply(bs, bm) + if bm.isZero() { + // Shortcut - there is no need to apply the remaining filters, + // since the result will be zero anyway. + return + } + } +} + +func (fa *filterAnd) getMsgTokens() []string { + fa.msgTokensOnce.Do(fa.initMsgTokens) + return fa.msgTokens +} + +func (fa *filterAnd) initMsgTokens() { + var a []string + for _, f := range fa.filters { + switch t := f.(type) { + case *phraseFilter: + if isMsgFieldName(t.fieldName) { + a = append(a, t.getTokens()...) + } + case *sequenceFilter: + if isMsgFieldName(t.fieldName) { + a = append(a, t.getTokens()...) + } + case *exactFilter: + if isMsgFieldName(t.fieldName) { + a = append(a, t.getTokens()...) 
+ } + case *exactPrefixFilter: + if isMsgFieldName(t.fieldName) { + a = append(a, t.getTokens()...) + } + case *prefixFilter: + if isMsgFieldName(t.fieldName) { + a = append(a, t.getTokens()...) + } + } + } + fa.msgTokens = a +} diff --git a/lib/logstorage/filter_and_test.go b/lib/logstorage/filter_and_test.go new file mode 100644 index 000000000..6533756ca --- /dev/null +++ b/lib/logstorage/filter_and_test.go @@ -0,0 +1,115 @@ +package logstorage + +import ( + "testing" +) + +func TestFilterAnd(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "", + "a foobar abcdef", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // non-empty intersection + fa := &filterAnd{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + }, + } + testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6}) + + // reverse non-empty intersection + fa = &filterAnd{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6}) + + // the first filter mismatch + fa = &filterAnd{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "bc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, fa, "foo", nil) + + // the last filter mismatch + fa = &filterAnd{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "abc", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "foo", + }, + }, + } + testFilterMatchForColumns(t, columns, fa, "foo", nil) + + // empty intersection + fa = &filterAnd{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "foo", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + }, + } + 
testFilterMatchForColumns(t, columns, fa, "foo", nil) + + // reverse empty intersection + fa = &filterAnd{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "foo", + }, + }, + } + testFilterMatchForColumns(t, columns, fa, "foo", nil) +} diff --git a/lib/logstorage/filter_noop.go b/lib/logstorage/filter_noop.go new file mode 100644 index 000000000..d9bbb5775 --- /dev/null +++ b/lib/logstorage/filter_noop.go @@ -0,0 +1,13 @@ +package logstorage + +// filterNoop does nothing +type filterNoop struct { +} + +func (fn *filterNoop) String() string { + return "" +} + +func (fn *filterNoop) apply(_ *blockSearch, _ *bitmap) { + // nothing to do +} diff --git a/lib/logstorage/filter_or.go b/lib/logstorage/filter_or.go new file mode 100644 index 000000000..564fa5618 --- /dev/null +++ b/lib/logstorage/filter_or.go @@ -0,0 +1,45 @@ +package logstorage + +import ( + "strings" +) + +// filterOr contains filters joined by OR operator. +// +// It is expressed as `f1 OR f2 ... OR fN` in LogsQL. +type filterOr struct { + filters []filter +} + +func (fo *filterOr) String() string { + filters := fo.filters + a := make([]string, len(filters)) + for i, f := range filters { + s := f.String() + a[i] = s + } + return strings.Join(a, " or ") +} + +func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) { + bmResult := getBitmap(bm.bitsLen) + bmTmp := getBitmap(bm.bitsLen) + for _, f := range fo.filters { + // Minimize the number of rows to check by the filter by checking only + // the rows, which may change the output bm: + // - bm matches them, i.e. the caller wants to get them + // - bmResult doesn't match them, i.e. all the previous OR filters didn't match them + bmTmp.copyFrom(bm) + bmTmp.andNot(bmResult) + if bmTmp.isZero() { + // Shortcut - there is no need to apply the remaining filters, + // since the result already matches all the values from the block. 
+ break + } + f.apply(bs, bmTmp) + bmResult.or(bmTmp) + } + putBitmap(bmTmp) + bm.copyFrom(bmResult) + putBitmap(bmResult) +} diff --git a/lib/logstorage/filter_or_test.go b/lib/logstorage/filter_or_test.go new file mode 100644 index 000000000..d270a8afe --- /dev/null +++ b/lib/logstorage/filter_or_test.go @@ -0,0 +1,130 @@ +package logstorage + +import ( + "testing" +) + +func TestFilterOr(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a", + "a foobar abcdef", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // non-empty union + fo := &filterOr{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9}) + + // reverse non-empty union + fo = &filterOr{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "abc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9}) + + // first empty result, second non-empty result + fo = &filterOr{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{9}) + + // first non-empty result, second empty result + fo = &filterOr{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{9}) + + // first match all + fo = &filterOr{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "23", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 
7, 8, 9}) + + // second match all + fo = &filterOr{ + filters: []filter{ + &prefixFilter{ + fieldName: "foo", + prefix: "23", + }, + &phraseFilter{ + fieldName: "foo", + phrase: "a", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // both empty results + fo = &filterOr{ + filters: []filter{ + &phraseFilter{ + fieldName: "foo", + phrase: "x23", + }, + &prefixFilter{ + fieldName: "foo", + prefix: "xabc", + }, + }, + } + testFilterMatchForColumns(t, columns, fo, "foo", nil) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index 137b8b924..88df031d2 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -480,241 +480,6 @@ func TestComplexFilters(t *testing.T) { testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6}) } -func TestOrFilter(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "a foo", - "a foobar", - "aa abc a", - "ca afdf a,foobar baz", - "a fddf foobarbaz", - "a", - "a foobar abcdef", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // non-empty union - fo := &filterOr{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "23", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9}) - - // reverse non-empty union - fo = &filterOr{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - &phraseFilter{ - fieldName: "foo", - phrase: "23", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{2, 6, 9}) - - // first empty result, second non-empty result - fo = &filterOr{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "xabc", - }, - &phraseFilter{ - fieldName: "foo", - phrase: "23", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{9}) - - // first non-empty result, second empty result - fo = &filterOr{ - 
filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "23", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "xabc", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{9}) - - // first match all - fo = &filterOr{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "a", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "23", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - // second match all - fo = &filterOr{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "23", - }, - &phraseFilter{ - fieldName: "foo", - phrase: "a", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - // both empty results - fo = &filterOr{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "x23", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "xabc", - }, - }, - } - testFilterMatchForColumns(t, columns, fo, "foo", nil) -} - -func TestAndFilter(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "a foo", - "a foobar", - "aa abc a", - "ca afdf a,foobar baz", - "a fddf foobarbaz", - "", - "a foobar abcdef", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // non-empty intersection - fa := &filterAnd{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "a", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6}) - - // reverse non-empty intersection - fa = &filterAnd{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - &phraseFilter{ - fieldName: "foo", - phrase: "a", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", []int{2, 6}) - - // the first filter mismatch - fa = &filterAnd{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "bc", - }, - &phraseFilter{ - fieldName: "foo", - phrase: 
"a", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", nil) - - // the last filter mismatch - fa = &filterAnd{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "abc", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "foo", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", nil) - - // empty intersection - fa = &filterAnd{ - filters: []filter{ - &phraseFilter{ - fieldName: "foo", - phrase: "foo", - }, - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", nil) - - // reverse empty intersection - fa = &filterAnd{ - filters: []filter{ - &prefixFilter{ - fieldName: "foo", - prefix: "abc", - }, - &phraseFilter{ - fieldName: "foo", - phrase: "foo", - }, - }, - } - testFilterMatchForColumns(t, columns, fa, "foo", nil) -} - func TestNotFilter(t *testing.T) { columns := []column{ {