This commit is contained in:
Aliaksandr Valialkin 2024-04-29 04:15:14 +02:00
parent 70baaace98
commit 93c5f2f9bc
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
7 changed files with 394 additions and 372 deletions

View file

@ -24,143 +24,6 @@ type filter interface {
apply(bs *blockSearch, bm *bitmap) apply(bs *blockSearch, bm *bitmap)
} }
// filterNoop is a filter whose apply leaves the passed bitmap as is.
type filterNoop struct{}

// String returns an empty string for filterNoop.
func (*filterNoop) String() string {
	return ""
}

// apply ignores both arguments and returns immediately.
func (*filterNoop) apply(*blockSearch, *bitmap) {
	// nothing to do
}
// filterOr contains filters joined by OR operator.
//
// It is expressed as `f1 OR f2 ... OR fN` in LogsQL.
type filterOr struct {
// filters are the sub-filters; apply ORs together the rows matched by each of them.
filters []filter
}
// String returns the string forms of all the sub-filters joined with " or ".
func (fo *filterOr) String() string {
	parts := make([]string, 0, len(fo.filters))
	for _, f := range fo.filters {
		parts = append(parts, f.String())
	}
	return strings.Join(parts, " or ")
}
// apply replaces bm with the union of the rows from bm accepted by the filters in fo.
//
// Two scratch bitmaps are obtained via getBitmap and handed back via putBitmap
// before returning.
func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
	matched := getBitmap(bm.bitsLen)
	pending := getBitmap(bm.bitsLen)

	for i := range fo.filters {
		// Give the filter only the rows that can still change the outcome:
		// - rows selected by bm, e.g. the rows requested by the caller
		// - rows missing in matched, e.g. rows not matched by any of the previous OR filters
		pending.copyFrom(bm)
		pending.andNot(matched)
		if pending.isZero() {
			// Shortcut - every row requested by the caller is already matched,
			// so the remaining filters cannot add anything.
			break
		}

		fo.filters[i].apply(bs, pending)
		matched.or(pending)
	}

	putBitmap(pending)
	bm.copyFrom(matched)
	putBitmap(matched)
}
// filterAnd contains filters joined by AND operator.
//
// It is expressed as `f1 AND f2 ... AND fN` in LogsQL.
type filterAnd struct {
// filters are the sub-filters; apply runs them one after another against the same bitmap.
filters []filter
// msgTokensOnce makes sure initMsgTokens runs at most once per filterAnd (see getMsgTokens).
msgTokensOnce sync.Once
// msgTokens is filled by initMsgTokens with the tokens of the sub-filters
// whose fieldName passes isMsgFieldName; apply checks them against the bloom filter
// of the _msg column before running the sub-filters.
msgTokens []string
}
// String returns the string forms of all the sub-filters joined with a single space.
//
// Sub-filters of type *filterOr are wrapped into parentheses.
func (fa *filterAnd) String() string {
	parts := make([]string, 0, len(fa.filters))
	for _, f := range fa.filters {
		s := f.String()
		if _, isOr := f.(*filterOr); isOr {
			s = "(" + s + ")"
		}
		parts = append(parts, s)
	}
	return strings.Join(parts, " ")
}
// apply runs every filter from fa against bm for the given bs.
//
// If fa has tokens for the _msg field, they are first verified against
// the bloom filter of the _msg column; bm is reset right away when they do not match.
//
// NOTE(review): a nil column header is treated as "the block has no _msg field";
// confirm getColumnHeader cannot return nil for a block whose _msg value is kept elsewhere.
func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
	tokens := fa.getMsgTokens()
	if len(tokens) > 0 {
		// Fast path - either there is no _msg field in the block
		// or fa tokens for the _msg field do not match bloom filter.
		ch := bs.csh.getColumnHeader("_msg")
		if ch == nil || !matchBloomFilterAllTokens(bs, ch, tokens) {
			bm.resetBits()
			return
		}
	}

	// Slow path - verify every filter separately.
	for _, f := range fa.filters {
		// Stop as soon as bm becomes zero - the remaining filters
		// are not needed, since the result will be zero anyway.
		if f.apply(bs, bm); bm.isZero() {
			return
		}
	}
}
// getMsgTokens returns fa.msgTokens, running initMsgTokens via msgTokensOnce
// on the first call.
func (fa *filterAnd) getMsgTokens() []string {
	fa.msgTokensOnce.Do(func() {
		fa.initMsgTokens()
	})
	return fa.msgTokens
}
// initMsgTokens stores into fa.msgTokens the tokens returned by getTokens() of those
// direct sub-filters, which have one of the types listed below and whose fieldName
// passes isMsgFieldName.
//
// Sub-filters of any other type are skipped.
func (fa *filterAnd) initMsgTokens() {
	var tokens []string
	for _, f := range fa.filters {
		switch t := f.(type) {
		case *phraseFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *sequenceFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *exactFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *exactPrefixFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *prefixFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		}
	}
	fa.msgTokens = tokens
}
// notFilter negates the filter. // notFilter negates the filter.
// //
// It is expressed as `NOT f` or `!f` in LogsQL. // It is expressed as `NOT f` or `!f` in LogsQL.

View file

@ -0,0 +1,91 @@
package logstorage
import (
"strings"
"sync"
)
// filterAnd contains filters joined by AND operator.
//
// It is expressed as `f1 AND f2 ... AND fN` in LogsQL.
type filterAnd struct {
// filters are the sub-filters; apply runs them one after another against the same bitmap.
filters []filter
// msgTokensOnce makes sure initMsgTokens runs at most once per filterAnd (see getMsgTokens).
msgTokensOnce sync.Once
// msgTokens is filled by initMsgTokens with the tokens of the sub-filters
// whose fieldName passes isMsgFieldName; apply checks them against the bloom filter
// of the _msg column before running the sub-filters.
msgTokens []string
}
// String returns the string forms of all the sub-filters joined with a single space.
//
// Sub-filters of type *filterOr are wrapped into parentheses.
func (fa *filterAnd) String() string {
	parts := make([]string, 0, len(fa.filters))
	for _, f := range fa.filters {
		s := f.String()
		if _, isOr := f.(*filterOr); isOr {
			s = "(" + s + ")"
		}
		parts = append(parts, s)
	}
	return strings.Join(parts, " ")
}
// apply runs every filter from fa against bm for the given bs.
//
// If fa has tokens for the _msg field, they are first verified against
// the bloom filter of the _msg column; bm is reset right away when they do not match.
//
// NOTE(review): a nil column header is treated as "the block has no _msg field";
// confirm getColumnHeader cannot return nil for a block whose _msg value is kept elsewhere.
func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
	tokens := fa.getMsgTokens()
	if len(tokens) > 0 {
		// Fast path - either there is no _msg field in the block
		// or fa tokens for the _msg field do not match bloom filter.
		ch := bs.csh.getColumnHeader("_msg")
		if ch == nil || !matchBloomFilterAllTokens(bs, ch, tokens) {
			bm.resetBits()
			return
		}
	}

	// Slow path - verify every filter separately.
	for _, f := range fa.filters {
		// Stop as soon as bm becomes zero - the remaining filters
		// are not needed, since the result will be zero anyway.
		if f.apply(bs, bm); bm.isZero() {
			return
		}
	}
}
// getMsgTokens returns fa.msgTokens, running initMsgTokens via msgTokensOnce
// on the first call.
func (fa *filterAnd) getMsgTokens() []string {
	fa.msgTokensOnce.Do(func() {
		fa.initMsgTokens()
	})
	return fa.msgTokens
}
// initMsgTokens stores into fa.msgTokens the tokens returned by getTokens() of those
// direct sub-filters, which have one of the types listed below and whose fieldName
// passes isMsgFieldName.
//
// Sub-filters of any other type are skipped.
func (fa *filterAnd) initMsgTokens() {
	var tokens []string
	for _, f := range fa.filters {
		switch t := f.(type) {
		case *phraseFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *sequenceFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *exactFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *exactPrefixFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		case *prefixFilter:
			if !isMsgFieldName(t.fieldName) {
				continue
			}
			tokens = append(tokens, t.getTokens()...)
		}
	}
	fa.msgTokens = tokens
}

View file

@ -0,0 +1,115 @@
package logstorage
import (
"testing"
)
// TestFilterAnd runs filterAnd built from phraseFilter and prefixFilter pairs
// over a single "foo" column via testFilterMatchForColumns.
func TestFilterAnd(t *testing.T) {
	columns := []column{
		{
			name: "foo",
			values: []string{
				"a foo",
				"a foobar",
				"aa abc a",
				"ca afdf a,foobar baz",
				"a fddf foobarbaz",
				"",
				"a foobar abcdef",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
		},
	}

	// phrase and prefix build the leaf filters for the "foo" field.
	phrase := func(s string) filter {
		return &phraseFilter{
			fieldName: "foo",
			phrase:    s,
		}
	}
	prefix := func(s string) filter {
		return &prefixFilter{
			fieldName: "foo",
			prefix:    s,
		}
	}

	// check joins the given filters with AND and passes the result
	// to testFilterMatchForColumns together with rowIdxs.
	check := func(rowIdxs []int, filters ...filter) {
		t.Helper()
		fa := &filterAnd{
			filters: filters,
		}
		testFilterMatchForColumns(t, columns, fa, "foo", rowIdxs)
	}

	// non-empty intersection
	check([]int{2, 6}, phrase("a"), prefix("abc"))

	// reverse non-empty intersection
	check([]int{2, 6}, prefix("abc"), phrase("a"))

	// the first filter mismatch
	check(nil, prefix("bc"), phrase("a"))

	// the last filter mismatch
	check(nil, phrase("abc"), prefix("foo"))

	// empty intersection
	check(nil, phrase("foo"), prefix("abc"))

	// reverse empty intersection
	check(nil, prefix("abc"), phrase("foo"))
}

View file

@ -0,0 +1,13 @@
package logstorage
// filterNoop is a filter whose apply leaves the passed bitmap as is.
type filterNoop struct{}

// String returns an empty string for filterNoop.
func (*filterNoop) String() string {
	return ""
}

// apply ignores both arguments and returns immediately.
func (*filterNoop) apply(*blockSearch, *bitmap) {
	// nothing to do
}

View file

@ -0,0 +1,45 @@
package logstorage
import (
"strings"
)
// filterOr contains filters joined by OR operator.
//
// It is expressed as `f1 OR f2 ... OR fN` in LogsQL.
type filterOr struct {
// filters are the sub-filters; apply ORs together the rows matched by each of them.
filters []filter
}
// String returns the string forms of all the sub-filters joined with " or ".
func (fo *filterOr) String() string {
	parts := make([]string, 0, len(fo.filters))
	for _, f := range fo.filters {
		parts = append(parts, f.String())
	}
	return strings.Join(parts, " or ")
}
// apply replaces bm with the union of the rows from bm accepted by the filters in fo.
//
// Two scratch bitmaps are obtained via getBitmap and handed back via putBitmap
// before returning.
func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
	matched := getBitmap(bm.bitsLen)
	pending := getBitmap(bm.bitsLen)

	for i := range fo.filters {
		// Give the filter only the rows that can still change the outcome:
		// - rows selected by bm, e.g. the rows requested by the caller
		// - rows missing in matched, e.g. rows not matched by any of the previous OR filters
		pending.copyFrom(bm)
		pending.andNot(matched)
		if pending.isZero() {
			// Shortcut - every row requested by the caller is already matched,
			// so the remaining filters cannot add anything.
			break
		}

		fo.filters[i].apply(bs, pending)
		matched.or(pending)
	}

	putBitmap(pending)
	bm.copyFrom(matched)
	putBitmap(matched)
}

View file

@ -0,0 +1,130 @@
package logstorage
import (
"testing"
)
// TestFilterOr runs filterOr built from phraseFilter and prefixFilter pairs
// over a single "foo" column via testFilterMatchForColumns.
func TestFilterOr(t *testing.T) {
	columns := []column{
		{
			name: "foo",
			values: []string{
				"a foo",
				"a foobar",
				"aa abc a",
				"ca afdf a,foobar baz",
				"a fddf foobarbaz",
				"a",
				"a foobar abcdef",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
		},
	}

	// phrase and prefix build the leaf filters for the "foo" field.
	phrase := func(s string) filter {
		return &phraseFilter{
			fieldName: "foo",
			phrase:    s,
		}
	}
	prefix := func(s string) filter {
		return &prefixFilter{
			fieldName: "foo",
			prefix:    s,
		}
	}

	// check joins the given filters with OR and passes the result
	// to testFilterMatchForColumns together with rowIdxs.
	check := func(rowIdxs []int, filters ...filter) {
		t.Helper()
		fo := &filterOr{
			filters: filters,
		}
		testFilterMatchForColumns(t, columns, fo, "foo", rowIdxs)
	}

	// non-empty union
	check([]int{2, 6, 9}, phrase("23"), prefix("abc"))

	// reverse non-empty union
	check([]int{2, 6, 9}, prefix("abc"), phrase("23"))

	// first empty result, second non-empty result
	check([]int{9}, prefix("xabc"), phrase("23"))

	// first non-empty result, second empty result
	check([]int{9}, phrase("23"), prefix("xabc"))

	// first match all
	check([]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, phrase("a"), prefix("23"))

	// second match all
	check([]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, prefix("23"), phrase("a"))

	// both empty results
	check(nil, phrase("x23"), prefix("xabc"))
}

View file

@ -480,241 +480,6 @@ func TestComplexFilters(t *testing.T) {
testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6}) testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6})
} }
// TestOrFilter runs filterOr built from phraseFilter and prefixFilter pairs
// over a single "foo" column via testFilterMatchForColumns.
func TestOrFilter(t *testing.T) {
	columns := []column{
		{
			name: "foo",
			values: []string{
				"a foo",
				"a foobar",
				"aa abc a",
				"ca afdf a,foobar baz",
				"a fddf foobarbaz",
				"a",
				"a foobar abcdef",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
		},
	}

	// phrase and prefix build the leaf filters for the "foo" field.
	phrase := func(s string) filter {
		return &phraseFilter{
			fieldName: "foo",
			phrase:    s,
		}
	}
	prefix := func(s string) filter {
		return &prefixFilter{
			fieldName: "foo",
			prefix:    s,
		}
	}

	// check joins the given filters with OR and passes the result
	// to testFilterMatchForColumns together with rowIdxs.
	check := func(rowIdxs []int, filters ...filter) {
		t.Helper()
		fo := &filterOr{
			filters: filters,
		}
		testFilterMatchForColumns(t, columns, fo, "foo", rowIdxs)
	}

	// non-empty union
	check([]int{2, 6, 9}, phrase("23"), prefix("abc"))

	// reverse non-empty union
	check([]int{2, 6, 9}, prefix("abc"), phrase("23"))

	// first empty result, second non-empty result
	check([]int{9}, prefix("xabc"), phrase("23"))

	// first non-empty result, second empty result
	check([]int{9}, phrase("23"), prefix("xabc"))

	// first match all
	check([]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, phrase("a"), prefix("23"))

	// second match all
	check([]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, prefix("23"), phrase("a"))

	// both empty results
	check(nil, phrase("x23"), prefix("xabc"))
}
// TestAndFilter runs filterAnd built from phraseFilter and prefixFilter pairs
// over a single "foo" column via testFilterMatchForColumns.
func TestAndFilter(t *testing.T) {
	columns := []column{
		{
			name: "foo",
			values: []string{
				"a foo",
				"a foobar",
				"aa abc a",
				"ca afdf a,foobar baz",
				"a fddf foobarbaz",
				"",
				"a foobar abcdef",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
		},
	}

	// phrase and prefix build the leaf filters for the "foo" field.
	phrase := func(s string) filter {
		return &phraseFilter{
			fieldName: "foo",
			phrase:    s,
		}
	}
	prefix := func(s string) filter {
		return &prefixFilter{
			fieldName: "foo",
			prefix:    s,
		}
	}

	// check joins the given filters with AND and passes the result
	// to testFilterMatchForColumns together with rowIdxs.
	check := func(rowIdxs []int, filters ...filter) {
		t.Helper()
		fa := &filterAnd{
			filters: filters,
		}
		testFilterMatchForColumns(t, columns, fa, "foo", rowIdxs)
	}

	// non-empty intersection
	check([]int{2, 6}, phrase("a"), prefix("abc"))

	// reverse non-empty intersection
	check([]int{2, 6}, prefix("abc"), phrase("a"))

	// the first filter mismatch
	check(nil, prefix("bc"), phrase("a"))

	// the last filter mismatch
	check(nil, phrase("abc"), prefix("foo"))

	// empty intersection
	check(nil, phrase("foo"), prefix("abc"))

	// reverse empty intersection
	check(nil, prefix("abc"), phrase("foo"))
}
func TestNotFilter(t *testing.T) { func TestNotFilter(t *testing.T) {
columns := []column{ columns := []column{
{ {