wip

2024-12-31 15:06:26 +00:00 · 2024-04-29 04:15:14 +02:00 · 2024-04-29 04:15:14 +02:00 · 93c5f2f9bc
commit 93c5f2f9bc
parent 70baaace98
7 changed files with 394 additions and 372 deletions
--- a/lib/logstorage/filter.go
+++ b/lib/logstorage/filter.go
@ -24,143 +24,6 @@ type filter interface {
 	apply(bs *blockSearch, bm *bitmap)
 }
 // filterNoop is a filter, which leaves the given bitmap as is.
 type filterNoop struct{}
 // String always returns an empty string.
 func (*filterNoop) String() string { return "" }
 // apply intentionally does nothing, so the bitmap passed by the caller stays untouched.
 func (*filterNoop) apply(*blockSearch, *bitmap) {}
 // filterOr contains filters joined by OR operator.
 //
 // It is expressed as `f1 OR f2 ... OR fN` in LogsQL.
 type filterOr struct {
 	// filters are the sub-filters whose results are OR-ed together by apply.
 	filters []filter
 }
 // String returns string representations of the fo sub-filters joined with " or ".
 func (fo *filterOr) String() string {
 	parts := make([]string, 0, len(fo.filters))
 	for _, sub := range fo.filters {
 		parts = append(parts, sub.String())
 	}
 	return strings.Join(parts, " or ")
 }
 // apply runs the fo sub-filters over the rows selected by bm and stores
 // the OR-ed outcome back into bm.
 //
 // NOTE(review): this relies on getBitmap returning a bitmap with all the bits cleared - confirm.
 func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
 	matched := getBitmap(bm.bitsLen)
 	pending := getBitmap(bm.bitsLen)
 	for i := range fo.filters {
 		// Pass to the next filter only the rows, which can still change the result:
 		// the rows requested by the caller via bm, which weren't matched
 		// by any of the preceding filters.
 		pending.copyFrom(bm)
 		pending.andNot(matched)
 		if pending.isZero() {
 			// All the rows requested by the caller are already matched,
 			// so the remaining filters cannot change the result.
 			break
 		}
 		fo.filters[i].apply(bs, pending)
 		matched.or(pending)
 	}
 	putBitmap(pending)
 	bm.copyFrom(matched)
 	putBitmap(matched)
 }
 // filterAnd contains filters joined by AND operator.
 //
 // It is expressed as `f1 AND f2 ... AND fN` in LogsQL.
 type filterAnd struct {
 	// filters are the sub-filters, which are applied one after another by apply.
 	filters []filter
 	// msgTokensOnce makes sure msgTokens is initialized only once by getMsgTokens.
 	msgTokensOnce sync.Once
 	// msgTokens are the tokens gathered by initMsgTokens from the sub-filters
 	// whose field name passes the isMsgFieldName check.
 	msgTokens     []string
 }
 // String returns string representations of the fa sub-filters joined with a single space.
 //
 // Sub-filters of filterOr type are enclosed in parentheses.
 func (fa *filterAnd) String() string {
 	parts := make([]string, 0, len(fa.filters))
 	for _, sub := range fa.filters {
 		text := sub.String()
 		if _, isOr := sub.(*filterOr); isOr {
 			// Keep the nested OR expression visibly grouped inside the AND expression.
 			text = "(" + text + ")"
 		}
 		parts = append(parts, text)
 	}
 	return strings.Join(parts, " ")
 }
 // apply narrows bm down by running every fa sub-filter over it in order.
 //
 // When fa has tokens for the _msg field, the block is rejected as a whole
 // before running the sub-filters if it has no _msg column or if the tokens
 // do not match the bloom filter for this column.
 func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
 	msgTokens := fa.getMsgTokens()
 	if len(msgTokens) != 0 {
 		msgHeader := bs.csh.getColumnHeader("_msg")
 		if msgHeader == nil || !matchBloomFilterAllTokens(bs, msgHeader, msgTokens) {
 			// Fast path - either the block has no _msg field
 			// or the bloom filter rules out at least one of the tokens.
 			bm.resetBits()
 			return
 		}
 	}
 	// Slow path - run the sub-filters one by one.
 	for _, sub := range fa.filters {
 		sub.apply(bs, bm)
 		if bm.isZero() {
 			// No rows are left, so the remaining sub-filters cannot change the result.
 			break
 		}
 	}
 }
 // getMsgTokens returns fa.msgTokens, building them via initMsgTokens on the first call.
 func (fa *filterAnd) getMsgTokens() []string {
 	once := &fa.msgTokensOnce
 	once.Do(fa.initMsgTokens)
 	return fa.msgTokens
 }
 // initMsgTokens stores into fa.msgTokens the tokens of the phrase, sequence, exact,
 // exact prefix and prefix sub-filters whose field name passes the isMsgFieldName check.
 //
 // Sub-filters of other types are ignored.
 func (fa *filterAnd) initMsgTokens() {
 	var tokens []string
 	for _, sub := range fa.filters {
 		// extra holds the tokens contributed by the current sub-filter, if any.
 		var extra []string
 		switch t := sub.(type) {
 		case *phraseFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *sequenceFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *exactFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *exactPrefixFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *prefixFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		}
 		tokens = append(tokens, extra...)
 	}
 	fa.msgTokens = tokens
 }
 // notFilter negates the filter.
 //
 // It is expressed as `NOT f` or `!f` in LogsQL.
--- a/lib/logstorage/filter_and.go
+++ b/lib/logstorage/filter_and.go
@ -0,0 +1,91 @@
 package logstorage
 import (
 	"strings"
 	"sync"
 )
 // filterAnd contains filters joined by AND operator.
 //
 // It is expressed as `f1 AND f2 ... AND fN` in LogsQL.
 type filterAnd struct {
 	// filters are the sub-filters, which are applied one after another by apply.
 	filters []filter
 	// msgTokensOnce makes sure msgTokens is initialized only once by getMsgTokens.
 	msgTokensOnce sync.Once
 	// msgTokens are the tokens gathered by initMsgTokens from the sub-filters
 	// whose field name passes the isMsgFieldName check.
 	msgTokens     []string
 }
 // String returns string representations of the fa sub-filters joined with a single space.
 //
 // Sub-filters of filterOr type are enclosed in parentheses.
 func (fa *filterAnd) String() string {
 	parts := make([]string, 0, len(fa.filters))
 	for _, sub := range fa.filters {
 		text := sub.String()
 		if _, isOr := sub.(*filterOr); isOr {
 			// Keep the nested OR expression visibly grouped inside the AND expression.
 			text = "(" + text + ")"
 		}
 		parts = append(parts, text)
 	}
 	return strings.Join(parts, " ")
 }
 // apply narrows bm down by running every fa sub-filter over it in order.
 //
 // When fa has tokens for the _msg field, the block is rejected as a whole
 // before running the sub-filters if it has no _msg column or if the tokens
 // do not match the bloom filter for this column.
 func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
 	msgTokens := fa.getMsgTokens()
 	if len(msgTokens) != 0 {
 		msgHeader := bs.csh.getColumnHeader("_msg")
 		if msgHeader == nil || !matchBloomFilterAllTokens(bs, msgHeader, msgTokens) {
 			// Fast path - either the block has no _msg field
 			// or the bloom filter rules out at least one of the tokens.
 			bm.resetBits()
 			return
 		}
 	}
 	// Slow path - run the sub-filters one by one.
 	for _, sub := range fa.filters {
 		sub.apply(bs, bm)
 		if bm.isZero() {
 			// No rows are left, so the remaining sub-filters cannot change the result.
 			break
 		}
 	}
 }
 // getMsgTokens returns fa.msgTokens, building them via initMsgTokens on the first call.
 func (fa *filterAnd) getMsgTokens() []string {
 	once := &fa.msgTokensOnce
 	once.Do(fa.initMsgTokens)
 	return fa.msgTokens
 }
 // initMsgTokens stores into fa.msgTokens the tokens of the phrase, sequence, exact,
 // exact prefix and prefix sub-filters whose field name passes the isMsgFieldName check.
 //
 // Sub-filters of other types are ignored.
 func (fa *filterAnd) initMsgTokens() {
 	var tokens []string
 	for _, sub := range fa.filters {
 		// extra holds the tokens contributed by the current sub-filter, if any.
 		var extra []string
 		switch t := sub.(type) {
 		case *phraseFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *sequenceFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *exactFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *exactPrefixFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		case *prefixFilter:
 			if isMsgFieldName(t.fieldName) {
 				extra = t.getTokens()
 			}
 		}
 		tokens = append(tokens, extra...)
 	}
 	fa.msgTokens = tokens
 }
--- a/lib/logstorage/filter_and_test.go
+++ b/lib/logstorage/filter_and_test.go
@ -0,0 +1,115 @@
 package logstorage
 import (
 	"testing"
 )
 // TestFilterAnd checks filterAnd built from phrase and prefix sub-filters
 // against a single "foo" column.
 func TestFilterAnd(t *testing.T) {
 	columns := []column{
 		{
 			name: "foo",
 			values: []string{
 				"a foo",
 				"a foobar",
 				"aa abc a",
 				"ca afdf a,foobar baz",
 				"a fddf foobarbaz",
 				"",
 				"a foobar abcdef",
 				"a kjlkjf dfff",
 				"a ТЕСТЙЦУК НГКШ ",
 				"a !!,23.(!1)",
 			},
 		},
 	}
 	newPhrase := func(s string) filter {
 		return &phraseFilter{fieldName: "foo", phrase: s}
 	}
 	newPrefix := func(s string) filter {
 		return &prefixFilter{fieldName: "foo", prefix: s}
 	}
 	testCases := []struct {
 		filters []filter
 		want    []int
 	}{
 		// non-empty intersection
 		{
 			filters: []filter{newPhrase("a"), newPrefix("abc")},
 			want:    []int{2, 6},
 		},
 		// reverse non-empty intersection
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("a")},
 			want:    []int{2, 6},
 		},
 		// the first filter mismatch
 		{
 			filters: []filter{newPrefix("bc"), newPhrase("a")},
 			want:    nil,
 		},
 		// the last filter mismatch
 		{
 			filters: []filter{newPhrase("abc"), newPrefix("foo")},
 			want:    nil,
 		},
 		// empty intersection
 		{
 			filters: []filter{newPhrase("foo"), newPrefix("abc")},
 			want:    nil,
 		},
 		// reverse empty intersection
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("foo")},
 			want:    nil,
 		},
 	}
 	for _, tc := range testCases {
 		fa := &filterAnd{filters: tc.filters}
 		testFilterMatchForColumns(t, columns, fa, "foo", tc.want)
 	}
 }
--- a/lib/logstorage/filter_noop.go
+++ b/lib/logstorage/filter_noop.go
@ -0,0 +1,13 @@
 package logstorage
 // filterNoop is a filter, which leaves the given bitmap as is.
 type filterNoop struct{}
 // String always returns an empty string.
 func (*filterNoop) String() string { return "" }
 // apply intentionally does nothing, so the bitmap passed by the caller stays untouched.
 func (*filterNoop) apply(*blockSearch, *bitmap) {}
--- a/lib/logstorage/filter_or.go
+++ b/lib/logstorage/filter_or.go
@ -0,0 +1,45 @@
 package logstorage
 import (
 	"strings"
 )
 // filterOr contains filters joined by OR operator.
 //
 // It is expressed as `f1 OR f2 ... OR fN` in LogsQL.
 type filterOr struct {
 	// filters are the sub-filters whose results are OR-ed together by apply.
 	filters []filter
 }
 // String returns string representations of the fo sub-filters joined with " or ".
 func (fo *filterOr) String() string {
 	parts := make([]string, 0, len(fo.filters))
 	for _, sub := range fo.filters {
 		parts = append(parts, sub.String())
 	}
 	return strings.Join(parts, " or ")
 }
 // apply runs the fo sub-filters over the rows selected by bm and stores
 // the OR-ed outcome back into bm.
 //
 // NOTE(review): this relies on getBitmap returning a bitmap with all the bits cleared - confirm.
 func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
 	matched := getBitmap(bm.bitsLen)
 	pending := getBitmap(bm.bitsLen)
 	for i := range fo.filters {
 		// Pass to the next filter only the rows, which can still change the result:
 		// the rows requested by the caller via bm, which weren't matched
 		// by any of the preceding filters.
 		pending.copyFrom(bm)
 		pending.andNot(matched)
 		if pending.isZero() {
 			// All the rows requested by the caller are already matched,
 			// so the remaining filters cannot change the result.
 			break
 		}
 		fo.filters[i].apply(bs, pending)
 		matched.or(pending)
 	}
 	putBitmap(pending)
 	bm.copyFrom(matched)
 	putBitmap(matched)
 }
--- a/lib/logstorage/filter_or_test.go
+++ b/lib/logstorage/filter_or_test.go
@ -0,0 +1,130 @@
 package logstorage
 import (
 	"testing"
 )
 // TestFilterOr checks filterOr built from phrase and prefix sub-filters
 // against a single "foo" column.
 func TestFilterOr(t *testing.T) {
 	columns := []column{
 		{
 			name: "foo",
 			values: []string{
 				"a foo",
 				"a foobar",
 				"aa abc a",
 				"ca afdf a,foobar baz",
 				"a fddf foobarbaz",
 				"a",
 				"a foobar abcdef",
 				"a kjlkjf dfff",
 				"a ТЕСТЙЦУК НГКШ ",
 				"a !!,23.(!1)",
 			},
 		},
 	}
 	newPhrase := func(s string) filter {
 		return &phraseFilter{fieldName: "foo", phrase: s}
 	}
 	newPrefix := func(s string) filter {
 		return &prefixFilter{fieldName: "foo", prefix: s}
 	}
 	testCases := []struct {
 		filters []filter
 		want    []int
 	}{
 		// non-empty union
 		{
 			filters: []filter{newPhrase("23"), newPrefix("abc")},
 			want:    []int{2, 6, 9},
 		},
 		// reverse non-empty union
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("23")},
 			want:    []int{2, 6, 9},
 		},
 		// first empty result, second non-empty result
 		{
 			filters: []filter{newPrefix("xabc"), newPhrase("23")},
 			want:    []int{9},
 		},
 		// first non-empty result, second empty result
 		{
 			filters: []filter{newPhrase("23"), newPrefix("xabc")},
 			want:    []int{9},
 		},
 		// first match all
 		{
 			filters: []filter{newPhrase("a"), newPrefix("23")},
 			want:    []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
 		},
 		// second match all
 		{
 			filters: []filter{newPrefix("23"), newPhrase("a")},
 			want:    []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
 		},
 		// both empty results
 		{
 			filters: []filter{newPhrase("x23"), newPrefix("xabc")},
 			want:    nil,
 		},
 	}
 	for _, tc := range testCases {
 		fo := &filterOr{filters: tc.filters}
 		testFilterMatchForColumns(t, columns, fo, "foo", tc.want)
 	}
 }
--- a/lib/logstorage/filter_test.go
+++ b/lib/logstorage/filter_test.go
@ -480,241 +480,6 @@ func TestComplexFilters(t *testing.T) {
 	testFilterMatchForColumns(t, columns, f, "foo", []int{1, 3, 6})
 }
 // TestOrFilter checks filterOr built from phrase and prefix sub-filters
 // against a single "foo" column.
 func TestOrFilter(t *testing.T) {
 	columns := []column{
 		{
 			name: "foo",
 			values: []string{
 				"a foo",
 				"a foobar",
 				"aa abc a",
 				"ca afdf a,foobar baz",
 				"a fddf foobarbaz",
 				"a",
 				"a foobar abcdef",
 				"a kjlkjf dfff",
 				"a ТЕСТЙЦУК НГКШ ",
 				"a !!,23.(!1)",
 			},
 		},
 	}
 	newPhrase := func(s string) filter {
 		return &phraseFilter{fieldName: "foo", phrase: s}
 	}
 	newPrefix := func(s string) filter {
 		return &prefixFilter{fieldName: "foo", prefix: s}
 	}
 	testCases := []struct {
 		filters []filter
 		want    []int
 	}{
 		// non-empty union
 		{
 			filters: []filter{newPhrase("23"), newPrefix("abc")},
 			want:    []int{2, 6, 9},
 		},
 		// reverse non-empty union
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("23")},
 			want:    []int{2, 6, 9},
 		},
 		// first empty result, second non-empty result
 		{
 			filters: []filter{newPrefix("xabc"), newPhrase("23")},
 			want:    []int{9},
 		},
 		// first non-empty result, second empty result
 		{
 			filters: []filter{newPhrase("23"), newPrefix("xabc")},
 			want:    []int{9},
 		},
 		// first match all
 		{
 			filters: []filter{newPhrase("a"), newPrefix("23")},
 			want:    []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
 		},
 		// second match all
 		{
 			filters: []filter{newPrefix("23"), newPhrase("a")},
 			want:    []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
 		},
 		// both empty results
 		{
 			filters: []filter{newPhrase("x23"), newPrefix("xabc")},
 			want:    nil,
 		},
 	}
 	for _, tc := range testCases {
 		fo := &filterOr{filters: tc.filters}
 		testFilterMatchForColumns(t, columns, fo, "foo", tc.want)
 	}
 }
 // TestAndFilter checks filterAnd built from phrase and prefix sub-filters
 // against a single "foo" column.
 func TestAndFilter(t *testing.T) {
 	columns := []column{
 		{
 			name: "foo",
 			values: []string{
 				"a foo",
 				"a foobar",
 				"aa abc a",
 				"ca afdf a,foobar baz",
 				"a fddf foobarbaz",
 				"",
 				"a foobar abcdef",
 				"a kjlkjf dfff",
 				"a ТЕСТЙЦУК НГКШ ",
 				"a !!,23.(!1)",
 			},
 		},
 	}
 	newPhrase := func(s string) filter {
 		return &phraseFilter{fieldName: "foo", phrase: s}
 	}
 	newPrefix := func(s string) filter {
 		return &prefixFilter{fieldName: "foo", prefix: s}
 	}
 	testCases := []struct {
 		filters []filter
 		want    []int
 	}{
 		// non-empty intersection
 		{
 			filters: []filter{newPhrase("a"), newPrefix("abc")},
 			want:    []int{2, 6},
 		},
 		// reverse non-empty intersection
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("a")},
 			want:    []int{2, 6},
 		},
 		// the first filter mismatch
 		{
 			filters: []filter{newPrefix("bc"), newPhrase("a")},
 			want:    nil,
 		},
 		// the last filter mismatch
 		{
 			filters: []filter{newPhrase("abc"), newPrefix("foo")},
 			want:    nil,
 		},
 		// empty intersection
 		{
 			filters: []filter{newPhrase("foo"), newPrefix("abc")},
 			want:    nil,
 		},
 		// reverse empty intersection
 		{
 			filters: []filter{newPrefix("abc"), newPhrase("foo")},
 			want:    nil,
 		},
 	}
 	for _, tc := range testCases {
 		fa := &filterAnd{filters: tc.filters}
 		testFilterMatchForColumns(t, columns, fa, "foo", tc.want)
 	}
 }
 func TestNotFilter(t *testing.T) {
 	columns := []column{
 		{