From a968561182c24239040ccd8c6cec5cf6aa746408 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 05:51:43 +0200 Subject: [PATCH] wip --- lib/logstorage/filter.go | 189 ------- lib/logstorage/filter_and.go | 2 +- lib/logstorage/filter_exact.go | 199 +++++++ lib/logstorage/filter_exact_prefix_test.go | 2 +- lib/logstorage/filter_exact_test.go | 582 +++++++++++++++++++++ lib/logstorage/filter_test.go | 581 +------------------- lib/logstorage/parser.go | 6 +- 7 files changed, 788 insertions(+), 773 deletions(-) create mode 100644 lib/logstorage/filter_exact.go create mode 100644 lib/logstorage/filter_exact_test.go diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index c074b5a75..5d57324d3 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -72,79 +72,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -// exactFilter matches the exact value. -// -// Example LogsQL: `fieldName:exact("foo bar")` -type exactFilter struct { - fieldName string - value string - - tokensOnce sync.Once - tokens []string -} - -func (fe *exactFilter) String() string { - return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value)) -} - -func (fe *exactFilter) getTokens() []string { - fe.tokensOnce.Do(fe.initTokens) - return fe.tokens -} - -func (fe *exactFilter) initTokens() { - fe.tokens = tokenizeStrings(nil, []string{fe.value}) -} - -func (fe *exactFilter) apply(bs *blockSearch, bm *bitmap) { - fieldName := fe.fieldName - value := fe.value - - v := bs.csh.getConstColumnValue(fieldName) - if v != "" { - if value != v { - bm.resetBits() - } - return - } - - // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) - if ch == nil { - // Fast path - there are no matching columns. - // It matches anything only for empty value. 
- if value != "" { - bm.resetBits() - } - return - } - - tokens := fe.getTokens() - - switch ch.valueType { - case valueTypeString: - matchStringByExactValue(bs, ch, bm, value, tokens) - case valueTypeDict: - matchValuesDictByExactValue(bs, ch, bm, value) - case valueTypeUint8: - matchUint8ByExactValue(bs, ch, bm, value, tokens) - case valueTypeUint16: - matchUint16ByExactValue(bs, ch, bm, value, tokens) - case valueTypeUint32: - matchUint32ByExactValue(bs, ch, bm, value, tokens) - case valueTypeUint64: - matchUint64ByExactValue(bs, ch, bm, value, tokens) - case valueTypeFloat64: - matchFloat64ByExactValue(bs, ch, bm, value, tokens) - case valueTypeIPv4: - matchIPv4ByExactValue(bs, ch, bm, value, tokens) - case valueTypeTimestampISO8601: - matchTimestampISO8601ByExactValue(bs, ch, bm, value, tokens) - default: - logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) - } -} - // inFilter matches any exact value from the values map. // // Example LogsQL: `fieldName:in("foo", "bar baz")` @@ -1154,18 +1081,6 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - n, ok := tryParseTimestampISO8601(value) - if !ok || n < ch.minValue || n > ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = encoding.MarshalUint64(bb.B, n) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { _, ok := tryParseTimestampISO8601(phrase) if ok { @@ -1262,18 +1177,6 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str bbPool.Put(bb) } -func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - n, ok := tryParseIPv4(value) - if !ok || uint64(n) < ch.minValue || uint64(n) > 
ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = encoding.MarshalUint32(bb.B, n) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { _, ok := tryParseIPv4(phrase) if ok { @@ -1380,19 +1283,6 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix bbPool.Put(bb) } -func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - f, ok := tryParseFloat64(value) - if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) { - bm.resetBits() - return - } - n := math.Float64bits(f) - bb := bbPool.Get() - bb.B = encoding.MarshalUint64(bb.B, n) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { // The phrase may contain a part of the floating-point number. // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. 
@@ -1510,17 +1400,6 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref bbPool.Put(bb) } -func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if v == value { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { bb := bbPool.Get() for i, v := range ch.valuesDict.values { @@ -1611,16 +1490,6 @@ func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s }) } -func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - visitValues(bs, ch, bm, func(v string) bool { - return v == value - }) -} - func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() @@ -1952,64 +1821,6 @@ func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s bbPool.Put(bb) } -func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - n, ok := tryParseUint64(phrase) - if !ok || n < ch.minValue || n > ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = append(bb.B, byte(n)) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - -func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - n, ok := tryParseUint64(phrase) - if !ok || n < ch.minValue || n > ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = encoding.MarshalUint16(bb.B, uint16(n)) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - -func matchUint32ByExactValue(bs *blockSearch, ch 
*columnHeader, bm *bitmap, phrase string, tokens []string) { - n, ok := tryParseUint64(phrase) - if !ok || n < ch.minValue || n > ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = encoding.MarshalUint32(bb.B, uint32(n)) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - -func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - n, ok := tryParseUint64(phrase) - if !ok || n < ch.minValue || n > ch.maxValue { - bm.resetBits() - return - } - bb := bbPool.Get() - bb.B = encoding.MarshalUint64(bb.B, n) - matchBinaryValue(bs, ch, bm, bb.B, tokens) - bbPool.Put(bb) -} - -func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) { - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - visitValues(bs, ch, bm, func(v string) bool { - return v == string(binValue) - }) -} - func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) { if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) { bm.resetBits() diff --git a/lib/logstorage/filter_and.go b/lib/logstorage/filter_and.go index 79b172319..582b7280b 100644 --- a/lib/logstorage/filter_and.go +++ b/lib/logstorage/filter_and.go @@ -73,7 +73,7 @@ func (fa *filterAnd) initMsgTokens() { if isMsgFieldName(t.fieldName) { a = append(a, t.getTokens()...) } - case *exactFilter: + case *filterExact: if isMsgFieldName(t.fieldName) { a = append(a, t.getTokens()...) } diff --git a/lib/logstorage/filter_exact.go b/lib/logstorage/filter_exact.go new file mode 100644 index 000000000..c868787fe --- /dev/null +++ b/lib/logstorage/filter_exact.go @@ -0,0 +1,199 @@ +package logstorage + +import ( + "fmt" + "math" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// filterExact matches the exact value. 
+// +// Example LogsQL: `fieldName:exact("foo bar")` +type filterExact struct { + fieldName string + value string + + tokensOnce sync.Once + tokens []string +} + +func (fe *filterExact) String() string { + return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value)) +} + +func (fe *filterExact) getTokens() []string { + fe.tokensOnce.Do(fe.initTokens) + return fe.tokens +} + +func (fe *filterExact) initTokens() { + fe.tokens = tokenizeStrings(nil, []string{fe.value}) +} + +func (fe *filterExact) apply(bs *blockSearch, bm *bitmap) { + fieldName := fe.fieldName + value := fe.value + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if value != v { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty value. + if value != "" { + bm.resetBits() + } + return + } + + tokens := fe.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByExactValue(bs, ch, bm, value, tokens) + case valueTypeDict: + matchValuesDictByExactValue(bs, ch, bm, value) + case valueTypeUint8: + matchUint8ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint16: + matchUint16ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint32: + matchUint32ByExactValue(bs, ch, bm, value, tokens) + case valueTypeUint64: + matchUint64ByExactValue(bs, ch, bm, value, tokens) + case valueTypeFloat64: + matchFloat64ByExactValue(bs, ch, bm, value, tokens) + case valueTypeIPv4: + matchIPv4ByExactValue(bs, ch, bm, value, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByExactValue(bs, ch, bm, value, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { + n, ok := 
tryParseTimestampISO8601(value) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { + n, ok := tryParseIPv4(value) + if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint32(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { + f, ok := tryParseFloat64(value) + if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) { + bm.resetBits() + return + } + n := math.Float64bits(f) + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if v == value { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return v == value + }) +} + +func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = append(bb.B, byte(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, 
phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint16(bb.B, uint16(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint32(bb.B, uint32(n)) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + n, ok := tryParseUint64(phrase) + if !ok || n < ch.minValue || n > ch.maxValue { + bm.resetBits() + return + } + bb := bbPool.Get() + bb.B = encoding.MarshalUint64(bb.B, n) + matchBinaryValue(bs, ch, bm, bb.B, tokens) + bbPool.Put(bb) +} + +func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return v == string(binValue) + }) +} diff --git a/lib/logstorage/filter_exact_prefix_test.go b/lib/logstorage/filter_exact_prefix_test.go index 215c9003f..79579c2eb 100644 --- a/lib/logstorage/filter_exact_prefix_test.go +++ b/lib/logstorage/filter_exact_prefix_test.go @@ -4,7 +4,7 @@ import ( "testing" ) -func TestExactPrefixFilter(t *testing.T) { +func TestFilterExactPrefix(t *testing.T) { t.Run("single-row", func(t *testing.T) { columns := []column{ { diff --git a/lib/logstorage/filter_exact_test.go b/lib/logstorage/filter_exact_test.go new file mode 100644 index 000000000..f714c1e51 --- /dev/null +++ b/lib/logstorage/filter_exact_test.go @@ -0,0 +1,582 @@ +package logstorage + +import ( + "testing" +) + +func TestFilterExact(t *testing.T) { + 
t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "abc def", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "abc", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "non-existing column", + value: "abc", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "abc def", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "foobar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "non-existing column", + value: "x", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "foobar", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 6}) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) + + 
// mismatch + fe = &filterExact{ + fieldName: "foo", + value: "baz", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "non-existing column", + value: "foobar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar baz", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "aa abc a", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{2}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "aa a", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "fooaaazz a", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, 
"foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "256", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "65536", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + 
"123", + "12", + "32", + "0", + "0", + "12", + "12345678901", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "12", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "33", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "1234", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) + + fe = &filterExact{ + fieldName: "foo", + value: "1234.5678901", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{4}) + + fe = &filterExact{ + fieldName: "foo", + value: "-65536", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{3}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "65536", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "123", + } + 
testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "foo", + value: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{2, 4, 5, 7}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + fe = &filterExact{ + fieldName: "foo", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "127.0", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + + fe = &filterExact{ + fieldName: "foo", + value: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, fe, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + fe := &filterExact{ + fieldName: "_msg", + value: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, fe, "_msg", []int{4}) + + fe = &filterExact{ + fieldName: "non-existing-column", + value: "", + } + testFilterMatchForColumns(t, columns, 
fe, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + fe = &filterExact{ + fieldName: "_msg", + value: "bar", + } + testFilterMatchForColumns(t, columns, fe, "_msg", nil) + + fe = &filterExact{ + fieldName: "_msg", + value: "", + } + testFilterMatchForColumns(t, columns, fe, "_msg", nil) + + fe = &filterExact{ + fieldName: "_msg", + value: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, fe, "_msg", nil) + }) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index d1f3e2272..b2883f538 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -477,597 +477,20 @@ func TestStreamFilter(t *testing.T) { } // Match - f := &exactFilter{ + f := &filterExact{ fieldName: "job", value: "foobar", } testFilterMatchForColumns(t, columns, f, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) // Mismatch - f = &exactFilter{ + f = &filterExact{ fieldName: "job", value: "abc", } testFilterMatchForColumns(t, columns, f, "foo", nil) } -func TestExactFilter(t *testing.T) { - t.Run("single-row", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "abc def", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "abc def", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "abc", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "non-existing column", - value: "abc", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("const-column", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "abc def", - "abc def", - "abc def", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "abc def", - } - testFilterMatchForColumns(t, 
columns, fe, "foo", []int{0, 1, 2}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "foobar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "non-existing column", - value: "x", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("dict", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "", - "foobar", - "abc", - "afdf foobar baz", - "fddf foobarbaz", - "afoobarbaz", - "foobar", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "foobar", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 6}) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "baz", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "non-existing column", - value: "foobar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("strings", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "a foo", - "a foobar", - "aa abc a", - "ca afdf a,foobar baz", - "a fddf foobarbaz", - "a afoobarbaz", - "a foobar baz", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "aa abc a", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{2}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "aa a", - 
} - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "fooaaazz a", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("uint8", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "12", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "33", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("uint16", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "256", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "12", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "33", - } 
- testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("uint32", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "65536", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "12", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "33", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("uint64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "12345678901", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "12", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{1, 5}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "33", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("float64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "-65536", - "1234.5678901", - "1", - 
"2", - "3", - "4", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "1234", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0}) - - fe = &exactFilter{ - fieldName: "foo", - value: "1234.5678901", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{4}) - - fe = &exactFilter{ - fieldName: "foo", - value: "-65536", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{3}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "65536", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "123", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "12345678901234567890", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("ipv4", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1.2.3.4", - "0.0.0.0", - "127.0.0.1", - "254.255.255.255", - "127.0.0.1", - "127.0.0.1", - "127.0.4.2", - "127.0.0.1", - "12.0.127.6", - "55.55.55.55", - "66.66.66.66", - "7.7.7.7", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "foo", - value: "127.0.0.1", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{2, 4, 5, 7}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) - - // mismatch - fe = &exactFilter{ - fieldName: "foo", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ 
- fieldName: "foo", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "127.0", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - - fe = &exactFilter{ - fieldName: "foo", - value: "255.255.255.255", - } - testFilterMatchForColumns(t, columns, fe, "foo", nil) - }) - - t.Run("timestamp-iso8601", func(t *testing.T) { - columns := []column{ - { - name: "_msg", - values: []string{ - "2006-01-02T15:04:05.001Z", - "2006-01-02T15:04:05.002Z", - "2006-01-02T15:04:05.003Z", - "2006-01-02T15:04:05.004Z", - "2006-01-02T15:04:05.005Z", - "2006-01-02T15:04:05.006Z", - "2006-01-02T15:04:05.007Z", - "2006-01-02T15:04:05.008Z", - "2006-01-02T15:04:05.009Z", - }, - }, - } - - // match - fe := &exactFilter{ - fieldName: "_msg", - value: "2006-01-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, fe, "_msg", []int{4}) - - fe = &exactFilter{ - fieldName: "non-existing-column", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mimatch - fe = &exactFilter{ - fieldName: "_msg", - value: "bar", - } - testFilterMatchForColumns(t, columns, fe, "_msg", nil) - - fe = &exactFilter{ - fieldName: "_msg", - value: "", - } - testFilterMatchForColumns(t, columns, fe, "_msg", nil) - - fe = &exactFilter{ - fieldName: "_msg", - value: "2006-03-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, fe, "_msg", nil) - }) -} - func TestInFilter(t *testing.T) { t.Run("single-row", func(t *testing.T) { columns := []column{ diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index df9c633ea..51d07bf46 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -322,7 +322,7 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) { case lex.isKeyword("not", "!"): return parseFilterNot(lex, fieldName) case lex.isKeyword("exact"): - return parseExactFilter(lex, fieldName) + return parseFilterExact(lex, 
fieldName) case lex.isKeyword("i"): return parseAnyCaseFilter(lex, fieldName) case lex.isKeyword("in"): @@ -632,7 +632,7 @@ func parseFilterSequence(lex *lexer, fieldName string) (filter, error) { }) } -func parseExactFilter(lex *lexer, fieldName string) (filter, error) { +func parseFilterExact(lex *lexer, fieldName string) (filter, error) { return parseFuncArgMaybePrefix(lex, "exact", fieldName, func(phrase string, isPrefixFilter bool) (filter, error) { if isPrefixFilter { f := &filterExactPrefix{ @@ -641,7 +641,7 @@ func parseExactFilter(lex *lexer, fieldName string) (filter, error) { } return f, nil } - f := &exactFilter{ + f := &filterExact{ fieldName: fieldName, value: phrase, }