From bb89151ae88fd174c4db4a102fd5a1073bc01c31 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 07:21:09 +0200 Subject: [PATCH] wip --- lib/logstorage/filter.go | 295 --------- lib/logstorage/filter_prefix.go | 305 +++++++++ lib/logstorage/filter_prefix_test.go | 929 +++++++++++++++++++++++++++ lib/logstorage/filter_test.go | 924 -------------------------- 4 files changed, 1234 insertions(+), 1219 deletions(-) create mode 100644 lib/logstorage/filter_prefix.go create mode 100644 lib/logstorage/filter_prefix_test.go diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index 333cb8ead..254015eaf 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -71,82 +71,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -// filterPrefix matches the given prefix. -// -// Example LogsQL: `fieldName:prefix*` or `fieldName:"some prefix"*` -// -// A special case `fieldName:*` matches non-empty value for the given `fieldName` field -type filterPrefix struct { - fieldName string - prefix string - - tokensOnce sync.Once - tokens []string -} - -func (fp *filterPrefix) String() string { - if fp.prefix == "" { - return quoteFieldNameIfNeeded(fp.fieldName) + "*" - } - return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix)) -} - -func (fp *filterPrefix) getTokens() []string { - fp.tokensOnce.Do(fp.initTokens) - return fp.tokens -} - -func (fp *filterPrefix) initTokens() { - fp.tokens = getTokensSkipLast(fp.prefix) -} - -func (fp *filterPrefix) apply(bs *blockSearch, bm *bitmap) { - fieldName := fp.fieldName - prefix := fp.prefix - - // Verify whether fp matches const column - v := bs.csh.getConstColumnValue(fieldName) - if v != "" { - if !matchPrefix(v, prefix) { - bm.resetBits() - } - return - } - - // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) - if ch == nil { - // Fast path - there are no matching columns. 
- bm.resetBits() - return - } - - tokens := fp.getTokens() - - switch ch.valueType { - case valueTypeString: - matchStringByPrefix(bs, ch, bm, prefix, tokens) - case valueTypeDict: - matchValuesDictByPrefix(bs, ch, bm, prefix) - case valueTypeUint8: - matchUint8ByPrefix(bs, ch, bm, prefix) - case valueTypeUint16: - matchUint16ByPrefix(bs, ch, bm, prefix) - case valueTypeUint32: - matchUint32ByPrefix(bs, ch, bm, prefix) - case valueTypeUint64: - matchUint64ByPrefix(bs, ch, bm, prefix) - case valueTypeFloat64: - matchFloat64ByPrefix(bs, ch, bm, prefix, tokens) - case valueTypeIPv4: - matchIPv4ByPrefix(bs, ch, bm, prefix, tokens) - case valueTypeTimestampISO8601: - matchTimestampISO8601ByPrefix(bs, ch, bm, prefix, tokens) - default: - logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) - } -} - // anyCasePhraseFilter filters field entries by case-insensitive phrase match. // // An example LogsQL query: `fieldName:i(word)` or `fieldName:i("word1 ... wordN")` @@ -315,27 +239,6 @@ func (fp *phraseFilter) apply(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { - if prefix == "" { - // Fast path - all the timestamp values match an empty prefix aka `*` - return - } - // There is no sense in trying to parse prefix, since it may contain incomplete timestamp. - // We cannot compar binary representation of timestamp and need converting - // the timestamp to string before searching for the prefix there. 
- if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toTimestampISO8601StringExt(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { _, ok := tryParseTimestampISO8601(phrase) if ok { @@ -358,27 +261,6 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { - if prefix == "" { - // Fast path - all the ipv4 values match an empty prefix aka `*` - return - } - // There is no sense in trying to parse prefix, since it may contain incomplete ip. - // We cannot compare binary representation of ip address and need converting - // the ip to string before searching for the prefix there. - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toIPv4StringExt(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { _, ok := tryParseIPv4(phrase) if ok { @@ -403,34 +285,6 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str bbPool.Put(bb) } -func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { - if prefix == "" { - // Fast path - all the float64 values match an empty prefix aka `*` - return - } - // The prefix may contain a part of the floating-point number. - // For example, `foo:12*` must match `12`, `123.456` and `-0.123`. - // This means we cannot search in binary representation of floating-point numbers. 
- // Instead, we need searching for the whole prefix in string representation - // of floating-point numbers :( - _, ok := tryParseFloat64(prefix) - if !ok && prefix != "." && prefix != "+" && prefix != "-" && !strings.HasPrefix(prefix, "e") && !strings.HasPrefix(prefix, "E") { - bm.resetBits() - return - } - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toFloat64StringExt(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { // The phrase may contain a part of the floating-point number. // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. @@ -471,17 +325,6 @@ func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitma bbPool.Put(bb) } -func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if matchPrefix(v, prefix) { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { bb := bbPool.Get() for i, v := range ch.valuesDict.values { @@ -526,16 +369,6 @@ func matchStringByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, p }) } -func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - visitValues(bs, ch, bm, func(v string) bool { - return matchPrefix(v, prefix) - }) -} - func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() @@ -560,98 +393,6 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, 
maxLen uint64) bool { return minLen <= uint64(len(s)) } -func matchUint8ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { - if prefix == "" { - // Fast path - all the uint8 values match an empty prefix aka `*` - return - } - // The prefix may contain a part of the number. - // For example, `foo:12*` must match `12` and `123`. - // This means we cannot search in binary representation of numbers. - // Instead, we need searching for the whole prefix in string representation of numbers :( - n, ok := tryParseUint64(prefix) - if !ok || n > ch.maxValue { - bm.resetBits() - return - } - // There is no need in matching against bloom filters, since tokens is empty. - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint8String(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - -func matchUint16ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { - if prefix == "" { - // Fast path - all the uint16 values match an empty prefix aka `*` - return - } - // The prefix may contain a part of the number. - // For example, `foo:12*` must match `12` and `123`. - // This means we cannot search in binary representation of numbers. - // Instead, we need searching for the whole prefix in string representation of numbers :( - n, ok := tryParseUint64(prefix) - if !ok || n > ch.maxValue { - bm.resetBits() - return - } - // There is no need in matching against bloom filters, since tokens is empty. - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint16String(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - -func matchUint32ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { - if prefix == "" { - // Fast path - all the uint32 values match an empty prefix aka `*` - return - } - // The prefix may contain a part of the number. - // For example, `foo:12*` must match `12` and `123`. 
- // This means we cannot search in binary representation of numbers. - // Instead, we need searching for the whole prefix in string representation of numbers :( - n, ok := tryParseUint64(prefix) - if !ok || n > ch.maxValue { - bm.resetBits() - return - } - // There is no need in matching against bloom filters, since tokens is empty. - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint32String(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - -func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { - if prefix == "" { - // Fast path - all the uint64 values match an empty prefix aka `*` - return - } - // The prefix may contain a part of the number. - // For example, `foo:12*` must match `12` and `123`. - // This means we cannot search in binary representation of numbers. - // Instead, we need searching for the whole prefix in string representation of numbers :( - n, ok := tryParseUint64(prefix) - if !ok || n > ch.maxValue { - bm.resetBits() - return - } - // There is no need in matching against bloom filters, since tokens is empty. - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint64String(bs, bb, v) - return matchPrefix(s, prefix) - }) - bbPool.Put(bb) -} - func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool { if len(tokens) == 0 { return true @@ -681,42 +422,6 @@ func isASCIILowercase(s string) bool { return true } -func matchPrefix(s, prefix string) bool { - if len(prefix) == 0 { - // Special case - empty prefix matches any string. 
- return len(s) > 0 - } - if len(prefix) > len(s) { - return false - } - - r := rune(prefix[0]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeRuneInString(prefix) - } - startsWithToken := isTokenRune(r) - offset := 0 - for { - n := strings.Index(s[offset:], prefix) - if n < 0 { - return false - } - offset += n - // Make sure that the found phrase contains non-token chars at the beginning - if startsWithToken && offset > 0 { - r := rune(s[offset-1]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeLastRuneInString(s[:offset]) - } - if r == utf8.RuneError || isTokenRune(r) { - offset++ - continue - } - } - return true - } -} - func matchAnyCasePhrase(s, phraseLowercase string) bool { if len(phraseLowercase) == 0 { // Special case - empty phrase matches only empty string. diff --git a/lib/logstorage/filter_prefix.go b/lib/logstorage/filter_prefix.go new file mode 100644 index 000000000..50a819501 --- /dev/null +++ b/lib/logstorage/filter_prefix.go @@ -0,0 +1,305 @@ +package logstorage + +import ( + "fmt" + "strings" + "sync" + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// filterPrefix matches the given prefix. 
+// +// Example LogsQL: `fieldName:prefix*` or `fieldName:"some prefix"*` +// +// A special case `fieldName:*` matches non-empty value for the given `fieldName` field +type filterPrefix struct { + fieldName string + prefix string + + tokensOnce sync.Once + tokens []string +} + +func (fp *filterPrefix) String() string { + if fp.prefix == "" { + return quoteFieldNameIfNeeded(fp.fieldName) + "*" + } + return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix)) +} + +func (fp *filterPrefix) getTokens() []string { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokens +} + +func (fp *filterPrefix) initTokens() { + fp.tokens = getTokensSkipLast(fp.prefix) +} + +func (fp *filterPrefix) apply(bs *blockSearch, bm *bitmap) { + fieldName := fp.fieldName + prefix := fp.prefix + + // Verify whether fp matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchPrefix(v, prefix) { + bm.resetBits() + } + return + } + + // Verify whether fp matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. 
+ bm.resetBits() + return + } + + tokens := fp.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeDict: + matchValuesDictByPrefix(bs, ch, bm, prefix) + case valueTypeUint8: + matchUint8ByPrefix(bs, ch, bm, prefix) + case valueTypeUint16: + matchUint16ByPrefix(bs, ch, bm, prefix) + case valueTypeUint32: + matchUint32ByPrefix(bs, ch, bm, prefix) + case valueTypeUint64: + matchUint64ByPrefix(bs, ch, bm, prefix) + case valueTypeFloat64: + matchFloat64ByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeIPv4: + matchIPv4ByPrefix(bs, ch, bm, prefix, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByPrefix(bs, ch, bm, prefix, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { + if prefix == "" { + // Fast path - all the timestamp values match an empty prefix aka `*` + return + } + // There is no sense in trying to parse prefix, since it may contain incomplete timestamp. + // We cannot compare binary representation of timestamp and need converting + // the timestamp to string before searching for the prefix there. + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { + if prefix == "" { + // Fast path - all the ipv4 values match an empty prefix aka `*` + return + } + // There is no sense in trying to parse prefix, since it may contain incomplete ip. + // We cannot compare binary representation of ip address and need converting + // the ip to string before searching for the prefix there. 
+ if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { + if prefix == "" { + // Fast path - all the float64 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the floating-point number. + // For example, `foo:12*` must match `12`, `123.456` and `-0.123`. + // This means we cannot search in binary representation of floating-point numbers. + // Instead, we need searching for the whole prefix in string representation + // of floating-point numbers :( + _, ok := tryParseFloat64(prefix) + if !ok && prefix != "." && prefix != "+" && prefix != "-" && !strings.HasPrefix(prefix, "e") && !strings.HasPrefix(prefix, "E") { + bm.resetBits() + return + } + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchPrefix(v, prefix) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchPrefix(v, prefix) + }) +} + +func matchUint8ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint8 values match an empty prefix aka `*` + return + } + // The 
prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. + // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint16ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint16 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. + // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint32ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint32 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. 
+ // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { + if prefix == "" { + // Fast path - all the uint64 values match an empty prefix aka `*` + return + } + // The prefix may contain a part of the number. + // For example, `foo:12*` must match `12` and `123`. + // This means we cannot search in binary representation of numbers. + // Instead, we need searching for the whole prefix in string representation of numbers :( + n, ok := tryParseUint64(prefix) + if !ok || n > ch.maxValue { + bm.resetBits() + return + } + // There is no need in matching against bloom filters, since tokens is empty. + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchPrefix(s, prefix) + }) + bbPool.Put(bb) +} + +func matchPrefix(s, prefix string) bool { + if len(prefix) == 0 { + // Special case - empty prefix matches any non-empty string. 
+ return len(s) > 0 + } + if len(prefix) > len(s) { + return false + } + + r := rune(prefix[0]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(prefix) + } + startsWithToken := isTokenRune(r) + offset := 0 + for { + n := strings.Index(s[offset:], prefix) + if n < 0 { + return false + } + offset += n + // Make sure that the found phrase contains non-token chars at the beginning + if startsWithToken && offset > 0 { + r := rune(s[offset-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(s[:offset]) + } + if r == utf8.RuneError || isTokenRune(r) { + offset++ + continue + } + } + return true + } +} diff --git a/lib/logstorage/filter_prefix_test.go b/lib/logstorage/filter_prefix_test.go new file mode 100644 index 000000000..e63e80c45 --- /dev/null +++ b/lib/logstorage/filter_prefix_test.go @@ -0,0 +1,929 @@ +package logstorage + +import ( + "testing" +) + +func TestMatchPrefix(t *testing.T) { + f := func(s, prefix string, resultExpected bool) { + t.Helper() + result := matchPrefix(s, prefix) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", "", false) + f("foo", "", true) + f("", "foo", false) + f("foo", "foo", true) + f("foo bar", "foo", true) + f("foo bar", "bar", true) + f("a foo bar", "foo", true) + f("a foo bar", "fo", true) + f("a foo bar", "oo", false) + f("foobar", "foo", true) + f("foobar", "bar", false) + f("foobar", "oob", false) + f("afoobar foo", "foo", true) + f("раз два (три!)", "три", true) + f("", "foo bar", false) + f("foo bar", "foo bar", true) + f("(foo bar)", "foo bar", true) + f("afoo bar", "foo bar", false) + f("afoo bar", "afoo ba", true) + f("foo bar! baz", "foo bar!", true) + f("a.foo bar! baz", ".foo bar! ", true) + f("foo bar! baz", "foo bar! 
b", true) + f("255.255.255.255", "5", false) + f("255.255.255.255", "55", false) + f("255.255.255.255", "255", true) + f("255.255.255.255", "5.255", false) + f("255.255.255.255", "255.25", true) + f("255.255.255.255", "255.255", true) +} + +func TestFilterPrefix(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + { + name: "other column", + values: []string{ + "asdfdsf", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "def", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "other column", + prefix: "asdfdsf", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bc", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "other column", + prefix: "sd", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { 
+ name: "other-column", + values: []string{ + "x", + "x", + "x", + }, + }, + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "abc", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "ab", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "abc de", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: " de", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "abc def", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "other-column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: " 2 ", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "abc def ", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "x", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "other-column", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "x", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = 
&filterPrefix{ + fieldName: "_msg", + prefix: "foo", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 3, 4, 6}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 2, 3, 4, 5, 6}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "ba", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "foobar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "a", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "НГК", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{8}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "aa a", + } + testFilterMatchForColumns(t, columns, 
fp, "foo", []int{2}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "!,", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{9}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "aa ax", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "qwe rty abc", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "@", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 5}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{3, 4}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + 
name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{1}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "123456", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: 
"12345678901", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "1234", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "12345678901", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "123", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "1234.5678901", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) + + 
fp = &filterPrefix{ + fieldName: "foo", + prefix: "56789", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "-6553", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "65536", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "7344.8943", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "-1234", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "+1234", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "23", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "678", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "33", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "foo", + prefix: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 7}) + + fp = 
&filterPrefix{ + fieldName: "foo", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "12", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 6, 7, 8, 9}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "127.0.0", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 7}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "2.3.", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "0", + } + testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + // mismatch + fp = &filterPrefix{ + fieldName: "foo", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "8", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "127.1", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "27.0", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "foo", + prefix: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + fp := &filterPrefix{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, 
columns, fp, "_msg", []int{4}) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "2006-01-0", + } + testFilterMatchForColumns(t, columns, fp, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "002", + } + testFilterMatchForColumns(t, columns, fp, "_msg", []int{1}) + + // mismatch + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "bar", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "06", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + fp = &filterPrefix{ + fieldName: "_msg", + prefix: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + + fp = &filterPrefix{ + fieldName: "non-existing-column", + prefix: "", + } + testFilterMatchForColumns(t, columns, fp, "_msg", nil) + }) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index 0c7059c31..ab518ebf4 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -81,45 +81,6 @@ func TestMatchPhrase(t *testing.T) { f("255.255.255.255", "255.255", true) } -func TestMatchPrefix(t *testing.T) { - f := func(s, prefix string, resultExpected bool) { - t.Helper() - result := matchPrefix(s, prefix) - if result != resultExpected { - t.Fatalf("unexpected result; got %v; want %v", 
result, resultExpected) - } - } - - f("", "", false) - f("foo", "", true) - f("", "foo", false) - f("foo", "foo", true) - f("foo bar", "foo", true) - f("foo bar", "bar", true) - f("a foo bar", "foo", true) - f("a foo bar", "fo", true) - f("a foo bar", "oo", false) - f("foobar", "foo", true) - f("foobar", "bar", false) - f("foobar", "oob", false) - f("afoobar foo", "foo", true) - f("раз два (три!)", "три", true) - f("", "foo bar", false) - f("foo bar", "foo bar", true) - f("(foo bar)", "foo bar", true) - f("afoo bar", "foo bar", false) - f("afoo bar", "afoo ba", true) - f("foo bar! baz", "foo bar!", true) - f("a.foo bar! baz", ".foo bar! ", true) - f("foo bar! baz", "foo bar! b", true) - f("255.255.255.255", "5", false) - f("255.255.255.255", "55", false) - f("255.255.255.255", "255", true) - f("255.255.255.255", "5.255", false) - f("255.255.255.255", "255.25", true) - f("255.255.255.255", "255.255", true) -} - func TestComplexFilters(t *testing.T) { columns := []column{ { @@ -298,891 +259,6 @@ func TestStreamFilter(t *testing.T) { testFilterMatchForColumns(t, columns, f, "foo", nil) } -func TestFilterPrefix(t *testing.T) { - t.Run("single-row", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "abc def", - }, - }, - { - name: "other column", - values: []string{ - "asdfdsf", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "abc", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "ab", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "abc def", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "def", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = 
&filterPrefix{ - fieldName: "other column", - prefix: "asdfdsf", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bc", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "other column", - prefix: "sd", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "abc", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("const-column", func(t *testing.T) { - columns := []column{ - { - name: "other-column", - values: []string{ - "x", - "x", - "x", - }, - }, - { - name: "foo", - values: []string{ - "abc def", - "abc def", - "abc def", - }, - }, - { - name: "_msg", - values: []string{ - "1 2 3", - "1 2 3", - "1 2 3", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "abc", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "ab", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "abc de", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: " de", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "abc def", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "other-column", - prefix: "x", - } - 
testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: " 2 ", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "abc def ", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "x", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "other-column", - prefix: "foo", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "x", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "foo", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("dict", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "", - "foobar", - "abc", - "afdf foobar baz", - "fddf foobarbaz", - "afoobarbaz", - "foobar", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "foobar", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 3, 4, 6}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 2, 3, 4, 5, 6}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "ba", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "foobar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing column", - prefix: "", - } - testFilterMatchForColumns(t, columns, 
fp, "foo", nil) - }) - - t.Run("strings", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "a foo", - "a foobar", - "aa abc a", - "ca afdf a,foobar baz", - "a fddf foobarbaz", - "a afoobarbaz", - "a foobar", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "a", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "НГК", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{8}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "aa a", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{2}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "!,", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{9}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "aa ax", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "qwe rty abc", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "@", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("uint8", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "12", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 5}) - - 
fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "0", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{3, 4}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "33", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "1234", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("uint16", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65535", - "1234", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "123", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "0", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{1}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "33", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "123456", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("uint32", func(t *testing.T) { - columns := []column{ - { - 
name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65536", - "1234", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "123", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "65536", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "33", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "12345678901", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("uint64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65536", - "12345678901", - "1", - "2", - "3", - "4", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "1234", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "12345678901", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "33", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - 
prefix: "12345678901234567890", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("float64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "-65536", - "1234.5678901", - "1", - "2", - "3", - "4", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "123", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "1234.5678901", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "56789", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{4}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "-6553", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "65536", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{3}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "7344.8943", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "-1234", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "+1234", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "23", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "678", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = 
&filterPrefix{ - fieldName: "foo", - prefix: "33", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "12345678901234567890", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("ipv4", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1.2.3.4", - "0.0.0.0", - "127.0.0.1", - "254.255.255.255", - "127.0.0.1", - "127.0.0.1", - "127.0.4.2", - "127.0.0.1", - "12.0.127.6", - "55.55.12.55", - "66.66.66.66", - "7.7.7.7", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "foo", - prefix: "127.0.0.1", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 7}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "12", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 6, 7, 8, 9}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "127.0.0", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{2, 4, 5, 7}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "2.3.", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{0}) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "0", - } - testFilterMatchForColumns(t, columns, fp, "foo", []int{1, 2, 4, 5, 6, 7, 8}) - - // mismatch - fp = &filterPrefix{ - fieldName: "foo", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "8", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "127.1", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "27.0", - } - 
testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "foo", - prefix: "255.255.255.255", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "foo", nil) - }) - - t.Run("timestamp-iso8601", func(t *testing.T) { - columns := []column{ - { - name: "_msg", - values: []string{ - "2006-01-02T15:04:05.001Z", - "2006-01-02T15:04:05.002Z", - "2006-01-02T15:04:05.003Z", - "2006-01-02T15:04:05.004Z", - "2006-01-02T15:04:05.005Z", - "2006-01-02T15:04:05.006Z", - "2006-01-02T15:04:05.007Z", - "2006-01-02T15:04:05.008Z", - "2006-01-02T15:04:05.009Z", - }, - }, - } - - // match - fp := &filterPrefix{ - fieldName: "_msg", - prefix: "2006-01-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, fp, "_msg", []int{4}) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "2006-01-0", - } - testFilterMatchForColumns(t, columns, fp, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "002", - } - testFilterMatchForColumns(t, columns, fp, "_msg", []int{1}) - - // mimatch - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "bar", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "2006-03-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "06", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - - // This filter shouldn't match row=4, since it has different string representation of the timestamp - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "2006-01-02T16:04:05.005+01:00", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - - // This filter 
shouldn't match row=4, since it contains too many digits for millisecond part - fp = &filterPrefix{ - fieldName: "_msg", - prefix: "2006-01-02T15:04:05.00500Z", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - - fp = &filterPrefix{ - fieldName: "non-existing-column", - prefix: "", - } - testFilterMatchForColumns(t, columns, fp, "_msg", nil) - }) -} - func TestAnyCasePhraseFilter(t *testing.T) { t.Run("single-row", func(t *testing.T) { columns := []column{