From e876b99b59e3dd0471b9c23525e6a9bf32ce5504 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 07:36:23 +0200 Subject: [PATCH] wip --- lib/logstorage/filter.go | 239 ------- lib/logstorage/filter_phrase.go | 247 ++++++++ lib/logstorage/filter_phrase_test.go | 893 +++++++++++++++++++++++++++ lib/logstorage/filter_test.go | 888 -------------------------- 4 files changed, 1140 insertions(+), 1127 deletions(-) create mode 100644 lib/logstorage/filter_phrase.go create mode 100644 lib/logstorage/filter_phrase_test.go diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index f18e7e85a..f4df75487 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -4,7 +4,6 @@ import ( "bytes" "math" "strconv" - "strings" "sync" "unicode/utf8" @@ -69,162 +68,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -// filterPhrase filters field entries by phrase match (aka full text search). -// -// A phrase consists of any number of words with delimiters between them. -// -// An empty phrase matches only an empty string. -// A single-word phrase is the simplest LogsQL query: `fieldName:word` -// -// Multi-word phrase is expressed as `fieldName:"word1 ... wordN"` in LogsQL. -// -// A special case `fieldName:""` matches any value without `fieldName` field. 
-type filterPhrase struct { - fieldName string - phrase string - - tokensOnce sync.Once - tokens []string -} - -func (fp *filterPhrase) String() string { - return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase) -} - -func (fp *filterPhrase) getTokens() []string { - fp.tokensOnce.Do(fp.initTokens) - return fp.tokens -} - -func (fp *filterPhrase) initTokens() { - fp.tokens = tokenizeStrings(nil, []string{fp.phrase}) -} - -func (fp *filterPhrase) apply(bs *blockSearch, bm *bitmap) { - fieldName := fp.fieldName - phrase := fp.phrase - - // Verify whether fp matches const column - v := bs.csh.getConstColumnValue(fieldName) - if v != "" { - if !matchPhrase(v, phrase) { - bm.resetBits() - } - return - } - - // Verify whether fp matches other columns - ch := bs.csh.getColumnHeader(fieldName) - if ch == nil { - // Fast path - there are no matching columns. - // It matches anything only for empty phrase. - if len(phrase) > 0 { - bm.resetBits() - } - return - } - - tokens := fp.getTokens() - - switch ch.valueType { - case valueTypeString: - matchStringByPhrase(bs, ch, bm, phrase, tokens) - case valueTypeDict: - matchValuesDictByPhrase(bs, ch, bm, phrase) - case valueTypeUint8: - matchUint8ByExactValue(bs, ch, bm, phrase, tokens) - case valueTypeUint16: - matchUint16ByExactValue(bs, ch, bm, phrase, tokens) - case valueTypeUint32: - matchUint32ByExactValue(bs, ch, bm, phrase, tokens) - case valueTypeUint64: - matchUint64ByExactValue(bs, ch, bm, phrase, tokens) - case valueTypeFloat64: - matchFloat64ByPhrase(bs, ch, bm, phrase, tokens) - case valueTypeIPv4: - matchIPv4ByPhrase(bs, ch, bm, phrase, tokens) - case valueTypeTimestampISO8601: - matchTimestampISO8601ByPhrase(bs, ch, bm, phrase, tokens) - default: - logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) - } -} - -func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - _, ok := tryParseTimestampISO8601(phrase) - if 
ok { - // Fast path - the phrase contains complete timestamp, so we can use exact search - matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) - return - } - - // Slow path - the phrase contains incomplete timestamp. Search over string representation of the timestamp. - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toTimestampISO8601StringExt(bs, bb, v) - return matchPhrase(s, phrase) - }) - bbPool.Put(bb) -} - -func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - _, ok := tryParseIPv4(phrase) - if ok { - // Fast path - phrase contains the full IP address, so we can use exact matching - matchIPv4ByExactValue(bs, ch, bm, phrase, tokens) - return - } - - // Slow path - the phrase may contain a part of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`. - // We cannot compare binary represetnation of ip address and need converting - // the ip to string before searching for prefix there. - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toIPv4StringExt(bs, bb, v) - return matchPhrase(s, phrase) - }) - bbPool.Put(bb) -} - -func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - // The phrase may contain a part of the floating-point number. - // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. - // This means we cannot search in binary representation of floating-point numbers. - // Instead, we need searching for the whole phrase in string representation - // of floating-point numbers :( - _, ok := tryParseFloat64(phrase) - if !ok && phrase != "." 
&& phrase != "+" && phrase != "-" { - bm.resetBits() - return - } - if n := strings.IndexByte(phrase, '.'); n > 0 && n < len(phrase)-1 { - // Fast path - the phrase contains the exact floating-point number, so we can use exact search - matchFloat64ByExactValue(bs, ch, bm, phrase, tokens) - return - } - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toFloat64StringExt(bs, bb, v) - return matchPhrase(s, phrase) - }) - bbPool.Put(bb) -} - func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { bb := bbPool.Get() for i, v := range ch.valuesDict.values { @@ -236,17 +79,6 @@ func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, va bbPool.Put(bb) } -func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if matchPhrase(v, phrase) { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) { if len(encodedValues) == 0 { // Fast path - the phrase is missing in the valuesDict @@ -263,16 +95,6 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod }) } -func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - if !matchBloomFilterAllTokens(bs, ch, tokens) { - bm.resetBits() - return - } - visitValues(bs, ch, bm, func(v string) bool { - return matchPhrase(v, phrase) - }) -} - func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { bb := bbPool.Get() defer bbPool.Put(bb) @@ -316,67 +138,6 @@ func isASCIILowercase(s string) bool { return true } -func matchPhrase(s, phrase string) bool { - if len(phrase) == 0 { - // Special case - empty phrase matches only empty 
string. - return len(s) == 0 - } - n := getPhrasePos(s, phrase) - return n >= 0 -} - -func getPhrasePos(s, phrase string) int { - if len(phrase) == 0 { - return 0 - } - if len(phrase) > len(s) { - return -1 - } - - r := rune(phrase[0]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeRuneInString(phrase) - } - startsWithToken := isTokenRune(r) - - r = rune(phrase[len(phrase)-1]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeLastRuneInString(phrase) - } - endsWithToken := isTokenRune(r) - - pos := 0 - for { - n := strings.Index(s[pos:], phrase) - if n < 0 { - return -1 - } - pos += n - // Make sure that the found phrase contains non-token chars at the beginning and at the end - if startsWithToken && pos > 0 { - r := rune(s[pos-1]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeLastRuneInString(s[:pos]) - } - if r == utf8.RuneError || isTokenRune(r) { - pos++ - continue - } - } - if endsWithToken && pos+len(phrase) < len(s) { - r := rune(s[pos+len(phrase)]) - if r >= utf8.RuneSelf { - r, _ = utf8.DecodeRuneInString(s[pos+len(phrase):]) - } - if r == utf8.RuneError || isTokenRune(r) { - pos++ - continue - } - } - return pos - } -} - type stringBucket struct { a []string } diff --git a/lib/logstorage/filter_phrase.go b/lib/logstorage/filter_phrase.go new file mode 100644 index 000000000..10cd34024 --- /dev/null +++ b/lib/logstorage/filter_phrase.go @@ -0,0 +1,247 @@ +package logstorage + +import ( + "strings" + "sync" + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// filterPhrase filters field entries by phrase match (aka full text search). +// +// A phrase consists of any number of words with delimiters between them. +// +// An empty phrase matches only an empty string. +// A single-word phrase is the simplest LogsQL query: `fieldName:word` +// +// Multi-word phrase is expressed as `fieldName:"word1 ... wordN"` in LogsQL. +// +// A special case `fieldName:""` matches any value without `fieldName` field. 
+type filterPhrase struct { + fieldName string + phrase string + + tokensOnce sync.Once + tokens []string +} + +func (fp *filterPhrase) String() string { + return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase) +} + +func (fp *filterPhrase) getTokens() []string { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokens +} + +func (fp *filterPhrase) initTokens() { + fp.tokens = tokenizeStrings(nil, []string{fp.phrase}) +} + +func (fp *filterPhrase) apply(bs *blockSearch, bm *bitmap) { + fieldName := fp.fieldName + phrase := fp.phrase + + // Verify whether fp matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchPhrase(v, phrase) { + bm.resetBits() + } + return + } + + // Verify whether fp matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + // It matches anything only for empty phrase. + if len(phrase) > 0 { + bm.resetBits() + } + return + } + + tokens := fp.getTokens() + + switch ch.valueType { + case valueTypeString: + matchStringByPhrase(bs, ch, bm, phrase, tokens) + case valueTypeDict: + matchValuesDictByPhrase(bs, ch, bm, phrase) + case valueTypeUint8: + matchUint8ByExactValue(bs, ch, bm, phrase, tokens) + case valueTypeUint16: + matchUint16ByExactValue(bs, ch, bm, phrase, tokens) + case valueTypeUint32: + matchUint32ByExactValue(bs, ch, bm, phrase, tokens) + case valueTypeUint64: + matchUint64ByExactValue(bs, ch, bm, phrase, tokens) + case valueTypeFloat64: + matchFloat64ByPhrase(bs, ch, bm, phrase, tokens) + case valueTypeIPv4: + matchIPv4ByPhrase(bs, ch, bm, phrase, tokens) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByPhrase(bs, ch, bm, phrase, tokens) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + _, ok := tryParseTimestampISO8601(phrase) + if 
ok { + // Fast path - the phrase contains complete timestamp, so we can use exact search + matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) + return + } + + // Slow path - the phrase contains incomplete timestamp. Search over string representation of the timestamp. + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + _, ok := tryParseIPv4(phrase) + if ok { + // Fast path - phrase contains the full IP address, so we can use exact matching + matchIPv4ByExactValue(bs, ch, bm, phrase, tokens) + return + } + + // Slow path - the phrase may contain a part of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`. + // We cannot compare binary representation of ip address and need converting + // the ip to string before searching for the phrase there. + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + // The phrase may contain a part of the floating-point number. + // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. + // This means we cannot search in binary representation of floating-point numbers. + // Instead, we need searching for the whole phrase in string representation + // of floating-point numbers :( + _, ok := tryParseFloat64(phrase) + if !ok && phrase != "." 
&& phrase != "+" && phrase != "-" { + bm.resetBits() + return + } + if n := strings.IndexByte(phrase, '.'); n > 0 && n < len(phrase)-1 { + // Fast path - the phrase contains the exact floating-point number, so we can use exact search + matchFloat64ByExactValue(bs, ch, bm, phrase, tokens) + return + } + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchPhrase(s, phrase) + }) + bbPool.Put(bb) +} + +func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchPhrase(v, phrase) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { + if !matchBloomFilterAllTokens(bs, ch, tokens) { + bm.resetBits() + return + } + visitValues(bs, ch, bm, func(v string) bool { + return matchPhrase(v, phrase) + }) +} + +func matchPhrase(s, phrase string) bool { + if len(phrase) == 0 { + // Special case - empty phrase matches only empty string. 
+ return len(s) == 0 + } + n := getPhrasePos(s, phrase) + return n >= 0 +} + +func getPhrasePos(s, phrase string) int { + if len(phrase) == 0 { + return 0 + } + if len(phrase) > len(s) { + return -1 + } + + r := rune(phrase[0]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(phrase) + } + startsWithToken := isTokenRune(r) + + r = rune(phrase[len(phrase)-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(phrase) + } + endsWithToken := isTokenRune(r) + + pos := 0 + for { + n := strings.Index(s[pos:], phrase) + if n < 0 { + return -1 + } + pos += n + // Make sure that the found phrase contains non-token chars at the beginning and at the end + if startsWithToken && pos > 0 { + r := rune(s[pos-1]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeLastRuneInString(s[:pos]) + } + if r == utf8.RuneError || isTokenRune(r) { + pos++ + continue + } + } + if endsWithToken && pos+len(phrase) < len(s) { + r := rune(s[pos+len(phrase)]) + if r >= utf8.RuneSelf { + r, _ = utf8.DecodeRuneInString(s[pos+len(phrase):]) + } + if r == utf8.RuneError || isTokenRune(r) { + pos++ + continue + } + } + return pos + } +} diff --git a/lib/logstorage/filter_phrase_test.go b/lib/logstorage/filter_phrase_test.go new file mode 100644 index 000000000..659499035 --- /dev/null +++ b/lib/logstorage/filter_phrase_test.go @@ -0,0 +1,893 @@ +package logstorage + +import ( + "testing" +) + +func TestMatchPhrase(t *testing.T) { + f := func(s, phrase string, resultExpected bool) { + t.Helper() + result := matchPhrase(s, phrase) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", "", true) + f("foo", "", false) + f("", "foo", false) + f("foo", "foo", true) + f("foo bar", "foo", true) + f("foo bar", "bar", true) + f("a foo bar", "foo", true) + f("a foo bar", "fo", false) + f("a foo bar", "oo", false) + f("foobar", "foo", false) + f("foobar", "bar", false) + f("foobar", "oob", false) + f("afoobar foo", "foo", true) + 
f("раз два (три!)", "три", true) + f("", "foo bar", false) + f("foo bar", "foo bar", true) + f("(foo bar)", "foo bar", true) + f("afoo bar", "foo bar", false) + f("afoo bar", "afoo ba", false) + f("foo bar! baz", "foo bar!", true) + f("a.foo bar! baz", ".foo bar! ", true) + f("foo bar! baz", "foo bar! b", false) + f("255.255.255.255", "5", false) + f("255.255.255.255", "55", false) + f("255.255.255.255", "255", true) + f("255.255.255.255", "5.255", false) + f("255.255.255.255", "255.25", false) + f("255.255.255.255", "255.255", true) +} + +func TestFilterPhrase(t *testing.T) { + t.Run("single-row", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "abc def", + }, + }, + { + name: "other column", + values: []string{ + "asdfdsf", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "other column", + phrase: "asdfdsf", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "ab", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "other column", + phrase: "sd", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "non-existing column", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + 
t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "other-column", + values: []string{ + "x", + "x", + "x", + }, + }, + { + name: "foo", + values: []string{ + "abc def", + "abc def", + "abc def", + }, + }, + { + name: "_msg", + values: []string{ + "1 2 3", + "1 2 3", + "1 2 3", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "abc", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: " def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "abc def", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "other-column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: " 2 ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "abc def ", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "other-column", + phrase: "foo", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "non-existing column", + phrase: "x", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "foo", + } + 
testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "foobar", + "abc", + "afdf foobar baz", + "fddf foobarbaz", + "afoobarbaz", + "foobar", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "baz", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "non-existing column", + phrase: "foobar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "a foo", + "a foobar", + "aa abc a", + "ca afdf a,foobar baz", + "a fddf foobarbaz", + "a afoobarbaz", + "a foobar", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "a", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "НГКШ", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "!,", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) + + // mismatch + 
pf = &filterPhrase{ + fieldName: "foo", + phrase: "aa a", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "@", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "12", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65535", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &filterPhrase{ + fieldName: 
"foo", + phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "123456", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "65536", + "1234", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: 
[]string{ + "1234", + "0", + "3454", + "65536", + "12345678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "12345678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1234", + "0", + "3454", + "-65536", + "1234.5678901", + "1", + "2", + "3", + "4", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "1234.5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "5678901", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "-65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "65536", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", 
+ } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "-1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "+1234", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "123", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "5678", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "33", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "12345678901234567890", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.55.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "foo", + phrase: "127.0.0.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "127", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "127.0.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "2.3", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) + + pf = &filterPhrase{ + fieldName: "foo", + 
phrase: "0", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + + // mismatch + pf = &filterPhrase{ + fieldName: "foo", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "5", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "127.1", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "27.0", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + + pf = &filterPhrase{ + fieldName: "foo", + phrase: "255.255.255.255", + } + testFilterMatchForColumns(t, columns, pf, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + pf := &filterPhrase{ + fieldName: "_msg", + phrase: "2006-01-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "2006-01", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "002Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) + + pf = &filterPhrase{ + fieldName: "non-existing-column", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", 
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "bar", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "2006-03-02T15:04:05.005Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "06", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it has different string representation of the timestamp + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "2006-01-02T16:04:05.005+01:00", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + + // This filter shouldn't match row=4, since it contains too many digits for millisecond part + pf = &filterPhrase{ + fieldName: "_msg", + phrase: "2006-01-02T15:04:05.00500Z", + } + testFilterMatchForColumns(t, columns, pf, "_msg", nil) + }) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index 6a05cf218..82a96964a 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -8,45 +8,6 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" ) -func TestMatchPhrase(t *testing.T) { - f := func(s, phrase string, resultExpected bool) { - t.Helper() - result := matchPhrase(s, phrase) - if result != resultExpected { - t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) - } - } - - f("", "", true) - f("foo", "", false) - f("", "foo", false) - f("foo", "foo", true) - f("foo bar", "foo", true) - f("foo bar", "bar", true) - f("a foo bar", "foo", true) - f("a foo bar", "fo", false) - f("a foo bar", "oo", false) - f("foobar", "foo", false) - f("foobar", "bar", false) - f("foobar", "oob", false) - f("afoobar foo", "foo", true) - f("раз два (три!)", "три", true) - f("", "foo bar", false) - f("foo bar", "foo
bar", true) - f("(foo bar)", "foo bar", true) - f("afoo bar", "foo bar", false) - f("afoo bar", "afoo ba", false) - f("foo bar! baz", "foo bar!", true) - f("a.foo bar! baz", ".foo bar! ", true) - f("foo bar! baz", "foo bar! b", false) - f("255.255.255.255", "5", false) - f("255.255.255.255", "55", false) - f("255.255.255.255", "255", true) - f("255.255.255.255", "5.255", false) - f("255.255.255.255", "255.25", false) - f("255.255.255.255", "255.255", true) -} - func TestComplexFilters(t *testing.T) { columns := []column{ { @@ -225,855 +186,6 @@ func TestStreamFilter(t *testing.T) { testFilterMatchForColumns(t, columns, f, "foo", nil) } -func TestFilterPhrase(t *testing.T) { - t.Run("single-row", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "abc def", - }, - }, - { - name: "other column", - values: []string{ - "asdfdsf", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "abc", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "abc def", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "def", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "other column", - phrase: "asdfdsf", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "ab", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "other column", - phrase: "sd", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "non-existing column", 
- phrase: "abc", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("const-column", func(t *testing.T) { - columns := []column{ - { - name: "other-column", - values: []string{ - "x", - "x", - "x", - }, - }, - { - name: "foo", - values: []string{ - "abc def", - "abc def", - "abc def", - }, - }, - { - name: "_msg", - values: []string{ - "1 2 3", - "1 2 3", - "1 2 3", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "abc", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "def", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: " def", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "abc def", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "other-column", - phrase: "x", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: " 2 ", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "abc def ", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "x", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "other-column", - phrase: "foo", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "non-existing column", - phrase: "x", - } - testFilterMatchForColumns(t, columns, pf, 
"foo", nil) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "foo", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("dict", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "", - "foobar", - "abc", - "afdf foobar baz", - "fddf foobarbaz", - "afoobarbaz", - "foobar", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "foobar", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 3, 6}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "baz", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "non-existing column", - phrase: "foobar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("strings", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "a foo", - "a foobar", - "aa abc a", - "ca afdf a,foobar baz", - "a fddf foobarbaz", - "a afoobarbaz", - "a foobar", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "a", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "НГКШ", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{8}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "!,", - } 
- testFilterMatchForColumns(t, columns, pf, "foo", []int{9}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "aa a", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "@", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("uint8", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "12", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 5}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "0", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{3, 4}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "33", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("uint16", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65535", - "1234", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "1234", - } - 
testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "0", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{1}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "33", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "123456", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("uint32", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65536", - "1234", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "65536", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "33", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "12345678901", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - 
- t.Run("uint64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "65536", - "12345678901", - "1", - "2", - "3", - "4", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "12345678901", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "33", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "12345678901234567890", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("float64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1234", - "0", - "3454", - "-65536", - "1234.5678901", - "1", - "2", - "3", - "4", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 4}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "1234.5678901", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "5678901", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{4}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "-65536", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{3}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "65536", - } - testFilterMatchForColumns(t, columns, pf, 
"foo", []int{3}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "-1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "+1234", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "123", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "5678", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "33", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "12345678901234567890", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("ipv4", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1.2.3.4", - "0.0.0.0", - "127.0.0.1", - "254.255.255.255", - "127.0.0.1", - "127.0.0.1", - "127.0.4.2", - "127.0.0.1", - "12.0.127.6", - "55.55.55.55", - "66.66.66.66", - "7.7.7.7", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "foo", - phrase: "127.0.0.1", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "127", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 6, 7, 8}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "127.0.0", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{2, 4, 5, 7}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "2.3", - } - 
testFilterMatchForColumns(t, columns, pf, "foo", []int{0}) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "0", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{1, 2, 4, 5, 6, 7, 8}) - - pf = &filterPhrase{ - fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) - - // mismatch - pf = &filterPhrase{ - fieldName: "foo", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "5", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "127.1", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "27.0", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - - pf = &filterPhrase{ - fieldName: "foo", - phrase: "255.255.255.255", - } - testFilterMatchForColumns(t, columns, pf, "foo", nil) - }) - - t.Run("timestamp-iso8601", func(t *testing.T) { - columns := []column{ - { - name: "_msg", - values: []string{ - "2006-01-02T15:04:05.001Z", - "2006-01-02T15:04:05.002Z", - "2006-01-02T15:04:05.003Z", - "2006-01-02T15:04:05.004Z", - "2006-01-02T15:04:05.005Z", - "2006-01-02T15:04:05.006Z", - "2006-01-02T15:04:05.007Z", - "2006-01-02T15:04:05.008Z", - "2006-01-02T15:04:05.009Z", - }, - }, - } - - // match - pf := &filterPhrase{ - fieldName: "_msg", - phrase: "2006-01-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, pf, "_msg", []int{4}) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "2006-01", - } - testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "002Z", - } - testFilterMatchForColumns(t, columns, pf, "_msg", []int{1}) - - pf = &filterPhrase{ 
- fieldName: "non-existing-column", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mimatch - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "bar", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "2006-03-02T15:04:05.005Z", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "06", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - - // This filter shouldn't match row=4, since it has different string representation of the timestamp - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "2006-01-02T16:04:05.005+01:00", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - - // This filter shouldn't match row=4, since it contains too many digits for millisecond part - pf = &filterPhrase{ - fieldName: "_msg", - phrase: "2006-01-02T15:04:05.00500Z", - } - testFilterMatchForColumns(t, columns, pf, "_msg", nil) - }) -} - func testFilterMatchForTimestamps(t *testing.T, timestamps []int64, f filter, expectedRowIdxs []int) { t.Helper()