From b370a785e9b28a6b869755ba115db2e7dd179255 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 06:49:06 +0200 Subject: [PATCH] wip --- lib/logstorage/filter.go | 140 ---------- lib/logstorage/filter_regexp.go | 147 +++++++++++ lib/logstorage/filter_regexp_test.go | 369 +++++++++++++++++++++++++++ lib/logstorage/filter_test.go | 364 -------------------------- 4 files changed, 516 insertions(+), 504 deletions(-) create mode 100644 lib/logstorage/filter_regexp.go create mode 100644 lib/logstorage/filter_regexp_test.go diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index 8aa6934dd..035426f6a 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "math" - "regexp" "strconv" "strings" "sync" @@ -72,65 +71,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -// filterRegexp matches the given regexp -// -// Example LogsQL: `fieldName:re("regexp")` -type filterRegexp struct { - fieldName string - re *regexp.Regexp -} - -func (fr *filterRegexp) String() string { - return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) -} - -func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) { - fieldName := fr.fieldName - re := fr.re - - // Verify whether filter matches const column - v := bs.csh.getConstColumnValue(fieldName) - if v != "" { - if !re.MatchString(v) { - bm.resetBits() - } - return - } - - // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) - if ch == nil { - // Fast path - there are no matching columns. - if !re.MatchString("") { - bm.resetBits() - } - return - } - - switch ch.valueType { - case valueTypeString: - matchStringByRegexp(bs, ch, bm, re) - case valueTypeDict: - matchValuesDictByRegexp(bs, ch, bm, re) - case valueTypeUint8: - matchUint8ByRegexp(bs, ch, bm, re) - case valueTypeUint16: - matchUint16ByRegexp(bs, ch, bm, re) - case valueTypeUint32: - matchUint32ByRegexp(bs, ch, bm, re) - case valueTypeUint64: - matchUint64ByRegexp(bs, ch, bm, re) - case valueTypeFloat64: - matchFloat64ByRegexp(bs, ch, bm, re) - case valueTypeIPv4: - matchIPv4ByRegexp(bs, ch, bm, re) - case valueTypeTimestampISO8601: - matchTimestampISO8601ByRegexp(bs, ch, bm, re) - default: - logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) - } -} - // anyCasePrefixFilter matches the given prefix in lower, upper and mixed case. // // Example LogsQL: `fieldName:i(prefix*)` or `fieldName:i("some prefix"*)` @@ -464,15 +404,6 @@ func (pf *phraseFilter) apply(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toTimestampISO8601StringExt(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { if prefix == "" { // Fast path - all the timestamp values match an empty prefix aka `*` @@ -516,15 +447,6 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toIPv4StringExt(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { if prefix == "" { // Fast path - all the ipv4 values match an empty prefix aka `*` @@ -570,15 +492,6 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str bbPool.Put(bb) } -func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toFloat64StringExt(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { if prefix == "" { // Fast path - all the float64 values match an empty prefix aka `*` @@ -636,17 +549,6 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase bbPool.Put(bb) } -func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if re.MatchString(v) { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) { bb := bbPool.Get() for i, v := range ch.valuesDict.values { @@ -718,12 +620,6 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod }) } -func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - visitValues(bs, ch, bm, func(v string) bool { - return re.MatchString(v) - }) -} - func matchStringByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) { visitValues(bs, ch, bm, func(v string) bool { return matchAnyCasePrefix(v, prefixLowercase) @@ -770,42 +666,6 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { return minLen <= uint64(len(s)) } -func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint8String(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - -func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint16String(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - -func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint32String(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - -func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint64String(bs, bb, v) - return re.MatchString(s) - }) - bbPool.Put(bb) -} - func matchUint8ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { if prefix == "" { // Fast path - all the uint8 values match an empty prefix aka `*` diff --git a/lib/logstorage/filter_regexp.go b/lib/logstorage/filter_regexp.go new file mode 100644 index 000000000..0217d0aaa --- /dev/null +++ b/lib/logstorage/filter_regexp.go @@ -0,0 +1,147 @@ +package logstorage + +import ( + "fmt" + "regexp" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// filterRegexp matches the given regexp +// +// Example LogsQL: `fieldName:re("regexp")` +type filterRegexp struct { + fieldName string + re *regexp.Regexp +} + +func (fr *filterRegexp) String() string { + return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) +} + +func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) { + fieldName := fr.fieldName + re := fr.re + + // Verify whether filter matches const column + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !re.MatchString(v) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + if !re.MatchString("") { + bm.resetBits() + } + return + } + + switch ch.valueType { + case valueTypeString: + matchStringByRegexp(bs, ch, bm, re) + case valueTypeDict: + matchValuesDictByRegexp(bs, ch, bm, re) + case valueTypeUint8: + matchUint8ByRegexp(bs, ch, bm, re) + case valueTypeUint16: + matchUint16ByRegexp(bs, ch, bm, re) + case valueTypeUint32: + matchUint32ByRegexp(bs, ch, bm, re) + case valueTypeUint64: + matchUint64ByRegexp(bs, ch, bm, re) + case valueTypeFloat64: + matchFloat64ByRegexp(bs, ch, bm, re) + case valueTypeIPv4: + matchIPv4ByRegexp(bs, ch, bm, re) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByRegexp(bs, ch, bm, re) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toTimestampISO8601StringExt(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if re.MatchString(v) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + visitValues(bs, ch, bm, func(v string) bool { + return re.MatchString(v) + }) +} + +func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} + +func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return re.MatchString(s) + }) + bbPool.Put(bb) +} diff --git a/lib/logstorage/filter_regexp_test.go b/lib/logstorage/filter_regexp_test.go new file mode 100644 index 000000000..8ff80fdd1 --- /dev/null +++ b/lib/logstorage/filter_regexp_test.go @@ -0,0 +1,369 @@ +package logstorage + +import ( + "regexp" + "testing" +) + +func TestFilterRegexp(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "127.0.0.1", + "127.0.0.1", + "127.0.0.1", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("0.0"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) + + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile(`^127\.0\.0\.1$`), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) + + fr = &filterRegexp{ + fieldName: "non-existing-column", + re: regexp.MustCompile("foo.+bar|"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo.+bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterRegexp{ + fieldName: "non-existing-column", + re: regexp.MustCompile("foo.+bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "127.0.0.1", + "Abc", + "127.255.255.255", + "10.4", + "foo 127.0.0.1", + "127.0.0.1 bar", + "127.0.0.1", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar|^$"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6}) + + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("27.0"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("bar.+foo"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "127.0.0.1", + "20", + "15.5", + "-5", + "a fooBaR", + "a 127.0.0.1 dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("(?i)foo|йцу"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("qwe.+rty|^$"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65535", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "65536", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12345678901", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("[32][23]?"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("127.0.[40].(1|2)"), + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7}) + + // mismatch + fr = &filterRegexp{ + fieldName: "foo", + re: regexp.MustCompile("foo|bar|834"), + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + fr := &filterRegexp{ + fieldName: "_msg", + re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"), + } + testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4}) + + // mismatch + fr = &filterRegexp{ + fieldName: "_msg", + re: regexp.MustCompile("^01|04$"), + } + testFilterMatchForColumns(t, columns, fr, "_msg", nil) + }) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index fe64195c2..933040765 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -3,7 +3,6 @@ package logstorage import ( "fmt" "reflect" - "regexp" "testing" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" @@ -333,369 +332,6 @@ func TestStreamFilter(t *testing.T) { testFilterMatchForColumns(t, columns, f, "foo", nil) } -func TestFilterRegexp(t *testing.T) { - t.Run("const-column", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "127.0.0.1", - "127.0.0.1", - "127.0.0.1", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("0.0"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) - - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile(`^127\.0\.0\.1$`), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) - - fr = &filterRegexp{ - fieldName: "non-existing-column", - re: regexp.MustCompile("foo.+bar|"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo.+bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &filterRegexp{ - fieldName: "non-existing-column", - re: regexp.MustCompile("foo.+bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("dict", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "", - "127.0.0.1", - "Abc", - "127.255.255.255", - "10.4", - "foo 127.0.0.1", - "127.0.0.1 bar", - "127.0.0.1", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar|^$"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 5, 6}) - - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("27.0"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 5, 6, 7}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("bar.+foo"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("strings", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "A FOO", - "a 10", - "127.0.0.1", - "20", - "15.5", - "-5", - "a fooBaR", - "a 127.0.0.1 dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("(?i)foo|йцу"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 6, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("qwe.+rty|^$"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint8", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint16", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "65535", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint32", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "65536", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12345678901", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 7, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("float64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "123456.78901", - "-0.2", - "2", - "-334", - "4", - "5", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("[32][23]?"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2, 5, 6, 7, 8}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("ipv4", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1.2.3.4", - "0.0.0.0", - "127.0.0.1", - "254.255.255.255", - "127.0.0.1", - "127.0.0.1", - "127.0.4.2", - "127.0.0.1", - "12.0.127.6", - "55.55.12.55", - "66.66.66.66", - "7.7.7.7", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("127.0.[40].(1|2)"), - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 4, 5, 6, 7}) - - // mismatch - fr = &filterRegexp{ - fieldName: "foo", - re: regexp.MustCompile("foo|bar|834"), - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("timestamp-iso8601", func(t *testing.T) { - columns := []column{ - { - name: "_msg", - values: []string{ - "2006-01-02T15:04:05.001Z", - "2006-01-02T15:04:05.002Z", - "2006-01-02T15:04:05.003Z", - "2006-01-02T15:04:05.004Z", - "2006-01-02T15:04:05.005Z", - "2006-01-02T15:04:05.006Z", - "2006-01-02T15:04:05.007Z", - "2006-01-02T15:04:05.008Z", - "2006-01-02T15:04:05.009Z", - }, - }, - } - - // match - fr := &filterRegexp{ - fieldName: "_msg", - re: regexp.MustCompile("2006-[0-9]{2}-.+?(2|5)Z"), - } - testFilterMatchForColumns(t, columns, fr, "_msg", []int{1, 4}) - - // mismatch - fr = &filterRegexp{ - fieldName: "_msg", - re: regexp.MustCompile("^01|04$"), - } - testFilterMatchForColumns(t, columns, fr, "_msg", nil) - }) -} - func TestAnyCasePrefixFilter(t *testing.T) { t.Run("single-row", func(t *testing.T) { columns := []column{