From 203bb5f6514b0e58a5efc23c5d6a369f8ad86201 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Apr 2024 06:35:06 +0200 Subject: [PATCH] wip --- lib/logstorage/filter.go | 196 ----------- lib/logstorage/filter_ipv4_range.go | 17 + lib/logstorage/filter_len_range.go | 187 ++++++++++ lib/logstorage/filter_len_range_test.go | 438 ++++++++++++++++++++++++ lib/logstorage/filter_test.go | 433 ----------------------- lib/logstorage/parser.go | 6 +- 6 files changed, 645 insertions(+), 632 deletions(-) create mode 100644 lib/logstorage/filter_len_range.go create mode 100644 lib/logstorage/filter_len_range_test.go diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index c0692f216..bc786a124 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -72,73 +72,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -// lenRangeFilter matches field values with the length in the given range [minLen, maxLen]. -// -// Example LogsQL: `fieldName:len_range(10, 20)` -type lenRangeFilter struct { - fieldName string - minLen uint64 - maxLen uint64 - - stringRepr string -} - -func (fr *lenRangeFilter) String() string { - return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr -} - -func (fr *lenRangeFilter) apply(bs *blockSearch, bm *bitmap) { - fieldName := fr.fieldName - minLen := fr.minLen - maxLen := fr.maxLen - - if minLen > maxLen { - bm.resetBits() - return - } - - v := bs.csh.getConstColumnValue(fieldName) - if v != "" { - if !matchLenRange(v, minLen, maxLen) { - bm.resetBits() - } - return - } - - // Verify whether filter matches other columns - ch := bs.csh.getColumnHeader(fieldName) - if ch == nil { - // Fast path - there are no matching columns. - if !matchLenRange("", minLen, maxLen) { - bm.resetBits() - } - return - } - - switch ch.valueType { - case valueTypeString: - matchStringByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeDict: - matchValuesDictByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeUint8: - matchUint8ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeUint16: - matchUint16ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeUint32: - matchUint32ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeUint64: - matchUint64ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeFloat64: - matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeIPv4: - matchIPv4ByLenRange(bs, ch, bm, minLen, maxLen) - case valueTypeTimestampISO8601: - matchTimestampISO8601ByLenRange(bm, minLen, maxLen) - default: - logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) - } -} - // rangeFilter matches the given range [minValue..maxValue]. // // Example LogsQL: `fieldName:range(minValue, maxValue]` @@ -596,13 +529,6 @@ func (pf *phraseFilter) apply(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByLenRange(bm *bitmap, minLen, maxLen uint64) { - if minLen > uint64(len(iso8601Timestamp)) || maxLen < uint64(len(iso8601Timestamp)) { - bm.resetBits() - return - } -} - func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { @@ -655,36 +581,6 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toIPv4StringExt(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - -func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) { - if ch.minValue > uint64(maxValue) || ch.maxValue < uint64(minValue) { - bm.resetBits() - return - } - - visitValues(bs, ch, bm, func(v string) bool { - if len(v) != 4 { - logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) - } - b := bytesutil.ToUnsafeBytes(v) - n := encoding.UnmarshalUint32(b) - return n >= minValue && n <= maxValue - }) -} - func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) { bb := bbPool.Get() visitValues(bs, ch, bm, func(v string) bool { @@ -739,20 +635,6 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str bbPool.Put(bb) } -func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if minLen > 24 || maxLen == 0 { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toFloat64StringExt(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) { if minValue > math.Float64frombits(ch.maxValue) || maxValue < math.Float64frombits(ch.minValue) { bm.resetBits() @@ -836,17 +718,6 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase bbPool.Put(bb) } -func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if matchLenRange(v, minLen, maxLen) { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) { bb := bbPool.Get() for i, v := range ch.valuesDict.values { @@ -940,12 +811,6 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod }) } -func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - visitValues(bs, ch, bm, func(v string) bool { - return matchLenRange(v, minLen, maxLen) - }) -} - func matchStringByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) { visitValues(bs, ch, bm, func(v string) bool { return matchRange(v, minValue, maxValue) @@ -1004,62 +869,6 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { return minLen <= uint64(len(s)) } -func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if !matchMinMaxValueLen(ch, minLen, maxLen) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint8String(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - -func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if !matchMinMaxValueLen(ch, minLen, maxLen) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint16String(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - -func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if !matchMinMaxValueLen(ch, minLen, maxLen) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint32String(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - -func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { - if !matchMinMaxValueLen(ch, minLen, maxLen) { - bm.resetBits() - return - } - - bb := bbPool.Get() - visitValues(bs, ch, bm, func(v string) bool { - s := toUint64String(bs, bb, v) - return matchLenRange(s, minLen, maxLen) - }) - bbPool.Put(bb) -} - func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) { minValueUint, maxValueUint := toUint64Range(minValue, maxValue) if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue { @@ -1348,11 +1157,6 @@ func matchPrefix(s, prefix string) bool { } } -func matchLenRange(s string, minLen, maxLen uint64) bool { - sLen := uint64(utf8.RuneCountInString(s)) - return sLen >= minLen && sLen <= maxLen -} - func matchRange(s string, minValue, maxValue float64) bool { f, ok := tryParseFloat64(s) if !ok { diff --git a/lib/logstorage/filter_ipv4_range.go b/lib/logstorage/filter_ipv4_range.go index ac3c97a3e..83d30761c 100644 --- a/lib/logstorage/filter_ipv4_range.go +++ b/lib/logstorage/filter_ipv4_range.go @@ -3,6 +3,7 @@ package logstorage import ( "fmt" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -96,3 +97,19 @@ func matchIPv4Range(s string, minValue, maxValue uint32) bool { } return n >= minValue && n <= maxValue } + +func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) { + if ch.minValue > uint64(maxValue) || ch.maxValue < uint64(minValue) { + bm.resetBits() + return + } + + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 4 { + logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v)) + } + b := bytesutil.ToUnsafeBytes(v) + n := encoding.UnmarshalUint32(b) + return n >= minValue && n <= maxValue + }) +} diff --git a/lib/logstorage/filter_len_range.go b/lib/logstorage/filter_len_range.go new file mode 100644 index 000000000..e329f5ab4 --- /dev/null +++ b/lib/logstorage/filter_len_range.go @@ -0,0 +1,187 @@ +package logstorage + +import ( + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" +) + +// filterLenRange matches field values with the length in the given range [minLen, maxLen]. +// +// Example LogsQL: `fieldName:len_range(10, 20)` +type filterLenRange struct { + fieldName string + minLen uint64 + maxLen uint64 + + stringRepr string +} + +func (fr *filterLenRange) String() string { + return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr +} + +func (fr *filterLenRange) apply(bs *blockSearch, bm *bitmap) { + fieldName := fr.fieldName + minLen := fr.minLen + maxLen := fr.maxLen + + if minLen > maxLen { + bm.resetBits() + return + } + + v := bs.csh.getConstColumnValue(fieldName) + if v != "" { + if !matchLenRange(v, minLen, maxLen) { + bm.resetBits() + } + return + } + + // Verify whether filter matches other columns + ch := bs.csh.getColumnHeader(fieldName) + if ch == nil { + // Fast path - there are no matching columns. + if !matchLenRange("", minLen, maxLen) { + bm.resetBits() + } + return + } + + switch ch.valueType { + case valueTypeString: + matchStringByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeDict: + matchValuesDictByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint8: + matchUint8ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint16: + matchUint16ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint32: + matchUint32ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeUint64: + matchUint64ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeFloat64: + matchFloat64ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeIPv4: + matchIPv4ByLenRange(bs, ch, bm, minLen, maxLen) + case valueTypeTimestampISO8601: + matchTimestampISO8601ByLenRange(bm, minLen, maxLen) + default: + logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) + } +} + +func matchTimestampISO8601ByLenRange(bm *bitmap, minLen, maxLen uint64) { + if minLen > uint64(len(iso8601Timestamp)) || maxLen < uint64(len(iso8601Timestamp)) { + bm.resetBits() + return + } +} + +func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toIPv4StringExt(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if minLen > 24 || maxLen == 0 { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toFloat64StringExt(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if matchLenRange(v, minLen, maxLen) { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} + +func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + visitValues(bs, ch, bm, func(v string) bool { + return matchLenRange(v, minLen, maxLen) + }) +} + +func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint8String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint16String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint32String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) { + if !matchMinMaxValueLen(ch, minLen, maxLen) { + bm.resetBits() + return + } + + bb := bbPool.Get() + visitValues(bs, ch, bm, func(v string) bool { + s := toUint64String(bs, bb, v) + return matchLenRange(s, minLen, maxLen) + }) + bbPool.Put(bb) +} + +func matchLenRange(s string, minLen, maxLen uint64) bool { + sLen := uint64(utf8.RuneCountInString(s)) + return sLen >= minLen && sLen <= maxLen +} diff --git a/lib/logstorage/filter_len_range_test.go b/lib/logstorage/filter_len_range_test.go new file mode 100644 index 000000000..accdb1671 --- /dev/null +++ b/lib/logstorage/filter_len_range_test.go @@ -0,0 +1,438 @@ +package logstorage + +import ( + "testing" +) + +func TestMatchLenRange(t *testing.T) { + f := func(s string, minLen, maxLen uint64, resultExpected bool) { + t.Helper() + result := matchLenRange(s, minLen, maxLen) + if result != resultExpected { + t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) + } + } + + f("", 0, 0, true) + f("", 0, 1, true) + f("", 1, 1, false) + + f("abc", 0, 2, false) + f("abc", 0, 3, true) + f("abc", 0, 4, true) + f("abc", 3, 4, true) + f("abc", 4, 4, false) + f("abc", 4, 2, false) + + f("ФЫВА", 3, 3, false) + f("ФЫВА", 4, 4, true) + f("ФЫВА", 5, 5, false) + f("ФЫВА", 0, 10, true) +} + +func TestFilterLenRange(t *testing.T) { + t.Run("const-column", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "10", + "10", + "10", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) + + fr = &filterLenRange{ + fieldName: "non-existing-column", + minLen: 0, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 3, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterLenRange{ + fieldName: "non-existing-column", + minLen: 10, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("dict", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "", + "10", + "Abc", + "20", + "10.5", + "10 AFoobarbaz", + "foobar", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 3, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2, 3}) + + fr = &filterLenRange{ + fieldName: "foo", + minLen: 0, + maxLen: 1, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 20, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("strings", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "A FOO", + "a 10", + "10", + "20", + "15.5", + "-5", + "a fooBaR", + "a kjlkjf dfff", + "a ТЕСТЙЦУК НГКШ ", + "a !!,23.(!1)", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 3, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 5}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 100, + maxLen: 200, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint8", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterLenRange{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint16", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "256", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterLenRange{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint32", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "65536", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterLenRange{ + fieldName: "foo", + minLen: 10, + maxLen: 10, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("uint64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123456789012", + "12", + "32", + "0", + "0", + "12", + "1", + "2", + "3", + "4", + "5", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 0, + maxLen: 0, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + + fr = &filterLenRange{ + fieldName: "foo", + minLen: 20, + maxLen: 20, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("float64", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "123", + "12", + "32", + "0", + "0", + "123456.78901", + "-0.2", + "2", + "-334", + "4", + "5", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 2, + maxLen: 2, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 100, + maxLen: 200, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("ipv4", func(t *testing.T) { + columns := []column{ + { + name: "foo", + values: []string{ + "1.2.3.4", + "0.0.0.0", + "127.0.0.1", + "254.255.255.255", + "127.0.0.1", + "127.0.0.1", + "127.0.4.2", + "127.0.0.1", + "12.0.127.6", + "55.55.12.55", + "66.66.66.66", + "7.7.7.7", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "foo", + minLen: 3, + maxLen: 7, + } + testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 11}) + + // mismatch + fr = &filterLenRange{ + fieldName: "foo", + minLen: 20, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, fr, "foo", nil) + }) + + t.Run("timestamp-iso8601", func(t *testing.T) { + columns := []column{ + { + name: "_msg", + values: []string{ + "2006-01-02T15:04:05.001Z", + "2006-01-02T15:04:05.002Z", + "2006-01-02T15:04:05.003Z", + "2006-01-02T15:04:05.004Z", + "2006-01-02T15:04:05.005Z", + "2006-01-02T15:04:05.006Z", + "2006-01-02T15:04:05.007Z", + "2006-01-02T15:04:05.008Z", + "2006-01-02T15:04:05.009Z", + }, + }, + } + + // match + fr := &filterLenRange{ + fieldName: "_msg", + minLen: 10, + maxLen: 30, + } + testFilterMatchForColumns(t, columns, fr, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) + + // mismatch + fr = &filterLenRange{ + fieldName: "_msg", + minLen: 10, + maxLen: 11, + } + testFilterMatchForColumns(t, columns, fr, "_msg", nil) + }) +} diff --git a/lib/logstorage/filter_test.go b/lib/logstorage/filter_test.go index 695b974c8..7b552351e 100644 --- a/lib/logstorage/filter_test.go +++ b/lib/logstorage/filter_test.go @@ -78,32 +78,6 @@ func TestMatchAnyCasePhrase(t *testing.T) { f("Тест", "ест", false) } -func TestMatchLenRange(t *testing.T) { - f := func(s string, minLen, maxLen uint64, resultExpected bool) { - t.Helper() - result := matchLenRange(s, minLen, maxLen) - if result != resultExpected { - t.Fatalf("unexpected result; got %v; want %v", result, resultExpected) - } - } - - f("", 0, 0, true) - f("", 0, 1, true) - f("", 1, 1, false) - - f("abc", 0, 2, false) - f("abc", 0, 3, true) - f("abc", 0, 4, true) - f("abc", 3, 4, true) - f("abc", 4, 4, false) - f("abc", 4, 2, false) - - f("ФЫВА", 3, 3, false) - f("ФЫВА", 4, 4, true) - f("ФЫВА", 5, 5, false) - f("ФЫВА", 0, 10, true) -} - func TestMatchPhrase(t *testing.T) { f := func(s, phrase string, resultExpected bool) { t.Helper() @@ -723,413 +697,6 @@ func TestRegexpFilter(t *testing.T) { }) } -func TestLenRangeFilter(t *testing.T) { - t.Run("const-column", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "10", - "10", - "10", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 20, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) - - fr = &lenRangeFilter{ - fieldName: "non-existing-column", - minLen: 0, - maxLen: 10, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 2}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 3, - maxLen: 20, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &lenRangeFilter{ - fieldName: "non-existing-column", - minLen: 10, - maxLen: 20, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("dict", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "", - "10", - "Abc", - "20", - "10.5", - "10 AFoobarbaz", - "foobar", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 3, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2, 3}) - - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 0, - maxLen: 1, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 20, - maxLen: 30, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("strings", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "A FOO", - "a 10", - "10", - "20", - "15.5", - "-5", - "a fooBaR", - "a kjlkjf dfff", - "a ТЕСТЙЦУК НГКШ ", - "a !!,23.(!1)", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 3, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 5}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 100, - maxLen: 200, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint8", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 2, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 0, - maxLen: 0, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 10, - maxLen: 10, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint16", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "256", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 2, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 0, - maxLen: 0, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 10, - maxLen: 10, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint32", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "65536", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 2, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 0, - maxLen: 0, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 10, - maxLen: 10, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("uint64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123456789012", - "12", - "32", - "0", - "0", - "12", - "1", - "2", - "3", - "4", - "5", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 2, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{2, 3, 6}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 0, - maxLen: 0, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 20, - maxLen: 20, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("float64", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "123", - "12", - "32", - "0", - "0", - "123456.78901", - "-0.2", - "2", - "-334", - "4", - "5", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 2, - maxLen: 2, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{1, 2}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 100, - maxLen: 200, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("ipv4", func(t *testing.T) { - columns := []column{ - { - name: "foo", - values: []string{ - "1.2.3.4", - "0.0.0.0", - "127.0.0.1", - "254.255.255.255", - "127.0.0.1", - "127.0.0.1", - "127.0.4.2", - "127.0.0.1", - "12.0.127.6", - "55.55.12.55", - "66.66.66.66", - "7.7.7.7", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "foo", - minLen: 3, - maxLen: 7, - } - testFilterMatchForColumns(t, columns, fr, "foo", []int{0, 1, 11}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "foo", - minLen: 20, - maxLen: 30, - } - testFilterMatchForColumns(t, columns, fr, "foo", nil) - }) - - t.Run("timestamp-iso8601", func(t *testing.T) { - columns := []column{ - { - name: "_msg", - values: []string{ - "2006-01-02T15:04:05.001Z", - "2006-01-02T15:04:05.002Z", - "2006-01-02T15:04:05.003Z", - "2006-01-02T15:04:05.004Z", - "2006-01-02T15:04:05.005Z", - "2006-01-02T15:04:05.006Z", - "2006-01-02T15:04:05.007Z", - "2006-01-02T15:04:05.008Z", - "2006-01-02T15:04:05.009Z", - }, - }, - } - - // match - fr := &lenRangeFilter{ - fieldName: "_msg", - minLen: 10, - maxLen: 30, - } - testFilterMatchForColumns(t, columns, fr, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8}) - - // mismatch - fr = &lenRangeFilter{ - fieldName: "_msg", - minLen: 10, - maxLen: 11, - } - testFilterMatchForColumns(t, columns, fr, "_msg", nil) - }) -} - func TestRangeFilter(t *testing.T) { t.Run("const-column", func(t *testing.T) { columns := []column{ diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index dffed80a3..a982c18c5 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -330,7 +330,7 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) { case lex.isKeyword("ipv4_range"): return parseFilterIPv4Range(lex, fieldName) case lex.isKeyword("len_range"): - return parseLenRangeFilter(lex, fieldName) + return parseFilterLenRange(lex, fieldName) case lex.isKeyword("range"): return parseRangeFilter(lex, fieldName) case lex.isKeyword("re"): @@ -516,7 +516,7 @@ func parseFuncArgMaybePrefix(lex *lexer, funcName, fieldName string, callback fu return callback(phrase, isPrefixFilter) } -func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) { +func parseFilterLenRange(lex *lexer, fieldName string) (filter, error) { funcName := lex.token return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) { if len(args) != 2 { @@ -531,7 +531,7 @@ func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) { return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err) } stringRepr := "(" + args[0] + ", " + args[1] + ")" - fr := &lenRangeFilter{ + fr := &filterLenRange{ fieldName: fieldName, minLen: minLen, maxLen: maxLen,