This commit is contained in:
Aliaksandr Valialkin 2024-04-29 06:35:06 +02:00
parent 75838f0a87
commit 203bb5f651
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
6 changed files with 645 additions and 632 deletions

View file

@ -72,73 +72,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) {
}
}
// lenRangeFilter keeps field values whose length falls into the inclusive
// range [minLen, maxLen]. The length is counted in runes by matchLenRange.
//
// Example LogsQL: `fieldName:len_range(10, 20)`
type lenRangeFilter struct {
	// fieldName is the name of the field whose values are measured.
	fieldName string

	// minLen and maxLen are the inclusive bounds for the value length.
	minLen, maxLen uint64

	// stringRepr is the text that String() appends after "len_range".
	stringRepr string
}
// String returns the textual form of the filter: the field name passed
// through quoteFieldNameIfNeeded, then "len_range", then fr.stringRepr.
func (fr *lenRangeFilter) String() string {
	prefix := quoteFieldNameIfNeeded(fr.fieldName)
	return prefix + "len_range" + fr.stringRepr
}
// apply checks the fr.fieldName values of the block bs against the length
// range [fr.minLen, fr.maxLen] and updates bm accordingly.
//
// bm.resetBits() is called when nothing in the block can match; otherwise
// the work is delegated to the matcher for the column's value type.
func (fr *lenRangeFilter) apply(bs *blockSearch, bm *bitmap) {
	lo, hi := fr.minLen, fr.maxLen
	if lo > hi {
		// An inverted range cannot match any value.
		bm.resetBits()
		return
	}

	name := fr.fieldName
	if constValue := bs.csh.getConstColumnValue(name); constValue != "" {
		// A non-empty const value was found: one check decides the whole block.
		if !matchLenRange(constValue, lo, hi) {
			bm.resetBits()
		}
		return
	}

	// Verify whether filter matches other columns
	ch := bs.csh.getColumnHeader(name)
	if ch == nil {
		// Fast path - there is no column header for the field,
		// so the range is checked against the empty string.
		if !matchLenRange("", lo, hi) {
			bm.resetBits()
		}
		return
	}

	switch vt := ch.valueType; vt {
	case valueTypeString:
		matchStringByLenRange(bs, ch, bm, lo, hi)
	case valueTypeDict:
		matchValuesDictByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint8:
		matchUint8ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint16:
		matchUint16ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint32:
		matchUint32ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint64:
		matchUint64ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeFloat64:
		matchFloat64ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeIPv4:
		matchIPv4ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeTimestampISO8601:
		matchTimestampISO8601ByLenRange(bm, lo, hi)
	default:
		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), vt)
	}
}
// rangeFilter matches the given range [minValue..maxValue].
//
// Example LogsQL: `fieldName:range(minValue, maxValue]`
@ -596,13 +529,6 @@ func (pf *phraseFilter) apply(bs *blockSearch, bm *bitmap) {
}
}
// matchTimestampISO8601ByLenRange resets bm unless len(iso8601Timestamp)
// lies in [minLen, maxLen].
//
// Only the length of iso8601Timestamp is consulted; individual values are
// not visited.
func matchTimestampISO8601ByLenRange(bm *bitmap, minLen, maxLen uint64) {
	tsLen := uint64(len(iso8601Timestamp))
	if minLen <= tsLen && tsLen <= maxLen {
		return
	}
	bm.resetBits()
}
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
@ -655,36 +581,6 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
// matchIPv4ByLenRange applies the length range [minLen, maxLen] to the values
// of column ch, measuring each value as the string produced by toIPv4StringExt.
//
// bm is reset up front when the range cannot overlap the lengths of
// "0.0.0.0" and "255.255.255.255".
func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	const (
		shortestIPv4Len = uint64(len("0.0.0.0"))
		longestIPv4Len  = uint64(len("255.255.255.255"))
	)
	if maxLen < shortestIPv4Len || minLen > longestIPv4Len {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toIPv4StringExt for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toIPv4StringExt(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchIPv4ByRange applies the inclusive range [minValue, maxValue] to the
// values of column ch, each of which must be a 4-byte binary IPv4 value.
//
// bm is reset up front when the range does not overlap
// [ch.minValue, ch.maxValue].
func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) {
	if uint64(maxValue) < ch.minValue || uint64(minValue) > ch.maxValue {
		bm.resetBits()
		return
	}

	inRange := func(v string) bool {
		if len(v) != 4 {
			logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
		}
		n := encoding.UnmarshalUint32(bytesutil.ToUnsafeBytes(v))
		return minValue <= n && n <= maxValue
	}
	visitValues(bs, ch, bm, inRange)
}
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
bb := bbPool.Get()
visitValues(bs, ch, bm, func(v string) bool {
@ -739,20 +635,6 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
bbPool.Put(bb)
}
// matchFloat64ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toFloat64StringExt.
//
// bm is reset up front when maxLen is zero or minLen exceeds 24.
func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	// NOTE(review): 24 looks like the upper bound on the length of a formatted
	// float64 - confirm against toFloat64StringExt.
	const longestFloat64Len = 24
	if maxLen == 0 || minLen > longestFloat64Len {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toFloat64StringExt for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toFloat64StringExt(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
if minValue > math.Float64frombits(ch.maxValue) || maxValue < math.Float64frombits(ch.minValue) {
bm.resetBits()
@ -836,17 +718,6 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
bbPool.Put(bb)
}
// matchValuesDictByLenRange collects the indexes of the ch.valuesDict.values
// entries whose length lies in [minLen, maxLen], one byte per index, and
// passes them to matchEncodedValuesDict.
func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	bb := bbPool.Get()
	dictValues := ch.valuesDict.values
	for idx := range dictValues {
		if !matchLenRange(dictValues[idx], minLen, maxLen) {
			continue
		}
		bb.B = append(bb.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, bb.B)
	bbPool.Put(bb)
}
func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
bb := bbPool.Get()
for i, v := range ch.valuesDict.values {
@ -940,12 +811,6 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod
})
}
// matchStringByLenRange visits the values of column ch and checks each one
// with matchLenRange against [minLen, maxLen].
func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	lenInRange := func(v string) bool {
		return matchLenRange(v, minLen, maxLen)
	}
	visitValues(bs, ch, bm, lenInRange)
}
func matchStringByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
visitValues(bs, ch, bm, func(v string) bool {
return matchRange(v, minValue, maxValue)
@ -1004,62 +869,6 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
return minLen <= uint64(len(s))
}
// matchUint8ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint8String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint8String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint8String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint16ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint16String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint16String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint16String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint32ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint32String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint32String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint32String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint64ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint64String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint64String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint64String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
if maxValue < 0 || minValueUint > ch.maxValue || maxValueUint < ch.minValue {
@ -1348,11 +1157,6 @@ func matchPrefix(s, prefix string) bool {
}
}
// matchLenRange reports whether the number of runes in s lies in the
// inclusive range [minLen, maxLen].
func matchLenRange(s string, minLen, maxLen uint64) bool {
	runes := uint64(utf8.RuneCountInString(s))
	if runes < minLen {
		return false
	}
	return runes <= maxLen
}
func matchRange(s string, minValue, maxValue float64) bool {
f, ok := tryParseFloat64(s)
if !ok {

View file

@ -3,6 +3,7 @@ package logstorage
import (
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -96,3 +97,19 @@ func matchIPv4Range(s string, minValue, maxValue uint32) bool {
}
return n >= minValue && n <= maxValue
}
// matchIPv4ByRange applies the inclusive range [minValue, maxValue] to the
// values of column ch, each of which must be a 4-byte binary IPv4 value.
//
// bm is reset up front when the range does not overlap
// [ch.minValue, ch.maxValue].
func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) {
	if uint64(maxValue) < ch.minValue || uint64(minValue) > ch.maxValue {
		bm.resetBits()
		return
	}

	inRange := func(v string) bool {
		if len(v) != 4 {
			logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
		}
		n := encoding.UnmarshalUint32(bytesutil.ToUnsafeBytes(v))
		return minValue <= n && n <= maxValue
	}
	visitValues(bs, ch, bm, inRange)
}

View file

@ -0,0 +1,187 @@
package logstorage
import (
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// filterLenRange keeps field values whose length falls into the inclusive
// range [minLen, maxLen]. The length is counted in runes by matchLenRange.
//
// Example LogsQL: `fieldName:len_range(10, 20)`
type filterLenRange struct {
	// fieldName is the name of the field whose values are measured.
	fieldName string

	// minLen and maxLen are the inclusive bounds for the value length.
	minLen, maxLen uint64

	// stringRepr is the text that String() appends after "len_range".
	stringRepr string
}
// String returns the textual form of the filter: the field name passed
// through quoteFieldNameIfNeeded, then "len_range", then fr.stringRepr.
func (fr *filterLenRange) String() string {
	prefix := quoteFieldNameIfNeeded(fr.fieldName)
	return prefix + "len_range" + fr.stringRepr
}
// apply checks the fr.fieldName values of the block bs against the length
// range [fr.minLen, fr.maxLen] and updates bm accordingly.
//
// bm.resetBits() is called when nothing in the block can match; otherwise
// the work is delegated to the matcher for the column's value type.
func (fr *filterLenRange) apply(bs *blockSearch, bm *bitmap) {
	lo, hi := fr.minLen, fr.maxLen
	if lo > hi {
		// An inverted range cannot match any value.
		bm.resetBits()
		return
	}

	name := fr.fieldName
	if constValue := bs.csh.getConstColumnValue(name); constValue != "" {
		// A non-empty const value was found: one check decides the whole block.
		if !matchLenRange(constValue, lo, hi) {
			bm.resetBits()
		}
		return
	}

	// Verify whether filter matches other columns
	ch := bs.csh.getColumnHeader(name)
	if ch == nil {
		// Fast path - there is no column header for the field,
		// so the range is checked against the empty string.
		if !matchLenRange("", lo, hi) {
			bm.resetBits()
		}
		return
	}

	switch vt := ch.valueType; vt {
	case valueTypeString:
		matchStringByLenRange(bs, ch, bm, lo, hi)
	case valueTypeDict:
		matchValuesDictByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint8:
		matchUint8ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint16:
		matchUint16ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint32:
		matchUint32ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeUint64:
		matchUint64ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeFloat64:
		matchFloat64ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeIPv4:
		matchIPv4ByLenRange(bs, ch, bm, lo, hi)
	case valueTypeTimestampISO8601:
		matchTimestampISO8601ByLenRange(bm, lo, hi)
	default:
		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), vt)
	}
}
// matchTimestampISO8601ByLenRange resets bm unless len(iso8601Timestamp)
// lies in [minLen, maxLen].
//
// Only the length of iso8601Timestamp is consulted; individual values are
// not visited.
func matchTimestampISO8601ByLenRange(bm *bitmap, minLen, maxLen uint64) {
	tsLen := uint64(len(iso8601Timestamp))
	if minLen <= tsLen && tsLen <= maxLen {
		return
	}
	bm.resetBits()
}
// matchIPv4ByLenRange applies the length range [minLen, maxLen] to the values
// of column ch, measuring each value as the string produced by toIPv4StringExt.
//
// bm is reset up front when the range cannot overlap the lengths of
// "0.0.0.0" and "255.255.255.255".
func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	const (
		shortestIPv4Len = uint64(len("0.0.0.0"))
		longestIPv4Len  = uint64(len("255.255.255.255"))
	)
	if maxLen < shortestIPv4Len || minLen > longestIPv4Len {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toIPv4StringExt for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toIPv4StringExt(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchFloat64ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toFloat64StringExt.
//
// bm is reset up front when maxLen is zero or minLen exceeds 24.
func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	// NOTE(review): 24 looks like the upper bound on the length of a formatted
	// float64 - confirm against toFloat64StringExt.
	const longestFloat64Len = 24
	if maxLen == 0 || minLen > longestFloat64Len {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toFloat64StringExt for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toFloat64StringExt(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchValuesDictByLenRange collects the indexes of the ch.valuesDict.values
// entries whose length lies in [minLen, maxLen], one byte per index, and
// passes them to matchEncodedValuesDict.
func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	bb := bbPool.Get()
	dictValues := ch.valuesDict.values
	for idx := range dictValues {
		if !matchLenRange(dictValues[idx], minLen, maxLen) {
			continue
		}
		bb.B = append(bb.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, bb.B)
	bbPool.Put(bb)
}
// matchStringByLenRange visits the values of column ch and checks each one
// with matchLenRange against [minLen, maxLen].
func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	lenInRange := func(v string) bool {
		return matchLenRange(v, minLen, maxLen)
	}
	visitValues(bs, ch, bm, lenInRange)
}
// matchUint8ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint8String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint8String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint8String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint16ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint16String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint16String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint16String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint32ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint32String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint32String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint32String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchUint64ByLenRange applies the length range [minLen, maxLen] to the
// values of column ch, measuring each value as the string produced by
// toUint64String.
//
// bm is reset up front when matchMinMaxValueLen reports that the range
// cannot match the column.
func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
	if canMatch := matchMinMaxValueLen(ch, minLen, maxLen); !canMatch {
		bm.resetBits()
		return
	}

	// bb is the scratch buffer handed to toUint64String for every value.
	bb := bbPool.Get()
	visitValues(bs, ch, bm, func(v string) bool {
		return matchLenRange(toUint64String(bs, bb, v), minLen, maxLen)
	})
	bbPool.Put(bb)
}
// matchLenRange reports whether the number of runes in s lies in the
// inclusive range [minLen, maxLen].
func matchLenRange(s string, minLen, maxLen uint64) bool {
	runes := uint64(utf8.RuneCountInString(s))
	if runes < minLen {
		return false
	}
	return runes <= maxLen
}

View file

@ -0,0 +1,438 @@
package logstorage
import (
"testing"
)
// TestMatchLenRange checks matchLenRange against empty, ASCII and Cyrillic
// strings, including an inverted range.
func TestMatchLenRange(t *testing.T) {
	cases := []struct {
		s              string
		minLen, maxLen uint64
		want           bool
	}{
		{"", 0, 0, true},
		{"", 0, 1, true},
		{"", 1, 1, false},

		{"abc", 0, 2, false},
		{"abc", 0, 3, true},
		{"abc", 0, 4, true},
		{"abc", 3, 4, true},
		{"abc", 4, 4, false},
		{"abc", 4, 2, false},

		{"ФЫВА", 3, 3, false},
		{"ФЫВА", 4, 4, true},
		{"ФЫВА", 5, 5, false},
		{"ФЫВА", 0, 10, true},
	}
	for _, tc := range cases {
		if got := matchLenRange(tc.s, tc.minLen, tc.maxLen); got != tc.want {
			t.Fatalf("unexpected result; got %v; want %v", got, tc.want)
		}
	}
}
// TestFilterLenRange runs filterLenRange through testFilterMatchForColumns
// for a set of single-column scenarios.
//
// Every scenario becomes one subtest; its checks are executed in order
// against the same column.
func TestFilterLenRange(t *testing.T) {
	// check is a single filter invocation together with the row indexes
	// passed to testFilterMatchForColumns (nil in the mismatch cases).
	type check struct {
		fieldName      string
		minLen, maxLen uint64
		rowIdxs        []int
	}
	// scenario describes one subtest: the column under test and its checks.
	type scenario struct {
		name       string
		columnName string
		values     []string
		checks     []check
	}

	scenarios := []scenario{
		{
			name:       "const-column",
			columnName: "foo",
			values:     []string{"10", "10", "10"},
			checks: []check{
				// match
				{"foo", 2, 20, []int{0, 1, 2}},
				{"non-existing-column", 0, 10, []int{0, 1, 2}},
				// mismatch
				{"foo", 3, 20, nil},
				{"non-existing-column", 10, 20, nil},
			},
		},
		{
			name:       "dict",
			columnName: "foo",
			values:     []string{"", "10", "Abc", "20", "10.5", "10 AFoobarbaz", "foobar"},
			checks: []check{
				// match
				{"foo", 2, 3, []int{1, 2, 3}},
				{"foo", 0, 1, []int{0}},
				// mismatch
				{"foo", 20, 30, nil},
			},
		},
		{
			name:       "strings",
			columnName: "foo",
			values: []string{
				"A FOO",
				"a 10",
				"10",
				"20",
				"15.5",
				"-5",
				"a fooBaR",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
			checks: []check{
				// match
				{"foo", 2, 3, []int{2, 3, 5}},
				// mismatch
				{"foo", 100, 200, nil},
			},
		},
		{
			name:       "uint8",
			columnName: "foo",
			values:     []string{"123", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				// NOTE(review): in this and the other uintN scenarios the
				// two-character values sit at indexes 1, 2 and 5; confirm that
				// 2, 3, 6 is what testFilterMatchForColumns expects.
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint16",
			columnName: "foo",
			values:     []string{"256", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint32",
			columnName: "foo",
			values:     []string{"65536", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint64",
			columnName: "foo",
			values:     []string{"123456789012", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 20, 20, nil},
			},
		},
		{
			name:       "float64",
			columnName: "foo",
			values:     []string{"123", "12", "32", "0", "0", "123456.78901", "-0.2", "2", "-334", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{1, 2}},
				// mismatch
				{"foo", 100, 200, nil},
			},
		},
		{
			name:       "ipv4",
			columnName: "foo",
			values: []string{
				"1.2.3.4",
				"0.0.0.0",
				"127.0.0.1",
				"254.255.255.255",
				"127.0.0.1",
				"127.0.0.1",
				"127.0.4.2",
				"127.0.0.1",
				"12.0.127.6",
				"55.55.12.55",
				"66.66.66.66",
				"7.7.7.7",
			},
			checks: []check{
				// match
				{"foo", 3, 7, []int{0, 1, 11}},
				// mismatch
				{"foo", 20, 30, nil},
			},
		},
		{
			name:       "timestamp-iso8601",
			columnName: "_msg",
			values: []string{
				"2006-01-02T15:04:05.001Z",
				"2006-01-02T15:04:05.002Z",
				"2006-01-02T15:04:05.003Z",
				"2006-01-02T15:04:05.004Z",
				"2006-01-02T15:04:05.005Z",
				"2006-01-02T15:04:05.006Z",
				"2006-01-02T15:04:05.007Z",
				"2006-01-02T15:04:05.008Z",
				"2006-01-02T15:04:05.009Z",
			},
			checks: []check{
				// match
				{"_msg", 10, 30, []int{0, 1, 2, 3, 4, 5, 6, 7, 8}},
				// mismatch
				{"_msg", 10, 11, nil},
			},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			columns := []column{
				{
					name:   sc.columnName,
					values: sc.values,
				},
			}
			for _, c := range sc.checks {
				fr := &filterLenRange{
					fieldName: c.fieldName,
					minLen:    c.minLen,
					maxLen:    c.maxLen,
				}
				testFilterMatchForColumns(t, columns, fr, sc.columnName, c.rowIdxs)
			}
		})
	}
}

View file

@ -78,32 +78,6 @@ func TestMatchAnyCasePhrase(t *testing.T) {
f("Тест", "ест", false)
}
// TestMatchLenRange checks matchLenRange against empty, ASCII and Cyrillic
// strings, including an inverted range.
func TestMatchLenRange(t *testing.T) {
	cases := []struct {
		s              string
		minLen, maxLen uint64
		want           bool
	}{
		{"", 0, 0, true},
		{"", 0, 1, true},
		{"", 1, 1, false},

		{"abc", 0, 2, false},
		{"abc", 0, 3, true},
		{"abc", 0, 4, true},
		{"abc", 3, 4, true},
		{"abc", 4, 4, false},
		{"abc", 4, 2, false},

		{"ФЫВА", 3, 3, false},
		{"ФЫВА", 4, 4, true},
		{"ФЫВА", 5, 5, false},
		{"ФЫВА", 0, 10, true},
	}
	for _, tc := range cases {
		if got := matchLenRange(tc.s, tc.minLen, tc.maxLen); got != tc.want {
			t.Fatalf("unexpected result; got %v; want %v", got, tc.want)
		}
	}
}
func TestMatchPhrase(t *testing.T) {
f := func(s, phrase string, resultExpected bool) {
t.Helper()
@ -723,413 +697,6 @@ func TestRegexpFilter(t *testing.T) {
})
}
// TestLenRangeFilter runs lenRangeFilter through testFilterMatchForColumns
// for a set of single-column scenarios.
//
// Every scenario becomes one subtest; its checks are executed in order
// against the same column.
func TestLenRangeFilter(t *testing.T) {
	// check is a single filter invocation together with the row indexes
	// passed to testFilterMatchForColumns (nil in the mismatch cases).
	type check struct {
		fieldName      string
		minLen, maxLen uint64
		rowIdxs        []int
	}
	// scenario describes one subtest: the column under test and its checks.
	type scenario struct {
		name       string
		columnName string
		values     []string
		checks     []check
	}

	scenarios := []scenario{
		{
			name:       "const-column",
			columnName: "foo",
			values:     []string{"10", "10", "10"},
			checks: []check{
				// match
				{"foo", 2, 20, []int{0, 1, 2}},
				{"non-existing-column", 0, 10, []int{0, 1, 2}},
				// mismatch
				{"foo", 3, 20, nil},
				{"non-existing-column", 10, 20, nil},
			},
		},
		{
			name:       "dict",
			columnName: "foo",
			values:     []string{"", "10", "Abc", "20", "10.5", "10 AFoobarbaz", "foobar"},
			checks: []check{
				// match
				{"foo", 2, 3, []int{1, 2, 3}},
				{"foo", 0, 1, []int{0}},
				// mismatch
				{"foo", 20, 30, nil},
			},
		},
		{
			name:       "strings",
			columnName: "foo",
			values: []string{
				"A FOO",
				"a 10",
				"10",
				"20",
				"15.5",
				"-5",
				"a fooBaR",
				"a kjlkjf dfff",
				"a ТЕСТЙЦУК НГКШ ",
				"a !!,23.(!1)",
			},
			checks: []check{
				// match
				{"foo", 2, 3, []int{2, 3, 5}},
				// mismatch
				{"foo", 100, 200, nil},
			},
		},
		{
			name:       "uint8",
			columnName: "foo",
			values:     []string{"123", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				// NOTE(review): in this and the other uintN scenarios the
				// two-character values sit at indexes 1, 2 and 5; confirm that
				// 2, 3, 6 is what testFilterMatchForColumns expects.
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint16",
			columnName: "foo",
			values:     []string{"256", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint32",
			columnName: "foo",
			values:     []string{"65536", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 10, 10, nil},
			},
		},
		{
			name:       "uint64",
			columnName: "foo",
			values:     []string{"123456789012", "12", "32", "0", "0", "12", "1", "2", "3", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{2, 3, 6}},
				// mismatch
				{"foo", 0, 0, nil},
				{"foo", 20, 20, nil},
			},
		},
		{
			name:       "float64",
			columnName: "foo",
			values:     []string{"123", "12", "32", "0", "0", "123456.78901", "-0.2", "2", "-334", "4", "5"},
			checks: []check{
				// match
				{"foo", 2, 2, []int{1, 2}},
				// mismatch
				{"foo", 100, 200, nil},
			},
		},
		{
			name:       "ipv4",
			columnName: "foo",
			values: []string{
				"1.2.3.4",
				"0.0.0.0",
				"127.0.0.1",
				"254.255.255.255",
				"127.0.0.1",
				"127.0.0.1",
				"127.0.4.2",
				"127.0.0.1",
				"12.0.127.6",
				"55.55.12.55",
				"66.66.66.66",
				"7.7.7.7",
			},
			checks: []check{
				// match
				{"foo", 3, 7, []int{0, 1, 11}},
				// mismatch
				{"foo", 20, 30, nil},
			},
		},
		{
			name:       "timestamp-iso8601",
			columnName: "_msg",
			values: []string{
				"2006-01-02T15:04:05.001Z",
				"2006-01-02T15:04:05.002Z",
				"2006-01-02T15:04:05.003Z",
				"2006-01-02T15:04:05.004Z",
				"2006-01-02T15:04:05.005Z",
				"2006-01-02T15:04:05.006Z",
				"2006-01-02T15:04:05.007Z",
				"2006-01-02T15:04:05.008Z",
				"2006-01-02T15:04:05.009Z",
			},
			checks: []check{
				// match
				{"_msg", 10, 30, []int{0, 1, 2, 3, 4, 5, 6, 7, 8}},
				// mismatch
				{"_msg", 10, 11, nil},
			},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			columns := []column{
				{
					name:   sc.columnName,
					values: sc.values,
				},
			}
			for _, c := range sc.checks {
				fr := &lenRangeFilter{
					fieldName: c.fieldName,
					minLen:    c.minLen,
					maxLen:    c.maxLen,
				}
				testFilterMatchForColumns(t, columns, fr, sc.columnName, c.rowIdxs)
			}
		})
	}
}
func TestRangeFilter(t *testing.T) {
t.Run("const-column", func(t *testing.T) {
columns := []column{

View file

@ -330,7 +330,7 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
case lex.isKeyword("ipv4_range"):
return parseFilterIPv4Range(lex, fieldName)
case lex.isKeyword("len_range"):
return parseLenRangeFilter(lex, fieldName)
return parseFilterLenRange(lex, fieldName)
case lex.isKeyword("range"):
return parseRangeFilter(lex, fieldName)
case lex.isKeyword("re"):
@ -516,7 +516,7 @@ func parseFuncArgMaybePrefix(lex *lexer, funcName, fieldName string, callback fu
return callback(phrase, isPrefixFilter)
}
func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
func parseFilterLenRange(lex *lexer, fieldName string) (filter, error) {
funcName := lex.token
return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
if len(args) != 2 {
@ -531,7 +531,7 @@ func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err)
}
stringRepr := "(" + args[0] + ", " + args[1] + ")"
fr := &lenRangeFilter{
fr := &filterLenRange{
fieldName: fieldName,
minLen: minLen,
maxLen: maxLen,