This commit is contained in:
Aliaksandr Valialkin 2024-04-29 05:04:20 +02:00
parent b6a1576e0d
commit 68ec1cb1dd
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
8 changed files with 1118 additions and 1521 deletions

View file

@ -39,142 +39,39 @@ type streamFilter struct {
streamIDs map[streamID]struct{} streamIDs map[streamID]struct{}
} }
func (sf *streamFilter) String() string { func (fs *streamFilter) String() string {
s := sf.f.String() s := fs.f.String()
if s == "{}" { if s == "{}" {
return "" return ""
} }
return "_stream:" + s return "_stream:" + s
} }
func (sf *streamFilter) getStreamIDs() map[streamID]struct{} { func (fs *streamFilter) getStreamIDs() map[streamID]struct{} {
sf.streamIDsOnce.Do(sf.initStreamIDs) fs.streamIDsOnce.Do(fs.initStreamIDs)
return sf.streamIDs return fs.streamIDs
} }
func (sf *streamFilter) initStreamIDs() { func (fs *streamFilter) initStreamIDs() {
streamIDs := sf.idb.searchStreamIDs(sf.tenantIDs, sf.f) streamIDs := fs.idb.searchStreamIDs(fs.tenantIDs, fs.f)
m := make(map[streamID]struct{}, len(streamIDs)) m := make(map[streamID]struct{}, len(streamIDs))
for i := range streamIDs { for i := range streamIDs {
m[streamIDs[i]] = struct{}{} m[streamIDs[i]] = struct{}{}
} }
sf.streamIDs = m fs.streamIDs = m
} }
func (sf *streamFilter) apply(bs *blockSearch, bm *bitmap) { func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) {
if sf.f.isEmpty() { if fs.f.isEmpty() {
return return
} }
streamIDs := sf.getStreamIDs() streamIDs := fs.getStreamIDs()
if _, ok := streamIDs[bs.bsw.bh.streamID]; !ok { if _, ok := streamIDs[bs.bsw.bh.streamID]; !ok {
bm.resetBits() bm.resetBits()
return return
} }
} }
// sequenceFilter matches an ordered sequence of phrases
//
// Example LogsQL: `fieldName:seq(foo, "bar baz")`
type sequenceFilter struct {
	// fieldName is the name of the field the phrases are matched against.
	fieldName string

	// phrases holds the phrases to match in order; entries may be empty strings.
	phrases []string

	// tokens is populated at most once by initTokens, guarded by tokensOnce.
	tokensOnce sync.Once
	tokens     []string

	// nonEmptyPhrases is phrases with empty strings dropped;
	// it is populated at most once by initNonEmptyPhrases, guarded by nonEmptyPhrasesOnce.
	nonEmptyPhrasesOnce sync.Once
	nonEmptyPhrases     []string
}
// String returns the textual form of the filter: the field name passed through
// quoteFieldNameIfNeeded, followed by `seq(...)` holding the comma-separated
// phrases, each passed through quoteTokenIfNeeded.
func (sf *sequenceFilter) String() string {
	quoted := make([]string, 0, len(sf.phrases))
	for _, p := range sf.phrases {
		quoted = append(quoted, quoteTokenIfNeeded(p))
	}
	args := strings.Join(quoted, ",")
	return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(sf.fieldName), args)
}
// getTokens returns sf.tokens, running initTokens on the first call only.
func (sf *sequenceFilter) getTokens() []string {
	once := &sf.tokensOnce
	once.Do(sf.initTokens)
	return sf.tokens
}
// initTokens stores in sf.tokens the result of tokenizeStrings applied
// to the non-empty phrases of the filter.
func (sf *sequenceFilter) initTokens() {
	sf.tokens = tokenizeStrings(nil, sf.getNonEmptyPhrases())
}
// getNonEmptyPhrases returns sf.nonEmptyPhrases, running initNonEmptyPhrases
// on the first call only.
func (sf *sequenceFilter) getNonEmptyPhrases() []string {
	once := &sf.nonEmptyPhrasesOnce
	once.Do(sf.initNonEmptyPhrases)
	return sf.nonEmptyPhrases
}
// initNonEmptyPhrases copies every non-empty entry of sf.phrases, in order,
// into a freshly allocated slice and stores it in sf.nonEmptyPhrases.
func (sf *sequenceFilter) initNonEmptyPhrases() {
	kept := make([]string, 0, len(sf.phrases))
	for i := range sf.phrases {
		if p := sf.phrases[i]; len(p) > 0 {
			kept = append(kept, p)
		}
	}
	sf.nonEmptyPhrases = kept
}
// apply clears bits in bm for the block bs according to the sequence filter.
//
// A filter without non-empty phrases leaves bm untouched. Otherwise the value
// of the field is looked up first among const columns and then among regular
// column headers, and matching is dispatched on the column value type.
func (sf *sequenceFilter) apply(bs *blockSearch, bm *bitmap) {
	seq := sf.getNonEmptyPhrases()
	if len(seq) == 0 {
		return
	}

	name := sf.fieldName
	if constValue := bs.csh.getConstColumnValue(name); constValue != "" {
		// The field is a const column: a single check decides for the whole block.
		if !matchSequence(constValue, seq) {
			bm.resetBits()
		}
		return
	}

	// Verify whether filter matches other columns
	hdr := bs.csh.getColumnHeader(name)
	if hdr == nil {
		// Fast path - there are no matching columns.
		// It matches anything only for empty phrase.
		if !matchSequence("", seq) {
			bm.resetBits()
		}
		return
	}

	toks := sf.getTokens()
	switch hdr.valueType {
	case valueTypeString:
		matchStringBySequence(bs, hdr, bm, seq, toks)
	case valueTypeDict:
		matchValuesDictBySequence(bs, hdr, bm, seq)
	case valueTypeUint8:
		matchUint8BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint16:
		matchUint16BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint32:
		matchUint32BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint64:
		matchUint64BySequence(bs, hdr, bm, seq, toks)
	case valueTypeFloat64:
		matchFloat64BySequence(bs, hdr, bm, seq, toks)
	case valueTypeIPv4:
		matchIPv4BySequence(bs, hdr, bm, seq, toks)
	case valueTypeTimestampISO8601:
		matchTimestampISO8601BySequence(bs, hdr, bm, seq, toks)
	default:
		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), hdr.valueType)
	}
}
// exactPrefixFilter matches the exact prefix. // exactPrefixFilter matches the exact prefix.
// //
// Example LogsQL: `fieldName:exact("foo bar"*) // Example LogsQL: `fieldName:exact("foo bar"*)
@ -1329,25 +1226,6 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb) bbPool.Put(bb)
} }
// matchTimestampISO8601BySequence applies the phrase sequence to a column of
// ISO8601 timestamps.
//
// A single phrase is delegated to matchTimestampISO8601ByPhrase. For longer
// sequences bm is reset when matchBloomFilterAllTokens reports false; otherwise
// every visited value is converted to its string form and checked with matchSequence.
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	switch {
	case len(phrases) == 1:
		matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
	case !matchBloomFilterAllTokens(bs, ch, tokens):
		bm.resetBits()
	default:
		// Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp.
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toTimestampISO8601StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
	}
}
func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
if prefix == "" { if prefix == "" {
return return
@ -1473,27 +1351,6 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
bbPool.Put(bb) bbPool.Put(bb)
} }
// matchIPv4BySequence applies the phrase sequence to a column of IPv4 addresses.
//
// A single phrase is delegated to matchIPv4ByPhrase. For longer sequences bm is
// reset when matchBloomFilterAllTokens reports false; otherwise every visited
// value is converted to its string form and checked with matchSequence.
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	switch {
	case len(phrases) == 1:
		matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
	case !matchBloomFilterAllTokens(bs, ch, tokens):
		bm.resetBits()
	default:
		// Slow path - phrases contain parts of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
		// The binary representation of the IP address cannot be compared directly,
		// so the address is converted to a string before the sequence is searched in it.
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toIPv4StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
	}
}
func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
if prefix == "" { if prefix == "" {
return return
@ -1629,24 +1486,6 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
bbPool.Put(bb) bbPool.Put(bb)
} }
// matchFloat64BySequence applies the phrase sequence to a column of float64 values.
//
// bm is reset when matchBloomFilterAllTokens reports false; otherwise every
// visited value is converted to its string form and checked with matchSequence.
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if matchBloomFilterAllTokens(bs, ch, tokens) {
		// The phrase may contain a part of the floating-point number.
		// For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
		// This means the binary representation of floating-point numbers cannot be searched.
		// Instead, the whole phrase is searched in the string representation
		// of floating-point numbers :(
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toFloat64StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
		return
	}
	bm.resetBits()
}
func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
if prefix == "" { if prefix == "" {
// An empty prefix matches all the values // An empty prefix matches all the values
@ -1795,17 +1634,6 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
bbPool.Put(bb) bbPool.Put(bb)
} }
// matchValuesDictBySequence applies the phrase sequence to a dict-encoded column.
//
// The index of every dict value accepted by matchSequence is appended as a
// single byte to a pooled buffer, which is then handed to matchEncodedValuesDict.
func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) {
	buf := bbPool.Get()
	dictValues := ch.valuesDict.values
	for idx := range dictValues {
		if !matchSequence(dictValues[idx], phrases) {
			continue
		}
		buf.B = append(buf.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, buf.B)
	bbPool.Put(buf)
}
func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) { func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
bb := bbPool.Get() bb := bbPool.Get()
for i, v := range ch.valuesDict.values { for i, v := range ch.valuesDict.values {
@ -1918,16 +1746,6 @@ func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s
}) })
} }
// matchStringBySequence applies the phrase sequence to a string column.
//
// bm is reset when matchBloomFilterAllTokens reports false; otherwise every
// visited value is checked with matchSequence.
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) {
	if matchBloomFilterAllTokens(bs, ch, tokens) {
		visitValues(bs, ch, bm, func(value string) bool {
			return matchSequence(value, phrases)
		})
		return
	}
	bm.resetBits()
}
func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
if !matchBloomFilterAllTokens(bs, ch, tokens) { if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits() bm.resetBits()
@ -2279,38 +2097,6 @@ func matchUint64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix s
bbPool.Put(bb) bbPool.Put(bb)
} }
// matchUint8BySequence applies the phrase sequence to a uint8 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint8ByExactValue. phrases must not be empty.
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint16BySequence applies the phrase sequence to a uint16 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint16ByExactValue. phrases must not be empty.
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint32BySequence applies the phrase sequence to a uint32 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint32ByExactValue. phrases must not be empty.
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint64BySequence applies the phrase sequence to a uint64 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint64ByExactValue. phrases must not be empty.
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
return return
@ -2583,17 +2369,6 @@ func matchRange(s string, minValue, maxValue float64) bool {
return f >= minValue && f <= maxValue return f >= minValue && f <= maxValue
} }
// matchSequence reports whether all the phrases can be located in s one after
// another: each phrase is searched with getPhrasePos in the part of s that
// follows the end of the previous match.
func matchSequence(s string, phrases []string) bool {
	rest := s
	for i := range phrases {
		pos := getPhrasePos(rest, phrases[i])
		if pos < 0 {
			return false
		}
		// Continue the search right after the matched phrase.
		rest = rest[pos+len(phrases[i]):]
	}
	return true
}
func matchAnyCasePhrase(s, phraseLowercase string) bool { func matchAnyCasePhrase(s, phraseLowercase string) bool {
if len(phraseLowercase) == 0 { if len(phraseLowercase) == 0 {
// Special case - empty phrase matches only empty string. // Special case - empty phrase matches only empty string.

View file

@ -69,7 +69,7 @@ func (fa *filterAnd) initMsgTokens() {
if isMsgFieldName(t.fieldName) { if isMsgFieldName(t.fieldName) {
a = append(a, t.getTokens()...) a = append(a, t.getTokens()...)
} }
case *sequenceFilter: case *filterSequence:
if isMsgFieldName(t.fieldName) { if isMsgFieldName(t.fieldName) {
a = append(a, t.getTokens()...) a = append(a, t.getTokens()...)
} }

View file

@ -0,0 +1,234 @@
package logstorage
import (
"fmt"
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// filterSequence matches an ordered sequence of phrases
//
// Example LogsQL: `fieldName:seq(foo, "bar baz")`
type filterSequence struct {
	// fieldName is the name of the field the phrases are matched against.
	fieldName string

	// phrases holds the phrases to match in order; entries may be empty strings.
	phrases []string

	// tokens is populated at most once by initTokens, guarded by tokensOnce.
	tokensOnce sync.Once
	tokens     []string

	// nonEmptyPhrases is phrases with empty strings dropped;
	// it is populated at most once by initNonEmptyPhrases, guarded by nonEmptyPhrasesOnce.
	nonEmptyPhrasesOnce sync.Once
	nonEmptyPhrases     []string
}
// String returns the textual form of the filter: the field name passed through
// quoteFieldNameIfNeeded, followed by `seq(...)` holding the comma-separated
// phrases, each passed through quoteTokenIfNeeded.
func (fs *filterSequence) String() string {
	quoted := make([]string, 0, len(fs.phrases))
	for _, p := range fs.phrases {
		quoted = append(quoted, quoteTokenIfNeeded(p))
	}
	args := strings.Join(quoted, ",")
	return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(fs.fieldName), args)
}
// getTokens returns fs.tokens, running initTokens on the first call only.
func (fs *filterSequence) getTokens() []string {
	once := &fs.tokensOnce
	once.Do(fs.initTokens)
	return fs.tokens
}
// initTokens stores in fs.tokens the result of tokenizeStrings applied
// to the non-empty phrases of the filter.
func (fs *filterSequence) initTokens() {
	fs.tokens = tokenizeStrings(nil, fs.getNonEmptyPhrases())
}
// getNonEmptyPhrases returns fs.nonEmptyPhrases, running initNonEmptyPhrases
// on the first call only.
func (fs *filterSequence) getNonEmptyPhrases() []string {
	once := &fs.nonEmptyPhrasesOnce
	once.Do(fs.initNonEmptyPhrases)
	return fs.nonEmptyPhrases
}
// initNonEmptyPhrases copies every non-empty entry of fs.phrases, in order,
// into a freshly allocated slice and stores it in fs.nonEmptyPhrases.
func (fs *filterSequence) initNonEmptyPhrases() {
	kept := make([]string, 0, len(fs.phrases))
	for i := range fs.phrases {
		if p := fs.phrases[i]; len(p) > 0 {
			kept = append(kept, p)
		}
	}
	fs.nonEmptyPhrases = kept
}
// apply clears bits in bm for the block bs according to the sequence filter.
//
// A filter without non-empty phrases leaves bm untouched. Otherwise the value
// of the field is looked up first among const columns and then among regular
// column headers, and matching is dispatched on the column value type.
func (fs *filterSequence) apply(bs *blockSearch, bm *bitmap) {
	seq := fs.getNonEmptyPhrases()
	if len(seq) == 0 {
		return
	}

	name := fs.fieldName
	if constValue := bs.csh.getConstColumnValue(name); constValue != "" {
		// The field is a const column: a single check decides for the whole block.
		if !matchSequence(constValue, seq) {
			bm.resetBits()
		}
		return
	}

	// Verify whether filter matches other columns
	hdr := bs.csh.getColumnHeader(name)
	if hdr == nil {
		// Fast path - there are no matching columns.
		// It matches anything only for empty phrase.
		if !matchSequence("", seq) {
			bm.resetBits()
		}
		return
	}

	toks := fs.getTokens()
	switch hdr.valueType {
	case valueTypeString:
		matchStringBySequence(bs, hdr, bm, seq, toks)
	case valueTypeDict:
		matchValuesDictBySequence(bs, hdr, bm, seq)
	case valueTypeUint8:
		matchUint8BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint16:
		matchUint16BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint32:
		matchUint32BySequence(bs, hdr, bm, seq, toks)
	case valueTypeUint64:
		matchUint64BySequence(bs, hdr, bm, seq, toks)
	case valueTypeFloat64:
		matchFloat64BySequence(bs, hdr, bm, seq, toks)
	case valueTypeIPv4:
		matchIPv4BySequence(bs, hdr, bm, seq, toks)
	case valueTypeTimestampISO8601:
		matchTimestampISO8601BySequence(bs, hdr, bm, seq, toks)
	default:
		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), hdr.valueType)
	}
}
// matchTimestampISO8601BySequence applies the phrase sequence to a column of
// ISO8601 timestamps.
//
// A single phrase is delegated to matchTimestampISO8601ByPhrase. For longer
// sequences bm is reset when matchBloomFilterAllTokens reports false; otherwise
// every visited value is converted to its string form and checked with matchSequence.
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	switch {
	case len(phrases) == 1:
		matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
	case !matchBloomFilterAllTokens(bs, ch, tokens):
		bm.resetBits()
	default:
		// Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp.
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toTimestampISO8601StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
	}
}
// matchIPv4BySequence applies the phrase sequence to a column of IPv4 addresses.
//
// A single phrase is delegated to matchIPv4ByPhrase. For longer sequences bm is
// reset when matchBloomFilterAllTokens reports false; otherwise every visited
// value is converted to its string form and checked with matchSequence.
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	switch {
	case len(phrases) == 1:
		matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
	case !matchBloomFilterAllTokens(bs, ch, tokens):
		bm.resetBits()
	default:
		// Slow path - phrases contain parts of IP address. For example, `1.23` should match `1.23.4.5` and `4.1.23.54`.
		// The binary representation of the IP address cannot be compared directly,
		// so the address is converted to a string before the sequence is searched in it.
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toIPv4StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
	}
}
// matchFloat64BySequence applies the phrase sequence to a column of float64 values.
//
// bm is reset when matchBloomFilterAllTokens reports false; otherwise every
// visited value is converted to its string form and checked with matchSequence.
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if matchBloomFilterAllTokens(bs, ch, tokens) {
		// The phrase may contain a part of the floating-point number.
		// For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
		// This means the binary representation of floating-point numbers cannot be searched.
		// Instead, the whole phrase is searched in the string representation
		// of floating-point numbers :(
		buf := bbPool.Get()
		visitValues(bs, ch, bm, func(encoded string) bool {
			return matchSequence(toFloat64StringExt(bs, buf, encoded), phrases)
		})
		bbPool.Put(buf)
		return
	}
	bm.resetBits()
}
// matchValuesDictBySequence applies the phrase sequence to a dict-encoded column.
//
// The index of every dict value accepted by matchSequence is appended as a
// single byte to a pooled buffer, which is then handed to matchEncodedValuesDict.
func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) {
	buf := bbPool.Get()
	dictValues := ch.valuesDict.values
	for idx := range dictValues {
		if !matchSequence(dictValues[idx], phrases) {
			continue
		}
		buf.B = append(buf.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, buf.B)
	bbPool.Put(buf)
}
// matchStringBySequence applies the phrase sequence to a string column.
//
// bm is reset when matchBloomFilterAllTokens reports false; otherwise every
// visited value is checked with matchSequence.
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) {
	if matchBloomFilterAllTokens(bs, ch, tokens) {
		visitValues(bs, ch, bm, func(value string) bool {
			return matchSequence(value, phrases)
		})
		return
	}
	bm.resetBits()
}
// matchUint8BySequence applies the phrase sequence to a uint8 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint8ByExactValue. phrases must not be empty.
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint16BySequence applies the phrase sequence to a uint16 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint16ByExactValue. phrases must not be empty.
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint32BySequence applies the phrase sequence to a uint32 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint32ByExactValue. phrases must not be empty.
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchUint64BySequence applies the phrase sequence to a uint64 column.
//
// More than one phrase resets bm; otherwise phrases[0] is delegated to
// matchUint64ByExactValue. phrases must not be empty.
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
	if len(phrases) <= 1 {
		matchUint64ByExactValue(bs, ch, bm, phrases[0], tokens)
		return
	}
	bm.resetBits()
}
// matchSequence reports whether all the phrases can be located in s one after
// another: each phrase is searched with getPhrasePos in the part of s that
// follows the end of the previous match.
func matchSequence(s string, phrases []string) bool {
	rest := s
	for i := range phrases {
		pos := getPhrasePos(rest, phrases[i])
		if pos < 0 {
			return false
		}
		// Continue the search right after the matched phrase.
		rest = rest[pos+len(phrases[i]):]
	}
	return true
}

View file

@ -0,0 +1,821 @@
package logstorage
import (
"testing"
)
// TestMatchSequence verifies matchSequence on empty inputs, single phrases,
// ordered multi-phrase sequences and sequences given in the wrong order.
func TestMatchSequence(t *testing.T) {
	f := func(s string, phrases []string, resultExpected bool) {
		t.Helper()
		result := matchSequence(s, phrases)
		if result != resultExpected {
			// Report the inputs alongside got/want, so a failure is
			// self-explanatory without looking up the calling line.
			t.Fatalf("unexpected result for matchSequence(%q, %q); got %v; want %v", s, phrases, result, resultExpected)
		}
	}

	// Empty phrase
	f("", []string{""}, true)
	f("foo", []string{""}, true)

	// Single phrase
	f("", []string{"foo"}, false)
	f("foo", []string{"foo"}, true)
	f("foo bar", []string{"foo"}, true)
	f("foo bar", []string{"bar"}, true)
	f("foo bar", []string{"foo bar"}, true)

	// Multiple phrases must appear in order without overlapping
	f("foo bar", []string{"foo", "bar"}, true)
	f("foo bar", []string{"foo", " bar"}, true)
	f("foo bar", []string{"foo ", "bar"}, true)
	f("foo bar", []string{"foo ", " bar"}, false)
	f("foo bar", []string{"bar", "foo"}, false)
}
func TestFilterSequence(t *testing.T) {
t.Run("single-row", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"abc def",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"abc"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"def"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc def"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc ", "", "def", ""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"ab"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc", "abc"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc", "def", "foo"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("const-column", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"abc def",
"abc def",
"abc def",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"abc", " def"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc ", ""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{"", ""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"abc def ", "foobar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "non-existing column",
phrases: []string{"x", "yz"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("dict", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"",
"baz foobar",
"abc",
"afdf foobar baz",
"fddf foobarbaz",
"afoobarbaz",
"foobar",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"foobar", "baz"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{3})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"baz", "aaaa"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "non-existing column",
phrases: []string{"foobar", "aaaa"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("strings", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"a bb foo",
"bb a foobar",
"aa abc a",
"ca afdf a,foobar baz",
"a fddf foobarbaz",
"a afoobarbaz",
"a foobar bb",
"a kjlkjf dfff",
"a ТЕСТЙЦУК НГКШ ",
"a !!,23.(!1)",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"a", "bb"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 6})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"НГКШ", " "},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{8})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"!,", "(!1)"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{9})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"aa a", "bcdasqq"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"@", "!!!!"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("uint8", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"12",
"1",
"2",
"3",
"4",
"5",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"12"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234", "567"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("uint16", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"12",
"256",
"2",
"3",
"4",
"5",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"12"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234", "567"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("uint32", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"12",
"65536",
"2",
"3",
"4",
"5",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"12"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234", "567"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("uint64", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"123",
"12",
"32",
"0",
"0",
"12",
"12345678901",
"2",
"3",
"4",
"5",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"12"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{1, 5})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234", "567"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("float64", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"1234",
"0",
"3454",
"-65536",
"1234.5678901",
"1",
"2",
"3",
"4",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"-", "65536"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{3})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"1234.", "5678901"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{4})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", "5678901"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{4})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"", ""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"65536", "-"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"5678901", "1234"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"12345678901234567890"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("ipv4", func(t *testing.T) {
columns := []column{
{
name: "foo",
values: []string{
"1.2.3.4",
"0.0.0.0",
"127.0.0.1",
"254.255.255.255",
"127.0.0.1",
"127.0.0.1",
"127.0.4.2",
"127.0.0.1",
"1.0.127.6",
"55.55.55.55",
"66.66.66.66",
"7.7.7.7",
},
},
}
// match
fs := &filterSequence{
fieldName: "foo",
phrases: []string{"127.0.0.1"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{2, 4, 5, 7})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"127", "1"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{2, 4, 5, 7})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"127.0.0"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{2, 4, 5, 7})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"2.3", ".4"},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
fs = &filterSequence{
fieldName: "foo",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "foo", []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
// mismatch
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"5"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"127.", "1", "1", "345"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"27.0"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
fs = &filterSequence{
fieldName: "foo",
phrases: []string{"255.255.255.255"},
}
testFilterMatchForColumns(t, columns, fs, "foo", nil)
})
t.Run("timestamp-iso8601", func(t *testing.T) {
columns := []column{
{
name: "_msg",
values: []string{
"2006-01-02T15:04:05.001Z",
"2006-01-02T15:04:05.002Z",
"2006-01-02T15:04:05.003Z",
"2006-01-02T15:04:05.004Z",
"2006-01-02T15:04:05.005Z",
"2006-01-02T15:04:05.006Z",
"2006-01-02T15:04:05.007Z",
"2006-01-02T15:04:05.008Z",
"2006-01-02T15:04:05.009Z",
},
},
}
// match
fs := &filterSequence{
fieldName: "_msg",
phrases: []string{"2006-01-02T15:04:05.005Z"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{4})
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"2006-01", "04:05."},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"2006", "002Z"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{1})
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
fs = &filterSequence{
fieldName: "non-existing-column",
phrases: []string{""},
}
testFilterMatchForColumns(t, columns, fs, "_msg", []int{0, 1, 2, 3, 4, 5, 6, 7, 8})
// mismatch
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"bar"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", nil)
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"002Z", "2006"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", nil)
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"2006-04-02T15:04:05.005Z", "2023"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", nil)
fs = &filterSequence{
fieldName: "_msg",
phrases: []string{"06"},
}
testFilterMatchForColumns(t, columns, fs, "_msg", nil)
})
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,40 @@
package logstorage
// filterTime is a filter that selects entries by their timestamp.
//
// In LogsQL it is written as `_time:(start, end]`.
type filterTime struct {
	// minTimestamp and maxTimestamp bound the accepted timestamps.
	// The apply method treats both ends as inclusive.
	minTimestamp, maxTimestamp int64

	// stringRepr is the text that String emits after the `_time:` prefix.
	stringRepr string
}
// String returns the textual form of the filter: the `_time:` prefix
// followed by the stored stringRepr.
func (ft *filterTime) String() string {
	const prefix = "_time:"
	return prefix + ft.stringRepr
}
// apply evaluates the time filter against the block referenced by bs,
// updating bm in place.
//
// Cheap decisions are made first from the block's timestampsHeader; the
// per-row timestamps are fetched only when the header alone cannot decide.
func (ft *filterTime) apply(bs *blockSearch, bm *bitmap) {
	lo, hi := ft.minTimestamp, ft.maxTimestamp

	// An inverted range cannot contain any timestamp.
	if lo > hi {
		bm.resetBits()
		return
	}

	th := bs.bsw.bh.timestampsHeader
	switch {
	case hi < th.minTimestamp || lo > th.maxTimestamp:
		// The block's [min, max] span does not intersect the filter range.
		bm.resetBits()
		return
	case lo <= th.minTimestamp && th.maxTimestamp <= hi:
		// The block's span lies entirely inside the filter range,
		// so bm is left untouched.
		return
	}

	// Partial overlap: check every row whose bit is still set.
	// NOTE(review): the callback's result presumably tells forEachSetBit
	// whether to keep the bit set - confirm against bitmap.forEachSetBit.
	timestamps := bs.getTimestamps()
	bm.forEachSetBit(func(idx int) bool {
		ts := timestamps[idx]
		return !(ts < lo || ts > hi)
	})
}

View file

@ -336,7 +336,7 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
case lex.isKeyword("re"): case lex.isKeyword("re"):
return parseRegexpFilter(lex, fieldName) return parseRegexpFilter(lex, fieldName)
case lex.isKeyword("seq"): case lex.isKeyword("seq"):
return parseSequenceFilter(lex, fieldName) return parseFilterSequence(lex, fieldName)
case lex.isKeyword("string_range"): case lex.isKeyword("string_range"):
return parseStringRangeFilter(lex, fieldName) return parseStringRangeFilter(lex, fieldName)
case lex.isKeyword(`"`, "'", "`"): case lex.isKeyword(`"`, "'", "`"):
@ -622,13 +622,13 @@ func parseInFilter(lex *lexer, fieldName string) (filter, error) {
}) })
} }
func parseSequenceFilter(lex *lexer, fieldName string) (filter, error) { func parseFilterSequence(lex *lexer, fieldName string) (filter, error) {
return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) { return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
sf := &sequenceFilter{ fs := &filterSequence{
fieldName: fieldName, fieldName: fieldName,
phrases: args, phrases: args,
} }
return sf, nil return fs, nil
}) })
} }

View file

@ -274,22 +274,22 @@ func TestParseTimeRange(t *testing.T) {
f(`[2023-03-01+02:20,2023-04-06T23] offset 30m5s`, minTimestamp, maxTimestamp) f(`[2023-03-01+02:20,2023-04-06T23] offset 30m5s`, minTimestamp, maxTimestamp)
} }
func TestParseSequenceFilter(t *testing.T) { func TestParseFilterSequence(t *testing.T) {
f := func(s, fieldNameExpected string, phrasesExpected []string) { f := func(s, fieldNameExpected string, phrasesExpected []string) {
t.Helper() t.Helper()
q, err := ParseQuery(s) q, err := ParseQuery(s)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
sf, ok := q.f.(*sequenceFilter) fs, ok := q.f.(*filterSequence)
if !ok { if !ok {
t.Fatalf("unexpected filter type; got %T; want *sequenceFilter; filter: %s", q.f, q.f) t.Fatalf("unexpected filter type; got %T; want *filterSequence; filter: %s", q.f, q.f)
} }
if sf.fieldName != fieldNameExpected { if fs.fieldName != fieldNameExpected {
t.Fatalf("unexpected fieldName; got %q; want %q", sf.fieldName, fieldNameExpected) t.Fatalf("unexpected fieldName; got %q; want %q", fs.fieldName, fieldNameExpected)
} }
if !reflect.DeepEqual(sf.phrases, phrasesExpected) { if !reflect.DeepEqual(fs.phrases, phrasesExpected) {
t.Fatalf("unexpected phrases\ngot\n%q\nwant\n%q", sf.phrases, phrasesExpected) t.Fatalf("unexpected phrases\ngot\n%q\nwant\n%q", fs.phrases, phrasesExpected)
} }
} }