This commit is contained in:
Aliaksandr Valialkin 2024-05-27 21:29:24 +02:00
parent ca4b5686c1
commit 9903654e0f
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@ -13,8 +13,13 @@ import (
type filterAnd struct { type filterAnd struct {
filters []filter filters []filter
msgTokensOnce sync.Once byFieldTokensOnce sync.Once
msgTokens []string byFieldTokens []fieldTokens
}
type fieldTokens struct {
field string
tokens []string
} }
func (fa *filterAnd) String() string { func (fa *filterAnd) String() string {
@ -49,8 +54,8 @@ func (fa *filterAnd) applyToBlockResult(br *blockResult, bm *bitmap) {
} }
func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) { func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
if !fa.matchMessageBloomFilter(bs) { if !fa.matchBloomFilters(bs) {
// Fast path - fa doesn't match _msg bloom filter. // Fast path - fa doesn't match bloom filters.
bm.resetBits() bm.resetBits()
return return
} }
@ -66,64 +71,103 @@ func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
} }
} }
func (fa *filterAnd) matchMessageBloomFilter(bs *blockSearch) bool { func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
tokens := fa.getMessageTokens() byFieldTokens := fa.getByFieldTokens()
if len(tokens) == 0 { if len(byFieldTokens) == 0 {
return true return true
} }
v := bs.csh.getConstColumnValue("_msg") for _, fieldTokens := range byFieldTokens {
fieldName := fieldTokens.field
tokens := fieldTokens.tokens
v := bs.csh.getConstColumnValue(fieldName)
if v != "" { if v != "" {
return matchStringByAllTokens(v, tokens) if !matchStringByAllTokens(v, tokens) {
return false
}
continue
} }
ch := bs.csh.getColumnHeader("_msg") ch := bs.csh.getColumnHeader(fieldName)
if ch == nil { if ch == nil {
return false return false
} }
if ch.valueType == valueTypeDict { if ch.valueType == valueTypeDict {
return matchDictValuesByAllTokens(ch.valuesDict.values, tokens) if !matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
return false
}
continue
}
if !matchBloomFilterAllTokens(bs, ch, tokens) {
return false
} }
return matchBloomFilterAllTokens(bs, ch, tokens)
} }
func (fa *filterAnd) getMessageTokens() []string { return true
fa.msgTokensOnce.Do(fa.initMsgTokens)
return fa.msgTokens
} }
func (fa *filterAnd) initMsgTokens() { func (fa *filterAnd) getByFieldTokens() []fieldTokens {
var a []string fa.byFieldTokensOnce.Do(fa.initByFieldTokens)
return fa.byFieldTokens
}
func (fa *filterAnd) initByFieldTokens() {
m := make(map[string][]string)
byFieldFilters := make(map[string]int)
var fieldNames []string
for _, f := range fa.filters { for _, f := range fa.filters {
fieldName := ""
var tokens []string
switch t := f.(type) { switch t := f.(type) {
case *filterExact: case *filterExact:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
case *filterExactPrefix: case *filterExactPrefix:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
case *filterPhrase: case *filterPhrase:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
case *filterPrefix: case *filterPrefix:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
case *filterRegexp: case *filterRegexp:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
case *filterSequence: case *filterSequence:
if isMsgFieldName(t.fieldName) { fieldName = t.fieldName
a = append(a, t.getTokens()...) tokens = t.getTokens()
}
fieldName = getCanonicalColumnName(fieldName)
byFieldFilters[fieldName]++
if len(tokens) > 0 {
a, ok := m[fieldName]
if !ok {
fieldNames = append(fieldNames, fieldName)
}
m[fieldName] = append(a, tokens...)
} }
} }
var byFieldTokens []fieldTokens
for _, fieldName := range fieldNames {
if byFieldFilters[fieldName] < 2 {
// It is faster to perform bloom filter match inline when visiting the corresponding column
continue
} }
fa.msgTokens = a byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName,
tokens: m[fieldName],
})
}
fa.byFieldTokens = byFieldTokens
} }
func matchStringByAllTokens(v string, tokens []string) bool { func matchStringByAllTokens(v string, tokens []string) bool {