lib/logstorage: pre-calculate hashes from tokens used in bloom filter search

Previously, the per-token hashes used for probing per-block bloom filters were re-calculated for every scanned block.
This could be slow when the number of tokens or the number of blocks to scan is big.
Pre-calculate the hashes once and then re-use them when searching in per-block bloom filters.
This improves performance by 2.5x for `in(...)` filters with many values to search.
Aliaksandr Valialkin 2024-09-05 19:40:50 +02:00
parent d7be0e7c9a
commit 258ccfb953
14 changed files with 225 additions and 163 deletions
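
The change in a nutshell: token hashes for bloom-filter probes are now computed once via appendTokensHashes() and reused for every scanned block, instead of being re-derived inside containsAll() for each block. A minimal sketch of the resulting precompute-then-probe flow (the scanCandidateBlocks helper and blockFilters slice are illustrative only, not code from this commit):

// Illustrative sketch: hash the query tokens once, then probe every
// per-block bloom filter with the precomputed hashes.
// appendTokensHashes and (*bloomFilter).containsAll are the functions
// changed below; blockFilters stands in for the real block scan.
func scanCandidateBlocks(blockFilters []*bloomFilter, tokens []string) []int {
	// One hashing pass per token for the whole query.
	hashes := appendTokensHashes(nil, tokens)

	var candidates []int
	for i, bf := range blockFilters {
		// Per block this is only modulo + bit tests, no re-hashing of tokens.
		if bf.containsAll(hashes) {
			candidates = append(candidates, i)
		}
	}
	return candidates
}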

@@ -15,6 +15,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta

 ## tip

+* FEATURE: optimize [multi-exact queries](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter) with many phrases to search. For example, `ip:in(path:="/foo/bar" | keep ip)` when there are many unique values for `ip` field among log entries with `/foo/bar` path.
 * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add support for displaying the top 5 log streams in the hits graph. The remaining log streams are grouped into an "other" label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6545).
 * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add the ability to customize the graph display with options for bar, line, stepped line, and points.
 * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add fields for setting AccountID and ProjectID. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6631).

@@ -72,49 +72,64 @@ func (bf *bloomFilter) mustInit(tokens []string) {

 // bloomFilterAdd adds the given tokens to the bloom filter bits
 func bloomFilterAdd(bits []uint64, tokens []string) {
+	hashesCount := len(tokens) * bloomFilterHashesCount
+	a := encoding.GetUint64s(hashesCount)
+	a.A = appendTokensHashes(a.A[:0], tokens)
+
 	maxBits := uint64(len(bits)) * 64
+	for _, h := range a.A {
+		idx := h % maxBits
+		i := idx / 64
+		j := idx % 64
+		mask := uint64(1) << j
+		w := bits[i]
+		if (w & mask) == 0 {
+			bits[i] = w | mask
+		}
+	}
+
+	encoding.PutUint64s(a)
+}
+
+// appendTokensHashes appends hashes for the given tokens to dst and returns the result.
+//
+// the appended hashes can be then passed to bloomFilter.containsAll().
+func appendTokensHashes(dst []uint64, tokens []string) []uint64 {
+	dstLen := len(dst)
+	hashesCount := len(tokens) * bloomFilterHashesCount
+
+	dst = slicesutil.SetLength(dst, dstLen+hashesCount)
+	dst = dst[:dstLen]
+
 	var buf [8]byte
 	hp := (*uint64)(unsafe.Pointer(&buf[0]))
 	for _, token := range tokens {
 		*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
 		for i := 0; i < bloomFilterHashesCount; i++ {
-			hi := xxhash.Sum64(buf[:])
+			h := xxhash.Sum64(buf[:])
 			(*hp)++
-			idx := hi % maxBits
-			i := idx / 64
-			j := idx % 64
-			mask := uint64(1) << j
-			w := bits[i]
-			if (w & mask) == 0 {
-				bits[i] = w | mask
-			}
+			dst = append(dst, h)
 		}
 	}
+	return dst
 }

-// containsAll returns true if bf contains all the given tokens.
-func (bf *bloomFilter) containsAll(tokens []string) bool {
+// containsAll returns true if bf contains all the given tokens hashes generated by appendTokensHashes.
+func (bf *bloomFilter) containsAll(hashes []uint64) bool {
 	bits := bf.bits
 	if len(bits) == 0 {
 		return true
 	}
 	maxBits := uint64(len(bits)) * 64
-	var buf [8]byte
-	hp := (*uint64)(unsafe.Pointer(&buf[0]))
-	for _, token := range tokens {
-		*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
-		for i := 0; i < bloomFilterHashesCount; i++ {
-			hi := xxhash.Sum64(buf[:])
-			(*hp)++
-			idx := hi % maxBits
-			i := idx / 64
-			j := idx % 64
-			mask := uint64(1) << j
-			w := bits[i]
-			if (w & mask) == 0 {
-				// The token is missing
-				return false
-			}
+	for _, h := range hashes {
+		idx := h % maxBits
+		i := idx / 64
+		j := idx % 64
+		mask := uint64(1) << j
+		w := bits[i]
+		if (w & mask) == 0 {
+			// The token is missing
+			return false
 		}
 	}
 	return true
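
The hash derivation itself is unchanged by this commit: each token is hashed once with xxhash, and the per-token probe hashes are produced by repeatedly hashing an incrementing 8-byte seed. A standalone sketch of that scheme, assuming github.com/cespare/xxhash/v2 and an illustrative hash count (the real bloomFilterHashesCount constant is defined elsewhere in this file); the original writes the seed via unsafe.Pointer in native byte order, so little-endian is assumed here:

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/cespare/xxhash/v2"
)

// hashesPerToken is an illustrative stand-in for bloomFilterHashesCount.
const hashesPerToken = 6

// tokenHashes mirrors the derivation above: seed = xxhash(token), then every
// probe hash is xxhash of the seed bytes, with the seed incremented per round.
func tokenHashes(dst []uint64, token string) []uint64 {
	var buf [8]byte
	seed := xxhash.Sum64String(token)
	for i := 0; i < hashesPerToken; i++ {
		binary.LittleEndian.PutUint64(buf[:], seed)
		dst = append(dst, xxhash.Sum64(buf[:]))
		seed++
	}
	return dst
}

func main() {
	fmt.Println(tokenHashes(nil, "error"))
}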

@@ -14,8 +14,9 @@ func TestBloomFilter(t *testing.T) {
 		if err := bf.unmarshal(data); err != nil {
 			t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
 		}
-		if !bf.containsAll(tokens) {
-			t.Fatalf("bloomFilterContains must return true for the added tokens")
+		tokensHashes := appendTokensHashes(nil, tokens)
+		if !bf.containsAll(tokensHashes) {
+			t.Fatalf("containsAll must return true for the added tokens")
 		}
 	}
 	f(nil)
@@ -67,7 +68,8 @@ func TestBloomFilterFalsePositive(t *testing.T) {
 	falsePositives := 0
 	for i := range tokens {
 		token := fmt.Sprintf("non-existing-token_%d", i)
-		if bf.containsAll([]string{token}) {
+		tokensHashes := appendTokensHashes(nil, []string{token})
+		if bf.containsAll(tokensHashes) {
 			falsePositives++
 		}
 	}

@@ -18,8 +18,9 @@ type filterAnd struct {
 }

 type fieldTokens struct {
-	field  string
-	tokens []string
+	field        string
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fa *filterAnd) String() string {
@@ -76,16 +77,16 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
 		return true
 	}

-	for _, fieldTokens := range byFieldTokens {
-		fieldName := fieldTokens.field
-		tokens := fieldTokens.tokens
+	for _, ft := range byFieldTokens {
+		fieldName := ft.field
+		tokens := ft.tokens

 		v := bs.csh.getConstColumnValue(fieldName)
 		if v != "" {
-			if !matchStringByAllTokens(v, tokens) {
-				return false
+			if matchStringByAllTokens(v, tokens) {
+				continue
 			}
-			continue
+			return false
 		}

 		ch := bs.csh.getColumnHeader(fieldName)
@@ -94,12 +95,12 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
 		}

 		if ch.valueType == valueTypeDict {
-			if !matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
-				return false
+			if matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
+				continue
 			}
-			continue
+			return false
 		}
-		if !matchBloomFilterAllTokens(bs, ch, tokens) {
+		if !matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
 			return false
 		}
 	}
@@ -170,8 +171,9 @@ func (fa *filterAnd) initByFieldTokens() {
 		}

 		byFieldTokens = append(byFieldTokens, fieldTokens{
-			field:  fieldName,
-			tokens: tokens,
+			field:        fieldName,
+			tokens:       tokens,
+			tokensHashes: appendTokensHashes(nil, tokens),
 		})
 	}

@@ -24,11 +24,9 @@ type filterAnyCasePhrase struct {
 	phraseUppercaseOnce sync.Once
 	phraseUppercase     string

-	tokensOnce sync.Once
-	tokens     []string
-
-	tokensUppercaseOnce sync.Once
-	tokensUppercase     []string
+	tokensOnce            sync.Once
+	tokensHashes          []uint64
+	tokensHashesUppercase []uint64
 }

 func (fp *filterAnyCasePhrase) String() string {
@@ -39,27 +37,25 @@ func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
 	neededFields.add(fp.fieldName)
 }

-func (fp *filterAnyCasePhrase) getTokens() []string {
+func (fp *filterAnyCasePhrase) getTokensHashes() []uint64 {
 	fp.tokensOnce.Do(fp.initTokens)
-	return fp.tokens
+	return fp.tokensHashes
+}
+
+func (fp *filterAnyCasePhrase) getTokensHashesUppercase() []uint64 {
+	fp.tokensOnce.Do(fp.initTokens)
+	return fp.tokensHashesUppercase
 }

 func (fp *filterAnyCasePhrase) initTokens() {
-	fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
-}
-
-func (fp *filterAnyCasePhrase) getTokensUppercase() []string {
-	fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
-	return fp.tokensUppercase
-}
-
-func (fp *filterAnyCasePhrase) initTokensUppercase() {
-	tokens := fp.getTokens()
+	tokens := tokenizeStrings(nil, []string{fp.phrase})
+	fp.tokensHashes = appendTokensHashes(nil, tokens)
+
 	tokensUppercase := make([]string, len(tokens))
 	for i, token := range tokens {
 		tokensUppercase[i] = strings.ToUpper(token)
 	}
-	fp.tokensUppercase = tokensUppercase
+	fp.tokensHashesUppercase = appendTokensHashes(nil, tokensUppercase)
 }

 func (fp *filterAnyCasePhrase) getPhraseLowercase() string {
@@ -109,7 +105,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fp.getTokens()
+	tokens := fp.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -130,7 +126,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens)
 	case valueTypeTimestampISO8601:
 		phraseUppercase := fp.getPhraseUppercase()
-		tokensUppercase := fp.getTokensUppercase()
+		tokensUppercase := fp.getTokensHashesUppercase()
 		matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokensUppercase)
 	default:
 		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
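
One design choice repeated in the any-case filters above and below: a single sync.Once now lazily builds both the regular and the uppercase hash slices, replacing the two separate Once values used before. A generic sketch of that pattern under illustrative names (lazyHashes is hypothetical, assumed to live in the same package as appendTokensHashes with strings and sync imported):

// Illustrative sketch of the single-sync.Once lazy-init pattern used by the
// any-case filters; lazyHashes and its fields are hypothetical.
type lazyHashes struct {
	once            sync.Once
	hashes          []uint64
	hashesUppercase []uint64
}

func (lh *lazyHashes) init(tokens []string) {
	lh.once.Do(func() {
		lh.hashes = appendTokensHashes(nil, tokens)

		upper := make([]string, len(tokens))
		for i, t := range tokens {
			upper[i] = strings.ToUpper(t)
		}
		lh.hashesUppercase = appendTokensHashes(nil, upper)
	})
}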

@@ -25,11 +25,9 @@ type filterAnyCasePrefix struct {
 	prefixUppercaseOnce sync.Once
 	prefixUppercase     string

-	tokensOnce sync.Once
-	tokens     []string
-
-	tokensUppercaseOnce sync.Once
-	tokensUppercase     []string
+	tokensOnce            sync.Once
+	tokensHashes          []uint64
+	tokensUppercaseHashes []uint64
 }

 func (fp *filterAnyCasePrefix) String() string {
@@ -43,27 +41,25 @@ func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
 	neededFields.add(fp.fieldName)
 }

-func (fp *filterAnyCasePrefix) getTokens() []string {
+func (fp *filterAnyCasePrefix) getTokensHashes() []uint64 {
 	fp.tokensOnce.Do(fp.initTokens)
-	return fp.tokens
+	return fp.tokensHashes
+}
+
+func (fp *filterAnyCasePrefix) getTokensUppercaseHashes() []uint64 {
+	fp.tokensOnce.Do(fp.initTokens)
+	return fp.tokensUppercaseHashes
 }

 func (fp *filterAnyCasePrefix) initTokens() {
-	fp.tokens = getTokensSkipLast(fp.prefix)
-}
-
-func (fp *filterAnyCasePrefix) getTokensUppercase() []string {
-	fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
-	return fp.tokensUppercase
-}
-
-func (fp *filterAnyCasePrefix) initTokensUppercase() {
-	tokens := fp.getTokens()
+	tokens := getTokensSkipLast(fp.prefix)
+	fp.tokensHashes = appendTokensHashes(nil, tokens)
+
 	tokensUppercase := make([]string, len(tokens))
 	for i, token := range tokens {
 		tokensUppercase[i] = strings.ToUpper(token)
 	}
-	fp.tokensUppercase = tokensUppercase
+	fp.tokensUppercaseHashes = appendTokensHashes(nil, tokensUppercase)
 }

 func (fp *filterAnyCasePrefix) getPrefixLowercase() string {
@@ -110,7 +106,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fp.getTokens()
+	tokens := fp.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -131,7 +127,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens)
 	case valueTypeTimestampISO8601:
 		prefixUppercase := fp.getPrefixUppercase()
-		tokensUppercase := fp.getTokensUppercase()
+		tokensUppercase := fp.getTokensUppercaseHashes()
 		matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokensUppercase)
 	default:
 		logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)

@@ -16,8 +16,9 @@ type filterExact struct {
 	fieldName string
 	value     string

-	tokensOnce sync.Once
-	tokens     []string
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fe *filterExact) String() string {
@@ -33,8 +34,14 @@ func (fe *filterExact) getTokens() []string {
 	return fe.tokens
 }

+func (fe *filterExact) getTokensHashes() []uint64 {
+	fe.tokensOnce.Do(fe.initTokens)
+	return fe.tokensHashes
+}
+
 func (fe *filterExact) initTokens() {
 	fe.tokens = tokenizeStrings(nil, []string{fe.value})
+	fe.tokensHashes = appendTokensHashes(nil, fe.tokens)
 }

 func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
@@ -186,7 +193,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fe.getTokens()
+	tokens := fe.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -212,7 +219,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
+func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
 	n, ok := tryParseTimestampISO8601(value)
 	if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
 		bm.resetBits()
@@ -224,7 +231,7 @@ func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bi
 	bbPool.Put(bb)
 }

-func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
+func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
 	n, ok := tryParseIPv4(value)
 	if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue {
 		bm.resetBits()
@@ -236,7 +243,7 @@ func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value
 	bbPool.Put(bb)
 }

-func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
+func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
 	f, ok := tryParseFloat64(value)
 	if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) {
 		bm.resetBits()
@@ -262,7 +269,7 @@ func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap,
 	bbPool.Put(bb)
 }

-func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
+func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -272,7 +279,7 @@ func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, valu
 	})
 }

-func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	n, ok := tryParseUint64(phrase)
 	if !ok || n < ch.minValue || n > ch.maxValue {
 		bm.resetBits()
@@ -284,7 +291,7 @@ func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
 	bbPool.Put(bb)
 }

-func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	n, ok := tryParseUint64(phrase)
 	if !ok || n < ch.minValue || n > ch.maxValue {
 		bm.resetBits()
@@ -296,7 +303,7 @@ func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
 	bbPool.Put(bb)
 }

-func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	n, ok := tryParseUint64(phrase)
 	if !ok || n < ch.minValue || n > ch.maxValue {
 		bm.resetBits()
@@ -308,7 +315,7 @@ func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
 	bbPool.Put(bb)
 }

-func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	n, ok := tryParseUint64(phrase)
 	if !ok || n < ch.minValue || n > ch.maxValue {
 		bm.resetBits()
@@ -320,7 +327,7 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
 	bbPool.Put(bb)
 }

-func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) {
+func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return

@@ -15,8 +15,9 @@ type filterExactPrefix struct {
 	fieldName string
 	prefix    string

-	tokensOnce sync.Once
-	tokens     []string
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fep *filterExactPrefix) String() string {
@@ -32,8 +33,14 @@ func (fep *filterExactPrefix) getTokens() []string {
 	return fep.tokens
 }

+func (fep *filterExactPrefix) getTokensHashes() []uint64 {
+	fep.tokensOnce.Do(fep.initTokens)
+	return fep.tokensHashes
+}
+
 func (fep *filterExactPrefix) initTokens() {
 	fep.tokens = getTokensSkipLast(fep.prefix)
+	fep.tokensHashes = appendTokensHashes(nil, fep.tokens)
 }

 func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
@@ -62,7 +69,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fep.getTokens()
+	tokens := fep.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -88,7 +95,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		return
 	}
@@ -105,11 +112,11 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
 	bbPool.Put(bb)
 }

-func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		return
 	}
-	if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) {
+	if prefix < "0" || prefix > "9" || len(tokens) > 3*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
 	}
@@ -122,12 +129,12 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
 	bbPool.Put(bb)
 }

-func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		// An empty prefix matches all the values
 		return
 	}
-	if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) {
+	if len(tokens) > 2*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
 	}
@@ -153,7 +160,7 @@ func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap,
 	bbPool.Put(bb)
 }

-func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -163,7 +170,7 @@ func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
 	})
 }

-func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
 		return
 	}
@@ -176,7 +183,7 @@ func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
 	bbPool.Put(bb)
 }

-func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
 		return
 	}
@@ -189,7 +196,7 @@ func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
 	bbPool.Put(bb)
 }

-func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
 		return
 	}
@@ -202,7 +209,7 @@ func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
 	bbPool.Put(bb)
 }

-func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
 		return
 	}
@@ -215,7 +222,7 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
 	bbPool.Put(bb)
 }

-func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []string) bool {
+func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool {
 	if prefix == "" {
 		// An empty prefix matches all the values
 		return false

@@ -28,9 +28,9 @@ type filterIn struct {
 	// qFieldName must be set to field name for obtaining values from if q is non-nil.
 	qFieldName string

-	tokensOnce   sync.Once
-	commonTokens []string
-	tokenSets    [][]string
+	tokensOnce         sync.Once
+	commonTokensHashes []uint64
+	tokenSetsHashes    [][]uint64

 	stringValuesOnce sync.Once
 	stringValues     map[string]struct{}
@@ -76,16 +76,21 @@ func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
 	neededFields.add(fi.fieldName)
 }

-func (fi *filterIn) getTokens() ([]string, [][]string) {
+func (fi *filterIn) getTokensHashes() ([]uint64, [][]uint64) {
 	fi.tokensOnce.Do(fi.initTokens)
-	return fi.commonTokens, fi.tokenSets
+	return fi.commonTokensHashes, fi.tokenSetsHashes
 }

 func (fi *filterIn) initTokens() {
 	commonTokens, tokenSets := getCommonTokensAndTokenSets(fi.values)

-	fi.commonTokens = commonTokens
-	fi.tokenSets = tokenSets
+	fi.commonTokensHashes = appendTokensHashes(nil, commonTokens)
+
+	tokenSetsHashes := make([][]uint64, len(tokenSets))
+	for i, tokens := range tokenSets {
+		tokenSetsHashes[i] = appendTokensHashes(nil, tokens)
+	}
+	fi.tokenSetsHashes = tokenSetsHashes
 }

 func (fi *filterIn) getStringValues() map[string]struct{} {
@@ -374,7 +379,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	commonTokens, tokenSets := fi.getTokens()
+	commonTokens, tokenSets := fi.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -409,7 +414,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []string, tokenSets [][]string) {
+func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []uint64, tokenSets [][]uint64) {
 	if len(values) == 0 {
 		bm.resetBits()
 		return
@@ -424,7 +429,7 @@ func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[str
 	})
 }

-func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []string, tokenSets [][]string) bool {
+func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []uint64, tokenSets [][]uint64) bool {
 	if len(commonTokens) > 0 {
 		if !matchBloomFilterAllTokens(bs, ch, commonTokens) {
 			return false
@@ -511,6 +516,9 @@ func getCommonTokens(tokenSets [][]string) []string {
 			}
 		}
 	}
+	if len(m) == 0 {
+		return nil
+	}

 	tokens := make([]string, 0, len(m))
 	for token := range m {
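
This file is where the `in(...)` speed-up quoted in the commit message comes from: the hashes for the tokens shared by all values (commonTokensHashes) and for each value's own token set (tokenSetsHashes) are built once in initTokens() and reused for every block. A simplified sketch of how such precomputed hash sets get checked against one block's bloom filter (a hypothetical helper, not the actual matchBloomFilterAnyTokenSet):

// Simplified, hypothetical helper: decide whether a block may contain any of
// the in(...) values, using only the precomputed hashes and bit tests.
func blockMayContainAnyValue(bf *bloomFilter, commonHashes []uint64, tokenSetsHashes [][]uint64) bool {
	// Tokens shared by every in(...) value act as a cheap pre-filter:
	// if any of them is missing, no value can match this block.
	if !bf.containsAll(commonHashes) {
		return false
	}
	if len(tokenSetsHashes) == 0 {
		return true
	}
	// Otherwise the block is a candidate only if the complete token set
	// of at least one value may be present in the bloom filter.
	for _, hashes := range tokenSetsHashes {
		if bf.containsAll(hashes) {
			return true
		}
	}
	return false
}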

@@ -89,9 +89,9 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
 		return true
 	}

-	for _, fieldTokens := range byFieldTokens {
-		fieldName := fieldTokens.field
-		tokens := fieldTokens.tokens
+	for _, ft := range byFieldTokens {
+		fieldName := ft.field
+		tokens := ft.tokens

 		v := bs.csh.getConstColumnValue(fieldName)
 		if v != "" {
@@ -112,7 +112,7 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
 			}
 			continue
 		}
-		if matchBloomFilterAllTokens(bs, ch, tokens) {
+		if matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
 			return true
 		}
 	}
@@ -190,8 +190,9 @@ func (fo *filterOr) initByFieldTokens() {
 			break
 		}
 		byFieldTokens = append(byFieldTokens, fieldTokens{
-			field:  fieldName,
-			tokens: commonTokens,
+			field:        fieldName,
+			tokens:       commonTokens,
+			tokensHashes: appendTokensHashes(nil, commonTokens),
 		})
 	}

@@ -24,8 +24,9 @@ type filterPhrase struct {
 	fieldName string
 	phrase    string

-	tokensOnce sync.Once
-	tokens     []string
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fp *filterPhrase) String() string {
@@ -41,8 +42,14 @@ func (fp *filterPhrase) getTokens() []string {
 	return fp.tokens
 }

+func (fp *filterPhrase) getTokensHashes() []uint64 {
+	fp.tokensOnce.Do(fp.initTokens)
+	return fp.tokensHashes
+}
+
 func (fp *filterPhrase) initTokens() {
 	fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
+	fp.tokensHashes = appendTokensHashes(nil, fp.tokens)
 }

 func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
@@ -73,7 +80,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fp.getTokens()
+	tokens := fp.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -99,7 +106,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	_, ok := tryParseTimestampISO8601(phrase)
 	if ok {
 		// Fast path - the phrase contains complete timestamp, so we can use exact search
@@ -121,7 +128,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
 	bbPool.Put(bb)
 }

-func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	_, ok := tryParseIPv4(phrase)
 	if ok {
 		// Fast path - phrase contains the full IP address, so we can use exact matching
@@ -145,7 +152,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
 	bbPool.Put(bb)
 }

-func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	// The phrase may contain a part of the floating-point number.
 	// For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
 	// This means we cannot search in binary representation of floating-point numbers.
@@ -187,7 +194,7 @@ func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
 	bbPool.Put(bb)
 }

-func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
+func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -288,7 +295,7 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value str
 	})
 }

-func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
+func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []uint64) bool {
 	if len(tokens) == 0 {
 		return true
 	}

@@ -19,8 +19,9 @@ type filterPrefix struct {
 	fieldName string
 	prefix    string

-	tokensOnce sync.Once
-	tokens     []string
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fp *filterPrefix) String() string {
@@ -39,8 +40,14 @@ func (fp *filterPrefix) getTokens() []string {
 	return fp.tokens
 }

+func (fp *filterPrefix) getTokensHashes() []uint64 {
+	fp.tokensOnce.Do(fp.initTokens)
+	return fp.tokensHashes
+}
+
 func (fp *filterPrefix) initTokens() {
 	fp.tokens = getTokensSkipLast(fp.prefix)
+	fp.tokensHashes = appendTokensHashes(nil, fp.tokens)
 }

 func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
@@ -68,7 +75,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fp.getTokens()
+	tokens := fp.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -94,7 +101,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		// Fast path - all the timestamp values match an empty prefix aka `*`
 		return
@@ -115,7 +122,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
 	bbPool.Put(bb)
 }

-func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		// Fast path - all the ipv4 values match an empty prefix aka `*`
 		return
@@ -136,7 +143,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
 	bbPool.Put(bb)
 }

-func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if prefix == "" {
 		// Fast path - all the float64 values match an empty prefix aka `*`
 		return
@@ -177,7 +184,7 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
 	bbPool.Put(bb)
 }

-func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
+func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return

@@ -16,8 +16,9 @@ type filterRegexp struct {
 	fieldName string
 	re        *regexutil.Regex

-	tokens     []string
-	tokensOnce sync.Once
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64
 }

 func (fr *filterRegexp) String() string {
@@ -33,12 +34,18 @@ func (fr *filterRegexp) getTokens() []string {
 	return fr.tokens
 }

+func (fr *filterRegexp) getTokensHashes() []uint64 {
+	fr.tokensOnce.Do(fr.initTokens)
+	return fr.tokensHashes
+}
+
 func (fr *filterRegexp) initTokens() {
 	literals := fr.re.GetLiterals()
 	for i, literal := range literals {
 		literals[i] = skipFirstLastToken(literal)
 	}
 	fr.tokens = tokenizeStrings(nil, literals)
+	fr.tokensHashes = appendTokensHashes(nil, fr.tokens)
 }

 func skipFirstLastToken(s string) string {
@@ -89,7 +96,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fr.getTokens()
+	tokens := fr.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -115,7 +122,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -128,7 +135,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
 	bbPool.Put(bb)
 }

-func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -141,7 +148,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu
 	bbPool.Put(bb)
 }

-func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -167,7 +174,7 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
 	bbPool.Put(bb)
 }

-func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -177,7 +184,7 @@ func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
 	})
 }

-func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -190,7 +197,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
 	bbPool.Put(bb)
 }

-func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -203,7 +210,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
 	bbPool.Put(bb)
 }

-func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -216,7 +223,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
 	bbPool.Put(bb)
 }

-func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
+func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return

@@ -15,8 +15,9 @@ type filterSequence struct {
 	fieldName string
 	phrases   []string

-	tokensOnce sync.Once
-	tokens     []string
+	tokensOnce   sync.Once
+	tokens       []string
+	tokensHashes []uint64

 	nonEmptyPhrasesOnce sync.Once
 	nonEmptyPhrases     []string
@@ -40,10 +41,15 @@ func (fs *filterSequence) getTokens() []string {
 	return fs.tokens
 }

+func (fs *filterSequence) getTokensHashes() []uint64 {
+	fs.tokensOnce.Do(fs.initTokens)
+	return fs.tokensHashes
+}
+
 func (fs *filterSequence) initTokens() {
 	phrases := fs.getNonEmptyPhrases()
-	tokens := tokenizeStrings(nil, phrases)
-	fs.tokens = tokens
+	fs.tokens = tokenizeStrings(nil, phrases)
+	fs.tokensHashes = appendTokensHashes(nil, fs.tokens)
 }

 func (fs *filterSequence) getNonEmptyPhrases() []string {
@@ -100,7 +106,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 		return
 	}

-	tokens := fs.getTokens()
+	tokens := fs.getTokensHashes()

 	switch ch.valueType {
 	case valueTypeString:
@@ -126,7 +132,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
 	}
 }

-func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) == 1 {
 		matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
 		return
@@ -145,7 +151,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
 	bbPool.Put(bb)
 }

-func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) == 1 {
 		matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
 		return
@@ -166,7 +172,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
 	bbPool.Put(bb)
 }

-func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -197,7 +203,7 @@ func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, ph
 	bbPool.Put(bb)
 }

-func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) {
+func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if !matchBloomFilterAllTokens(bs, ch, tokens) {
 		bm.resetBits()
 		return
@@ -207,7 +213,7 @@ func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
 	})
 }

-func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) > 1 {
 		bm.resetBits()
 		return
@@ -215,7 +221,7 @@ func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases
 	matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
 }

-func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) > 1 {
 		bm.resetBits()
 		return
@@ -223,7 +229,7 @@ func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
 	matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
 }

-func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) > 1 {
 		bm.resetBits()
 		return
@@ -231,7 +237,7 @@ func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
 	matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
 }

-func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
+func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
 	if len(phrases) > 1 {
 		bm.resetBits()
 		return