lib/logstorage: pre-calculate hashes from tokens used in bloom filter search

Previously per-token hashes for per-block bloom filters were re-calculated on every scanned block.
This could be slow when the number of tokens or the number of blocks to scan is big.
Pre-calculate hashes for bloom filters and then use them for searching in bloom filters.
This improves performance by 2.5x for `in(...)` filters with many values to search for.
This commit is contained in:
Aliaksandr Valialkin 2024-09-05 19:40:50 +02:00
parent 6fe0a2700e
commit 7dcce1ca02
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
14 changed files with 225 additions and 163 deletions

View file

@ -15,6 +15,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: optimize [multi-exact queries](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter) with many phrases to search. For example, `ip:in(path:="/foo/bar" | keep ip)` when there are many unique values for the `ip` field among log entries with the `/foo/bar` path.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add support for displaying the top 5 log streams in the hits graph. The remaining log streams are grouped into an "other" label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6545).
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add the ability to customize the graph display with options for bar, line, stepped line, and points.
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add fields for setting AccountID and ProjectID. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6631).

View file

@ -72,49 +72,64 @@ func (bf *bloomFilter) mustInit(tokens []string) {
// bloomFilterAdd adds the given tokens to the bloom filter bits
func bloomFilterAdd(bits []uint64, tokens []string) {
hashesCount := len(tokens) * bloomFilterHashesCount
a := encoding.GetUint64s(hashesCount)
a.A = appendTokensHashes(a.A[:0], tokens)
maxBits := uint64(len(bits)) * 64
for _, h := range a.A {
idx := h % maxBits
i := idx / 64
j := idx % 64
mask := uint64(1) << j
w := bits[i]
if (w & mask) == 0 {
bits[i] = w | mask
}
}
encoding.PutUint64s(a)
}
// appendTokensHashes appends hashes for the given tokens to dst and returns the result.
//
// The appended hashes can then be passed to bloomFilter.containsAll().
func appendTokensHashes(dst []uint64, tokens []string) []uint64 {
dstLen := len(dst)
hashesCount := len(tokens) * bloomFilterHashesCount
dst = slicesutil.SetLength(dst, dstLen+hashesCount)
dst = dst[:dstLen]
var buf [8]byte
hp := (*uint64)(unsafe.Pointer(&buf[0]))
for _, token := range tokens {
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
for i := 0; i < bloomFilterHashesCount; i++ {
hi := xxhash.Sum64(buf[:])
h := xxhash.Sum64(buf[:])
(*hp)++
idx := hi % maxBits
i := idx / 64
j := idx % 64
mask := uint64(1) << j
w := bits[i]
if (w & mask) == 0 {
bits[i] = w | mask
}
dst = append(dst, h)
}
}
return dst
}
// containsAll returns true if bf contains all the given tokens.
func (bf *bloomFilter) containsAll(tokens []string) bool {
// containsAll returns true if bf contains all the given token hashes generated by appendTokensHashes.
func (bf *bloomFilter) containsAll(hashes []uint64) bool {
bits := bf.bits
if len(bits) == 0 {
return true
}
maxBits := uint64(len(bits)) * 64
var buf [8]byte
hp := (*uint64)(unsafe.Pointer(&buf[0]))
for _, token := range tokens {
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
for i := 0; i < bloomFilterHashesCount; i++ {
hi := xxhash.Sum64(buf[:])
(*hp)++
idx := hi % maxBits
i := idx / 64
j := idx % 64
mask := uint64(1) << j
w := bits[i]
if (w & mask) == 0 {
// The token is missing
return false
}
for _, h := range hashes {
idx := h % maxBits
i := idx / 64
j := idx % 64
mask := uint64(1) << j
w := bits[i]
if (w & mask) == 0 {
// The token is missing
return false
}
}
return true

View file

@ -14,8 +14,9 @@ func TestBloomFilter(t *testing.T) {
if err := bf.unmarshal(data); err != nil {
t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
}
if !bf.containsAll(tokens) {
t.Fatalf("bloomFilterContains must return true for the added tokens")
tokensHashes := appendTokensHashes(nil, tokens)
if !bf.containsAll(tokensHashes) {
t.Fatalf("containsAll must return true for the added tokens")
}
}
f(nil)
@ -67,7 +68,8 @@ func TestBloomFilterFalsePositive(t *testing.T) {
falsePositives := 0
for i := range tokens {
token := fmt.Sprintf("non-existing-token_%d", i)
if bf.containsAll([]string{token}) {
tokensHashes := appendTokensHashes(nil, []string{token})
if bf.containsAll(tokensHashes) {
falsePositives++
}
}

View file

@ -18,8 +18,9 @@ type filterAnd struct {
}
type fieldTokens struct {
field string
tokens []string
field string
tokens []string
tokensHashes []uint64
}
func (fa *filterAnd) String() string {
@ -76,16 +77,16 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
return true
}
for _, fieldTokens := range byFieldTokens {
fieldName := fieldTokens.field
tokens := fieldTokens.tokens
for _, ft := range byFieldTokens {
fieldName := ft.field
tokens := ft.tokens
v := bs.csh.getConstColumnValue(fieldName)
if v != "" {
if !matchStringByAllTokens(v, tokens) {
return false
if matchStringByAllTokens(v, tokens) {
continue
}
continue
return false
}
ch := bs.csh.getColumnHeader(fieldName)
@ -94,12 +95,12 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
}
if ch.valueType == valueTypeDict {
if !matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
return false
if matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
continue
}
continue
return false
}
if !matchBloomFilterAllTokens(bs, ch, tokens) {
if !matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
return false
}
}
@ -170,8 +171,9 @@ func (fa *filterAnd) initByFieldTokens() {
}
byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName,
tokens: tokens,
field: fieldName,
tokens: tokens,
tokensHashes: appendTokensHashes(nil, tokens),
})
}

View file

@ -24,11 +24,9 @@ type filterAnyCasePhrase struct {
phraseUppercaseOnce sync.Once
phraseUppercase string
tokensOnce sync.Once
tokens []string
tokensUppercaseOnce sync.Once
tokensUppercase []string
tokensOnce sync.Once
tokensHashes []uint64
tokensHashesUppercase []uint64
}
func (fp *filterAnyCasePhrase) String() string {
@ -39,27 +37,25 @@ func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePhrase) getTokens() []string {
func (fp *filterAnyCasePhrase) getTokensHashes() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
return fp.tokensHashes
}
// getTokensHashesUppercase returns fp.tokensHashesUppercase.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fp *filterAnyCasePhrase) getTokensHashesUppercase() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokensHashesUppercase
}
func (fp *filterAnyCasePhrase) initTokens() {
fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
}
tokens := tokenizeStrings(nil, []string{fp.phrase})
fp.tokensHashes = appendTokensHashes(nil, tokens)
func (fp *filterAnyCasePhrase) getTokensUppercase() []string {
fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
return fp.tokensUppercase
}
func (fp *filterAnyCasePhrase) initTokensUppercase() {
tokens := fp.getTokens()
tokensUppercase := make([]string, len(tokens))
for i, token := range tokens {
tokensUppercase[i] = strings.ToUpper(token)
}
fp.tokensUppercase = tokensUppercase
fp.tokensHashesUppercase = appendTokensHashes(nil, tokensUppercase)
}
func (fp *filterAnyCasePhrase) getPhraseLowercase() string {
@ -109,7 +105,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fp.getTokens()
tokens := fp.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -130,7 +126,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens)
case valueTypeTimestampISO8601:
phraseUppercase := fp.getPhraseUppercase()
tokensUppercase := fp.getTokensUppercase()
tokensUppercase := fp.getTokensHashesUppercase()
matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokensUppercase)
default:
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)

View file

@ -25,11 +25,9 @@ type filterAnyCasePrefix struct {
prefixUppercaseOnce sync.Once
prefixUppercase string
tokensOnce sync.Once
tokens []string
tokensUppercaseOnce sync.Once
tokensUppercase []string
tokensOnce sync.Once
tokensHashes []uint64
tokensUppercaseHashes []uint64
}
func (fp *filterAnyCasePrefix) String() string {
@ -43,27 +41,25 @@ func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fp.fieldName)
}
func (fp *filterAnyCasePrefix) getTokens() []string {
func (fp *filterAnyCasePrefix) getTokensHashes() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokens
return fp.tokensHashes
}
// getTokensUppercaseHashes returns fp.tokensUppercaseHashes.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fp *filterAnyCasePrefix) getTokensUppercaseHashes() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokensUppercaseHashes
}
func (fp *filterAnyCasePrefix) initTokens() {
fp.tokens = getTokensSkipLast(fp.prefix)
}
tokens := getTokensSkipLast(fp.prefix)
fp.tokensHashes = appendTokensHashes(nil, tokens)
func (fp *filterAnyCasePrefix) getTokensUppercase() []string {
fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
return fp.tokensUppercase
}
func (fp *filterAnyCasePrefix) initTokensUppercase() {
tokens := fp.getTokens()
tokensUppercase := make([]string, len(tokens))
for i, token := range tokens {
tokensUppercase[i] = strings.ToUpper(token)
}
fp.tokensUppercase = tokensUppercase
fp.tokensUppercaseHashes = appendTokensHashes(nil, tokensUppercase)
}
func (fp *filterAnyCasePrefix) getPrefixLowercase() string {
@ -110,7 +106,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fp.getTokens()
tokens := fp.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -131,7 +127,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens)
case valueTypeTimestampISO8601:
prefixUppercase := fp.getPrefixUppercase()
tokensUppercase := fp.getTokensUppercase()
tokensUppercase := fp.getTokensUppercaseHashes()
matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokensUppercase)
default:
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)

View file

@ -16,8 +16,9 @@ type filterExact struct {
fieldName string
value string
tokensOnce sync.Once
tokens []string
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
}
func (fe *filterExact) String() string {
@ -33,8 +34,14 @@ func (fe *filterExact) getTokens() []string {
return fe.tokens
}
// getTokensHashes returns fe.tokensHashes.
//
// fe.initTokens is run through fe.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fe *filterExact) getTokensHashes() []uint64 {
fe.tokensOnce.Do(fe.initTokens)
return fe.tokensHashes
}
// initTokens fills fe.tokens with the result of tokenizeStrings for fe.value
// and fe.tokensHashes with the hashes appendTokensHashes generates for these tokens.
//
// It is run via fe.tokensOnce from getTokensHashes.
func (fe *filterExact) initTokens() {
	tokens := tokenizeStrings(nil, []string{fe.value})
	hashes := appendTokensHashes(nil, tokens)

	fe.tokens = tokens
	fe.tokensHashes = hashes
}
func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
@ -186,7 +193,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fe.getTokens()
tokens := fe.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -212,7 +219,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
n, ok := tryParseTimestampISO8601(value)
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
bm.resetBits()
@ -224,7 +231,7 @@ func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bi
bbPool.Put(bb)
}
func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
n, ok := tryParseIPv4(value)
if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue {
bm.resetBits()
@ -236,7 +243,7 @@ func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value
bbPool.Put(bb)
}
func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
f, ok := tryParseFloat64(value)
if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) {
bm.resetBits()
@ -262,7 +269,7 @@ func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap,
bbPool.Put(bb)
}
func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -272,7 +279,7 @@ func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, valu
})
}
func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
n, ok := tryParseUint64(phrase)
if !ok || n < ch.minValue || n > ch.maxValue {
bm.resetBits()
@ -284,7 +291,7 @@ func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
bbPool.Put(bb)
}
func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
n, ok := tryParseUint64(phrase)
if !ok || n < ch.minValue || n > ch.maxValue {
bm.resetBits()
@ -296,7 +303,7 @@ func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
bbPool.Put(bb)
}
func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
n, ok := tryParseUint64(phrase)
if !ok || n < ch.minValue || n > ch.maxValue {
bm.resetBits()
@ -308,7 +315,7 @@ func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
bbPool.Put(bb)
}
func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
n, ok := tryParseUint64(phrase)
if !ok || n < ch.minValue || n > ch.maxValue {
bm.resetBits()
@ -320,7 +327,7 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
bbPool.Put(bb)
}
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) {
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return

View file

@ -15,8 +15,9 @@ type filterExactPrefix struct {
fieldName string
prefix string
tokensOnce sync.Once
tokens []string
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
}
func (fep *filterExactPrefix) String() string {
@ -32,8 +33,14 @@ func (fep *filterExactPrefix) getTokens() []string {
return fep.tokens
}
// getTokensHashes returns fep.tokensHashes.
//
// fep.initTokens is run through fep.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fep *filterExactPrefix) getTokensHashes() []uint64 {
fep.tokensOnce.Do(fep.initTokens)
return fep.tokensHashes
}
// initTokens fills fep.tokens with the result of getTokensSkipLast for fep.prefix
// and fep.tokensHashes with the hashes appendTokensHashes generates for these tokens.
//
// It is run via fep.tokensOnce from getTokensHashes.
func (fep *filterExactPrefix) initTokens() {
	tokens := getTokensSkipLast(fep.prefix)
	hashes := appendTokensHashes(nil, tokens)

	fep.tokens = tokens
	fep.tokensHashes = hashes
}
func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
@ -62,7 +69,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fep.getTokens()
tokens := fep.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -88,7 +95,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
return
}
@ -105,11 +112,11 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
bbPool.Put(bb)
}
func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
return
}
if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) {
if prefix < "0" || prefix > "9" || len(tokens) > 3*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
@ -122,12 +129,12 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
bbPool.Put(bb)
}
func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
// An empty prefix matches all the values
return
}
if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) {
if len(tokens) > 2*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
}
@ -153,7 +160,7 @@ func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap,
bbPool.Put(bb)
}
func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -163,7 +170,7 @@ func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
})
}
func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
return
}
@ -176,7 +183,7 @@ func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
bbPool.Put(bb)
}
func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
return
}
@ -189,7 +196,7 @@ func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
bbPool.Put(bb)
}
func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
return
}
@ -202,7 +209,7 @@ func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
bbPool.Put(bb)
}
func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
return
}
@ -215,7 +222,7 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
bbPool.Put(bb)
}
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []string) bool {
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool {
if prefix == "" {
// An empty prefix matches all the values
return false

View file

@ -28,9 +28,9 @@ type filterIn struct {
// qFieldName must be set to field name for obtaining values from if q is non-nil.
qFieldName string
tokensOnce sync.Once
commonTokens []string
tokenSets [][]string
tokensOnce sync.Once
commonTokensHashes []uint64
tokenSetsHashes [][]uint64
stringValuesOnce sync.Once
stringValues map[string]struct{}
@ -76,16 +76,21 @@ func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
neededFields.add(fi.fieldName)
}
func (fi *filterIn) getTokens() ([]string, [][]string) {
func (fi *filterIn) getTokensHashes() ([]uint64, [][]uint64) {
fi.tokensOnce.Do(fi.initTokens)
return fi.commonTokens, fi.tokenSets
return fi.commonTokensHashes, fi.tokenSetsHashes
}
func (fi *filterIn) initTokens() {
commonTokens, tokenSets := getCommonTokensAndTokenSets(fi.values)
fi.commonTokens = commonTokens
fi.tokenSets = tokenSets
fi.commonTokensHashes = appendTokensHashes(nil, commonTokens)
tokenSetsHashes := make([][]uint64, len(tokenSets))
for i, tokens := range tokenSets {
tokenSetsHashes[i] = appendTokensHashes(nil, tokens)
}
fi.tokenSetsHashes = tokenSetsHashes
}
func (fi *filterIn) getStringValues() map[string]struct{} {
@ -374,7 +379,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
commonTokens, tokenSets := fi.getTokens()
commonTokens, tokenSets := fi.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -409,7 +414,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []string, tokenSets [][]string) {
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []uint64, tokenSets [][]uint64) {
if len(values) == 0 {
bm.resetBits()
return
@ -424,7 +429,7 @@ func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[str
})
}
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []string, tokenSets [][]string) bool {
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []uint64, tokenSets [][]uint64) bool {
if len(commonTokens) > 0 {
if !matchBloomFilterAllTokens(bs, ch, commonTokens) {
return false
@ -511,6 +516,9 @@ func getCommonTokens(tokenSets [][]string) []string {
}
}
}
if len(m) == 0 {
return nil
}
tokens := make([]string, 0, len(m))
for token := range m {

View file

@ -89,9 +89,9 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
return true
}
for _, fieldTokens := range byFieldTokens {
fieldName := fieldTokens.field
tokens := fieldTokens.tokens
for _, ft := range byFieldTokens {
fieldName := ft.field
tokens := ft.tokens
v := bs.csh.getConstColumnValue(fieldName)
if v != "" {
@ -112,7 +112,7 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
}
continue
}
if matchBloomFilterAllTokens(bs, ch, tokens) {
if matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
return true
}
}
@ -190,8 +190,9 @@ func (fo *filterOr) initByFieldTokens() {
break
}
byFieldTokens = append(byFieldTokens, fieldTokens{
field: fieldName,
tokens: commonTokens,
field: fieldName,
tokens: commonTokens,
tokensHashes: appendTokensHashes(nil, commonTokens),
})
}

View file

@ -24,8 +24,9 @@ type filterPhrase struct {
fieldName string
phrase string
tokensOnce sync.Once
tokens []string
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
}
func (fp *filterPhrase) String() string {
@ -41,8 +42,14 @@ func (fp *filterPhrase) getTokens() []string {
return fp.tokens
}
// getTokensHashes returns fp.tokensHashes.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fp *filterPhrase) getTokensHashes() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokensHashes
}
// initTokens fills fp.tokens with the result of tokenizeStrings for fp.phrase
// and fp.tokensHashes with the hashes appendTokensHashes generates for these tokens.
//
// It is run via fp.tokensOnce from getTokensHashes.
func (fp *filterPhrase) initTokens() {
	tokens := tokenizeStrings(nil, []string{fp.phrase})
	hashes := appendTokensHashes(nil, tokens)

	fp.tokens = tokens
	fp.tokensHashes = hashes
}
func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
@ -73,7 +80,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fp.getTokens()
tokens := fp.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -99,7 +106,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
_, ok := tryParseTimestampISO8601(phrase)
if ok {
// Fast path - the phrase contains complete timestamp, so we can use exact search
@ -121,7 +128,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
_, ok := tryParseIPv4(phrase)
if ok {
// Fast path - phrase contains the full IP address, so we can use exact matching
@ -145,7 +152,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
bbPool.Put(bb)
}
func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
// The phrase may contain a part of the floating-point number.
// For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
// This means we cannot search in binary representation of floating-point numbers.
@ -187,7 +194,7 @@ func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
bbPool.Put(bb)
}
func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -288,7 +295,7 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value str
})
}
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []uint64) bool {
if len(tokens) == 0 {
return true
}

View file

@ -19,8 +19,9 @@ type filterPrefix struct {
fieldName string
prefix string
tokensOnce sync.Once
tokens []string
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
}
func (fp *filterPrefix) String() string {
@ -39,8 +40,14 @@ func (fp *filterPrefix) getTokens() []string {
return fp.tokens
}
// getTokensHashes returns fp.tokensHashes.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fp *filterPrefix) getTokensHashes() []uint64 {
fp.tokensOnce.Do(fp.initTokens)
return fp.tokensHashes
}
// initTokens fills fp.tokens with the result of getTokensSkipLast for fp.prefix
// and fp.tokensHashes with the hashes appendTokensHashes generates for these tokens.
//
// It is run via fp.tokensOnce from getTokensHashes.
func (fp *filterPrefix) initTokens() {
	tokens := getTokensSkipLast(fp.prefix)
	hashes := appendTokensHashes(nil, tokens)

	fp.tokens = tokens
	fp.tokensHashes = hashes
}
func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
@ -68,7 +75,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fp.getTokens()
tokens := fp.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -94,7 +101,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
// Fast path - all the timestamp values match an empty prefix aka `*`
return
@ -115,7 +122,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
// Fast path - all the ipv4 values match an empty prefix aka `*`
return
@ -136,7 +143,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
bbPool.Put(bb)
}
func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if prefix == "" {
// Fast path - all the float64 values match an empty prefix aka `*`
return
@ -177,7 +184,7 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
bbPool.Put(bb)
}
func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return

View file

@ -16,8 +16,9 @@ type filterRegexp struct {
fieldName string
re *regexutil.Regex
tokens []string
tokensOnce sync.Once
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
}
func (fr *filterRegexp) String() string {
@ -33,12 +34,18 @@ func (fr *filterRegexp) getTokens() []string {
return fr.tokens
}
// getTokensHashes returns fr.tokensHashes.
//
// fr.initTokens is run through fr.tokensOnce before the field is read,
// so the hashes are calculated at most once per filter.
func (fr *filterRegexp) getTokensHashes() []uint64 {
fr.tokensOnce.Do(fr.initTokens)
return fr.tokensHashes
}
// initTokens fills fr.tokens and fr.tokensHashes from the literals of fr.re.
//
// Every literal returned by fr.re.GetLiterals() is replaced in place with the result
// of skipFirstLastToken, the literals are tokenized with tokenizeStrings,
// and the hashes for the obtained tokens are generated by appendTokensHashes.
//
// It is run via fr.tokensOnce from getTokensHashes.
func (fr *filterRegexp) initTokens() {
	lits := fr.re.GetLiterals()
	for idx := range lits {
		lits[idx] = skipFirstLastToken(lits[idx])
	}

	tokens := tokenizeStrings(nil, lits)
	fr.tokens = tokens
	fr.tokensHashes = appendTokensHashes(nil, tokens)
}
func skipFirstLastToken(s string) string {
@ -89,7 +96,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fr.getTokens()
tokens := fr.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -115,7 +122,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -128,7 +135,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
bbPool.Put(bb)
}
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -141,7 +148,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu
bbPool.Put(bb)
}
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -167,7 +174,7 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
bbPool.Put(bb)
}
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -177,7 +184,7 @@ func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
})
}
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -190,7 +197,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
bbPool.Put(bb)
}
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -203,7 +210,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -216,7 +223,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
bbPool.Put(bb)
}
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return

View file

@ -15,8 +15,9 @@ type filterSequence struct {
fieldName string
phrases []string
tokensOnce sync.Once
tokens []string
tokensOnce sync.Once
tokens []string
tokensHashes []uint64
nonEmptyPhrasesOnce sync.Once
nonEmptyPhrases []string
@ -40,10 +41,15 @@ func (fs *filterSequence) getTokens() []string {
return fs.tokens
}
// getTokensHashes returns fs.tokensHashes, first making sure fs.initTokens
// has been run through fs.tokensOnce.
func (fs *filterSequence) getTokensHashes() []uint64 {
fs.tokensOnce.Do(fs.initTokens)
return fs.tokensHashes
}
// initTokens fills fs.tokens with the tokens obtained by tokenizeStrings from
// the non-empty phrases, and fs.tokensHashes with the result of
// appendTokensHashes over those tokens.
func (fs *filterSequence) initTokens() {
phrases := fs.getNonEmptyPhrases()
// NOTE(review): fs.tokens is assigned twice from the same tokenizeStrings
// call; the first two statements look like the pre-change lines of the commit
// shown next to their replacement - confirm against the real file.
tokens := tokenizeStrings(nil, phrases)
fs.tokens = tokens
fs.tokens = tokenizeStrings(nil, phrases)
fs.tokensHashes = appendTokensHashes(nil, fs.tokens)
}
func (fs *filterSequence) getNonEmptyPhrases() []string {
@ -100,7 +106,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
return
}
tokens := fs.getTokens()
tokens := fs.getTokensHashes()
switch ch.valueType {
case valueTypeString:
@ -126,7 +132,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
}
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) == 1 {
matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
return
@ -145,7 +151,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
bbPool.Put(bb)
}
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) == 1 {
matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
return
@ -166,7 +172,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
bbPool.Put(bb)
}
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -197,7 +203,7 @@ func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, ph
bbPool.Put(bb)
}
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) {
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if !matchBloomFilterAllTokens(bs, ch, tokens) {
bm.resetBits()
return
@ -207,7 +213,7 @@ func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
})
}
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) > 1 {
bm.resetBits()
return
@ -215,7 +221,7 @@ func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases
matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
}
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) > 1 {
bm.resetBits()
return
@ -223,7 +229,7 @@ func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
}
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) > 1 {
bm.resetBits()
return
@ -231,7 +237,7 @@ func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
}
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
if len(phrases) > 1 {
bm.resetBits()
return