diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 5cf6b5504..2af3859db 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -15,6 +15,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: optimize [multi-exact queries](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter) with many phrases to search. For example, this speeds up `ip:in(path:="/foo/bar" | keep ip)` when there are many unique values for the `ip` field among log entries with the `/foo/bar` path. * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add support for displaying the top 5 log streams in the hits graph. The remaining log streams are grouped into an "other" label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6545). * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add the ability to customize the graph display with options for bar, line, stepped line, and points. * FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add fields for setting AccountID and ProjectID. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6631). 
diff --git a/lib/logstorage/bloomfilter.go b/lib/logstorage/bloomfilter.go index 187073e29..e012ac145 100644 --- a/lib/logstorage/bloomfilter.go +++ b/lib/logstorage/bloomfilter.go @@ -72,49 +72,64 @@ func (bf *bloomFilter) mustInit(tokens []string) { // bloomFilterAdd adds the given tokens to the bloom filter bits func bloomFilterAdd(bits []uint64, tokens []string) { + hashesCount := len(tokens) * bloomFilterHashesCount + a := encoding.GetUint64s(hashesCount) + a.A = appendTokensHashes(a.A[:0], tokens) + maxBits := uint64(len(bits)) * 64 + for _, h := range a.A { + idx := h % maxBits + i := idx / 64 + j := idx % 64 + mask := uint64(1) << j + w := bits[i] + if (w & mask) == 0 { + bits[i] = w | mask + } + } + + encoding.PutUint64s(a) +} + +// appendTokensHashes appends hashes for the given tokens to dst and returns the result. +// +// the appended hashes can be then passed to bloomFilter.containsAll(). +func appendTokensHashes(dst []uint64, tokens []string) []uint64 { + dstLen := len(dst) + hashesCount := len(tokens) * bloomFilterHashesCount + + dst = slicesutil.SetLength(dst, dstLen+hashesCount) + dst = dst[:dstLen] + var buf [8]byte hp := (*uint64)(unsafe.Pointer(&buf[0])) for _, token := range tokens { *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token)) for i := 0; i < bloomFilterHashesCount; i++ { - hi := xxhash.Sum64(buf[:]) + h := xxhash.Sum64(buf[:]) (*hp)++ - idx := hi % maxBits - i := idx / 64 - j := idx % 64 - mask := uint64(1) << j - w := bits[i] - if (w & mask) == 0 { - bits[i] = w | mask - } + dst = append(dst, h) } } + return dst } -// containsAll returns true if bf contains all the given tokens. -func (bf *bloomFilter) containsAll(tokens []string) bool { +// containsAll returns true if bf contains all the given tokens hashes generated by appendTokensHashes. 
+func (bf *bloomFilter) containsAll(hashes []uint64) bool { bits := bf.bits if len(bits) == 0 { return true } maxBits := uint64(len(bits)) * 64 - var buf [8]byte - hp := (*uint64)(unsafe.Pointer(&buf[0])) - for _, token := range tokens { - *hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token)) - for i := 0; i < bloomFilterHashesCount; i++ { - hi := xxhash.Sum64(buf[:]) - (*hp)++ - idx := hi % maxBits - i := idx / 64 - j := idx % 64 - mask := uint64(1) << j - w := bits[i] - if (w & mask) == 0 { - // The token is missing - return false - } + for _, h := range hashes { + idx := h % maxBits + i := idx / 64 + j := idx % 64 + mask := uint64(1) << j + w := bits[i] + if (w & mask) == 0 { + // The token is missing + return false } } return true diff --git a/lib/logstorage/bloomfilter_test.go b/lib/logstorage/bloomfilter_test.go index cd94e665a..4242b4890 100644 --- a/lib/logstorage/bloomfilter_test.go +++ b/lib/logstorage/bloomfilter_test.go @@ -14,8 +14,9 @@ func TestBloomFilter(t *testing.T) { if err := bf.unmarshal(data); err != nil { t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err) } - if !bf.containsAll(tokens) { - t.Fatalf("bloomFilterContains must return true for the added tokens") + tokensHashes := appendTokensHashes(nil, tokens) + if !bf.containsAll(tokensHashes) { + t.Fatalf("containsAll must return true for the added tokens") } } f(nil) @@ -67,7 +68,8 @@ func TestBloomFilterFalsePositive(t *testing.T) { falsePositives := 0 for i := range tokens { token := fmt.Sprintf("non-existing-token_%d", i) - if bf.containsAll([]string{token}) { + tokensHashes := appendTokensHashes(nil, []string{token}) + if bf.containsAll(tokensHashes) { falsePositives++ } } diff --git a/lib/logstorage/filter_and.go b/lib/logstorage/filter_and.go index 513d3e23b..64fa0c17a 100644 --- a/lib/logstorage/filter_and.go +++ b/lib/logstorage/filter_and.go @@ -18,8 +18,9 @@ type filterAnd struct { } type fieldTokens struct { - field string - tokens []string + field string + tokens 
[]string + tokensHashes []uint64 } func (fa *filterAnd) String() string { @@ -76,16 +77,16 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool { return true } - for _, fieldTokens := range byFieldTokens { - fieldName := fieldTokens.field - tokens := fieldTokens.tokens + for _, ft := range byFieldTokens { + fieldName := ft.field + tokens := ft.tokens v := bs.csh.getConstColumnValue(fieldName) if v != "" { - if !matchStringByAllTokens(v, tokens) { - return false + if matchStringByAllTokens(v, tokens) { + continue } - continue + return false } ch := bs.csh.getColumnHeader(fieldName) @@ -94,12 +95,12 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool { } if ch.valueType == valueTypeDict { - if !matchDictValuesByAllTokens(ch.valuesDict.values, tokens) { - return false + if matchDictValuesByAllTokens(ch.valuesDict.values, tokens) { + continue } - continue + return false } - if !matchBloomFilterAllTokens(bs, ch, tokens) { + if !matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) { return false } } @@ -170,8 +171,9 @@ func (fa *filterAnd) initByFieldTokens() { } byFieldTokens = append(byFieldTokens, fieldTokens{ - field: fieldName, - tokens: tokens, + field: fieldName, + tokens: tokens, + tokensHashes: appendTokensHashes(nil, tokens), }) } diff --git a/lib/logstorage/filter_any_case_phrase.go b/lib/logstorage/filter_any_case_phrase.go index db3358d46..8c3c924c7 100644 --- a/lib/logstorage/filter_any_case_phrase.go +++ b/lib/logstorage/filter_any_case_phrase.go @@ -24,11 +24,9 @@ type filterAnyCasePhrase struct { phraseUppercaseOnce sync.Once phraseUppercase string - tokensOnce sync.Once - tokens []string - - tokensUppercaseOnce sync.Once - tokensUppercase []string + tokensOnce sync.Once + tokensHashes []uint64 + tokensHashesUppercase []uint64 } func (fp *filterAnyCasePhrase) String() string { @@ -39,27 +37,25 @@ func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) { neededFields.add(fp.fieldName) } -func (fp 
*filterAnyCasePhrase) getTokens() []string { +func (fp *filterAnyCasePhrase) getTokensHashes() []uint64 { fp.tokensOnce.Do(fp.initTokens) - return fp.tokens + return fp.tokensHashes +} + +func (fp *filterAnyCasePhrase) getTokensHashesUppercase() []uint64 { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokensHashesUppercase } func (fp *filterAnyCasePhrase) initTokens() { - fp.tokens = tokenizeStrings(nil, []string{fp.phrase}) -} + tokens := tokenizeStrings(nil, []string{fp.phrase}) + fp.tokensHashes = appendTokensHashes(nil, tokens) -func (fp *filterAnyCasePhrase) getTokensUppercase() []string { - fp.tokensUppercaseOnce.Do(fp.initTokensUppercase) - return fp.tokensUppercase -} - -func (fp *filterAnyCasePhrase) initTokensUppercase() { - tokens := fp.getTokens() tokensUppercase := make([]string, len(tokens)) for i, token := range tokens { tokensUppercase[i] = strings.ToUpper(token) } - fp.tokensUppercase = tokensUppercase + fp.tokensHashesUppercase = appendTokensHashes(nil, tokensUppercase) } func (fp *filterAnyCasePhrase) getPhraseLowercase() string { @@ -109,7 +105,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fp.getTokens() + tokens := fp.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -130,7 +126,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens) case valueTypeTimestampISO8601: phraseUppercase := fp.getPhraseUppercase() - tokensUppercase := fp.getTokensUppercase() + tokensUppercase := fp.getTokensHashesUppercase() matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokensUppercase) default: logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) diff --git a/lib/logstorage/filter_any_case_prefix.go b/lib/logstorage/filter_any_case_prefix.go index 06d429303..10561c18e 100644 --- a/lib/logstorage/filter_any_case_prefix.go +++ b/lib/logstorage/filter_any_case_prefix.go @@ 
-25,11 +25,9 @@ type filterAnyCasePrefix struct { prefixUppercaseOnce sync.Once prefixUppercase string - tokensOnce sync.Once - tokens []string - - tokensUppercaseOnce sync.Once - tokensUppercase []string + tokensOnce sync.Once + tokensHashes []uint64 + tokensUppercaseHashes []uint64 } func (fp *filterAnyCasePrefix) String() string { @@ -43,27 +41,25 @@ func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) { neededFields.add(fp.fieldName) } -func (fp *filterAnyCasePrefix) getTokens() []string { +func (fp *filterAnyCasePrefix) getTokensHashes() []uint64 { fp.tokensOnce.Do(fp.initTokens) - return fp.tokens + return fp.tokensHashes +} + +func (fp *filterAnyCasePrefix) getTokensUppercaseHashes() []uint64 { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokensUppercaseHashes } func (fp *filterAnyCasePrefix) initTokens() { - fp.tokens = getTokensSkipLast(fp.prefix) -} + tokens := getTokensSkipLast(fp.prefix) + fp.tokensHashes = appendTokensHashes(nil, tokens) -func (fp *filterAnyCasePrefix) getTokensUppercase() []string { - fp.tokensUppercaseOnce.Do(fp.initTokensUppercase) - return fp.tokensUppercase -} - -func (fp *filterAnyCasePrefix) initTokensUppercase() { - tokens := fp.getTokens() tokensUppercase := make([]string, len(tokens)) for i, token := range tokens { tokensUppercase[i] = strings.ToUpper(token) } - fp.tokensUppercase = tokensUppercase + fp.tokensUppercaseHashes = appendTokensHashes(nil, tokensUppercase) } func (fp *filterAnyCasePrefix) getPrefixLowercase() string { @@ -110,7 +106,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fp.getTokens() + tokens := fp.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -131,7 +127,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens) case valueTypeTimestampISO8601: prefixUppercase := fp.getPrefixUppercase() - tokensUppercase := 
fp.getTokensUppercase() + tokensUppercase := fp.getTokensUppercaseHashes() matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokensUppercase) default: logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType) diff --git a/lib/logstorage/filter_exact.go b/lib/logstorage/filter_exact.go index 98e9ea1ee..70c119aba 100644 --- a/lib/logstorage/filter_exact.go +++ b/lib/logstorage/filter_exact.go @@ -16,8 +16,9 @@ type filterExact struct { fieldName string value string - tokensOnce sync.Once - tokens []string + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 } func (fe *filterExact) String() string { @@ -33,8 +34,14 @@ func (fe *filterExact) getTokens() []string { return fe.tokens } +func (fe *filterExact) getTokensHashes() []uint64 { + fe.tokensOnce.Do(fe.initTokens) + return fe.tokensHashes +} + func (fe *filterExact) initTokens() { fe.tokens = tokenizeStrings(nil, []string{fe.value}) + fe.tokensHashes = appendTokensHashes(nil, fe.tokens) } func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) { @@ -186,7 +193,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fe.getTokens() + tokens := fe.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -212,7 +219,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { +func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) { n, ok := tryParseTimestampISO8601(value) if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) { bm.resetBits() @@ -224,7 +231,7 @@ func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bi bbPool.Put(bb) } -func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { +func matchIPv4ByExactValue(bs *blockSearch, 
ch *columnHeader, bm *bitmap, value string, tokens []uint64) { n, ok := tryParseIPv4(value) if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue { bm.resetBits() @@ -236,7 +243,7 @@ func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value bbPool.Put(bb) } -func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { +func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) { f, ok := tryParseFloat64(value) if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) { bm.resetBits() @@ -262,7 +269,7 @@ func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, bbPool.Put(bb) } -func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { +func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -272,7 +279,7 @@ func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, valu }) } -func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { n, ok := tryParseUint64(phrase) if !ok || n < ch.minValue || n > ch.maxValue { bm.resetBits() @@ -284,7 +291,7 @@ func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phras bbPool.Put(bb) } -func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { n, ok := tryParseUint64(phrase) if !ok || n < ch.minValue || n > ch.maxValue { bm.resetBits() @@ -296,7 +303,7 @@ func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm 
*bitmap, phra bbPool.Put(bb) } -func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { n, ok := tryParseUint64(phrase) if !ok || n < ch.minValue || n > ch.maxValue { bm.resetBits() @@ -308,7 +315,7 @@ func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra bbPool.Put(bb) } -func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { n, ok := tryParseUint64(phrase) if !ok || n < ch.minValue || n > ch.maxValue { bm.resetBits() @@ -320,7 +327,7 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra bbPool.Put(bb) } -func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) { +func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return diff --git a/lib/logstorage/filter_exact_prefix.go b/lib/logstorage/filter_exact_prefix.go index e0b3b8648..7c241841c 100644 --- a/lib/logstorage/filter_exact_prefix.go +++ b/lib/logstorage/filter_exact_prefix.go @@ -15,8 +15,9 @@ type filterExactPrefix struct { fieldName string prefix string - tokensOnce sync.Once - tokens []string + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 } func (fep *filterExactPrefix) String() string { @@ -32,8 +33,14 @@ func (fep *filterExactPrefix) getTokens() []string { return fep.tokens } +func (fep *filterExactPrefix) getTokensHashes() []uint64 { + fep.tokensOnce.Do(fep.initTokens) + return fep.tokensHashes +} + func (fep *filterExactPrefix) initTokens() { fep.tokens = getTokensSkipLast(fep.prefix) + fep.tokensHashes = appendTokensHashes(nil, fep.tokens) } 
func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) { @@ -62,7 +69,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fep.getTokens() + tokens := fep.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -88,7 +95,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { return } @@ -105,11 +112,11 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b bbPool.Put(bb) } -func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { return } - if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) { + if prefix < "0" || prefix > "9" || len(tokens) > 3*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return } @@ -122,12 +129,12 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi bbPool.Put(bb) } -func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { // An empty prefix matches all the values return } - if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) { + if len(tokens) > 2*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return } @@ -153,7 +160,7 @@ func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, bbPool.Put(bb) } 
-func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -163,7 +170,7 @@ func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre }) } -func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { return } @@ -176,7 +183,7 @@ func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref bbPool.Put(bb) } -func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { return } @@ -189,7 +196,7 @@ func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre bbPool.Put(bb) } -func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { return } @@ -202,7 +209,7 @@ func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre bbPool.Put(bb) } -func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) { return } @@ -215,7 +222,7 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre 
bbPool.Put(bb) } -func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []string) bool { +func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool { if prefix == "" { // An empty prefix matches all the values return false diff --git a/lib/logstorage/filter_in.go b/lib/logstorage/filter_in.go index eb39f41f6..8b9e14caa 100644 --- a/lib/logstorage/filter_in.go +++ b/lib/logstorage/filter_in.go @@ -28,9 +28,9 @@ type filterIn struct { // qFieldName must be set to field name for obtaining values from if q is non-nil. qFieldName string - tokensOnce sync.Once - commonTokens []string - tokenSets [][]string + tokensOnce sync.Once + commonTokensHashes []uint64 + tokenSetsHashes [][]uint64 stringValuesOnce sync.Once stringValues map[string]struct{} @@ -76,16 +76,21 @@ func (fi *filterIn) updateNeededFields(neededFields fieldsSet) { neededFields.add(fi.fieldName) } -func (fi *filterIn) getTokens() ([]string, [][]string) { +func (fi *filterIn) getTokensHashes() ([]uint64, [][]uint64) { fi.tokensOnce.Do(fi.initTokens) - return fi.commonTokens, fi.tokenSets + return fi.commonTokensHashes, fi.tokenSetsHashes } func (fi *filterIn) initTokens() { commonTokens, tokenSets := getCommonTokensAndTokenSets(fi.values) - fi.commonTokens = commonTokens - fi.tokenSets = tokenSets + fi.commonTokensHashes = appendTokensHashes(nil, commonTokens) + + tokenSetsHashes := make([][]uint64, len(tokenSets)) + for i, tokens := range tokenSets { + tokenSetsHashes[i] = appendTokensHashes(nil, tokens) + } + fi.tokenSetsHashes = tokenSetsHashes } func (fi *filterIn) getStringValues() map[string]struct{} { @@ -374,7 +379,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - commonTokens, tokenSets := fi.getTokens() + commonTokens, tokenSets := fi.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -409,7 +414,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func 
matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []string, tokenSets [][]string) { +func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []uint64, tokenSets [][]uint64) { if len(values) == 0 { bm.resetBits() return @@ -424,7 +429,7 @@ func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[str }) } -func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []string, tokenSets [][]string) bool { +func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []uint64, tokenSets [][]uint64) bool { if len(commonTokens) > 0 { if !matchBloomFilterAllTokens(bs, ch, commonTokens) { return false @@ -511,6 +516,9 @@ func getCommonTokens(tokenSets [][]string) []string { } } } + if len(m) == 0 { + return nil + } tokens := make([]string, 0, len(m)) for token := range m { diff --git a/lib/logstorage/filter_or.go b/lib/logstorage/filter_or.go index db281c1b6..a292bdedd 100644 --- a/lib/logstorage/filter_or.go +++ b/lib/logstorage/filter_or.go @@ -89,9 +89,9 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool { return true } - for _, fieldTokens := range byFieldTokens { - fieldName := fieldTokens.field - tokens := fieldTokens.tokens + for _, ft := range byFieldTokens { + fieldName := ft.field + tokens := ft.tokens v := bs.csh.getConstColumnValue(fieldName) if v != "" { @@ -112,7 +112,7 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool { } continue } - if matchBloomFilterAllTokens(bs, ch, tokens) { + if matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) { return true } } @@ -190,8 +190,9 @@ func (fo *filterOr) initByFieldTokens() { break } byFieldTokens = append(byFieldTokens, fieldTokens{ - field: fieldName, - tokens: commonTokens, + field: fieldName, + tokens: commonTokens, + tokensHashes: appendTokensHashes(nil, commonTokens), }) } diff --git a/lib/logstorage/filter_phrase.go 
b/lib/logstorage/filter_phrase.go index 3b4c04174..aa73d8414 100644 --- a/lib/logstorage/filter_phrase.go +++ b/lib/logstorage/filter_phrase.go @@ -24,8 +24,9 @@ type filterPhrase struct { fieldName string phrase string - tokensOnce sync.Once - tokens []string + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 } func (fp *filterPhrase) String() string { @@ -41,8 +42,14 @@ func (fp *filterPhrase) getTokens() []string { return fp.tokens } +func (fp *filterPhrase) getTokensHashes() []uint64 { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokensHashes +} + func (fp *filterPhrase) initTokens() { fp.tokens = tokenizeStrings(nil, []string{fp.phrase}) + fp.tokensHashes = appendTokensHashes(nil, fp.tokens) } func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) { @@ -73,7 +80,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fp.getTokens() + tokens := fp.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -99,7 +106,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { _, ok := tryParseTimestampISO8601(phrase) if ok { // Fast path - the phrase contains complete timestamp, so we can use exact search @@ -121,7 +128,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { _, ok := tryParseIPv4(phrase) if ok { // Fast path - phrase contains the full IP address, so we can use exact matching @@ -145,7 +152,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm 
*bitmap, phrase str bbPool.Put(bb) } -func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { // The phrase may contain a part of the floating-point number. // For example, `foo:"123"` must match `123`, `123.456` and `-0.123`. // This means we cannot search in binary representation of floating-point numbers. @@ -187,7 +194,7 @@ func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phra bbPool.Put(bb) } -func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { +func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -288,7 +295,7 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value str }) } -func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool { +func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []uint64) bool { if len(tokens) == 0 { return true } diff --git a/lib/logstorage/filter_prefix.go b/lib/logstorage/filter_prefix.go index 7b0fa9964..4d0f75fde 100644 --- a/lib/logstorage/filter_prefix.go +++ b/lib/logstorage/filter_prefix.go @@ -19,8 +19,9 @@ type filterPrefix struct { fieldName string prefix string - tokensOnce sync.Once - tokens []string + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 } func (fp *filterPrefix) String() string { @@ -39,8 +40,14 @@ func (fp *filterPrefix) getTokens() []string { return fp.tokens } +func (fp *filterPrefix) getTokensHashes() []uint64 { + fp.tokensOnce.Do(fp.initTokens) + return fp.tokensHashes +} + func (fp *filterPrefix) initTokens() { fp.tokens = getTokensSkipLast(fp.prefix) + fp.tokensHashes = appendTokensHashes(nil, fp.tokens) } func (fp *filterPrefix) applyToBlockResult(bs 
*blockResult, bm *bitmap) { @@ -68,7 +75,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fp.getTokens() + tokens := fp.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -94,7 +101,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { // Fast path - all the timestamp values match an empty prefix aka `*` return @@ -115,7 +122,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { // Fast path - all the ipv4 values match an empty prefix aka `*` return @@ -136,7 +143,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str bbPool.Put(bb) } -func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if prefix == "" { // Fast path - all the float64 values match an empty prefix aka `*` return @@ -177,7 +184,7 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref bbPool.Put(bb) } -func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) { +func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return diff --git a/lib/logstorage/filter_regexp.go b/lib/logstorage/filter_regexp.go index 
5e88ada0b..df8e59733 100644 --- a/lib/logstorage/filter_regexp.go +++ b/lib/logstorage/filter_regexp.go @@ -16,8 +16,9 @@ type filterRegexp struct { fieldName string re *regexutil.Regex - tokens []string - tokensOnce sync.Once + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 } func (fr *filterRegexp) String() string { @@ -33,12 +34,18 @@ func (fr *filterRegexp) getTokens() []string { return fr.tokens } +func (fr *filterRegexp) getTokensHashes() []uint64 { + fr.tokensOnce.Do(fr.initTokens) + return fr.tokensHashes +} + func (fr *filterRegexp) initTokens() { literals := fr.re.GetLiterals() for i, literal := range literals { literals[i] = skipFirstLastToken(literal) } fr.tokens = tokenizeStrings(nil, literals) + fr.tokensHashes = appendTokensHashes(nil, fr.tokens) } func skipFirstLastToken(s string) string { @@ -89,7 +96,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fr.getTokens() + tokens := fr.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -115,7 +122,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -128,7 +135,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap bbPool.Put(bb) } -func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -141,7 +148,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu bbPool.Put(bb) } -func 
matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -167,7 +174,7 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re * bbPool.Put(bb) } -func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -177,7 +184,7 @@ func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege }) } -func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -190,7 +197,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex bbPool.Put(bb) } -func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -203,7 +210,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege bbPool.Put(bb) } -func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -216,7 +223,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch 
*columnHeader, bm *bitmap, re *rege bbPool.Put(bb) } -func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) { +func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return diff --git a/lib/logstorage/filter_sequence.go b/lib/logstorage/filter_sequence.go index 46ca9732f..1fda32d05 100644 --- a/lib/logstorage/filter_sequence.go +++ b/lib/logstorage/filter_sequence.go @@ -15,8 +15,9 @@ type filterSequence struct { fieldName string phrases []string - tokensOnce sync.Once - tokens []string + tokensOnce sync.Once + tokens []string + tokensHashes []uint64 nonEmptyPhrasesOnce sync.Once nonEmptyPhrases []string @@ -40,10 +41,15 @@ func (fs *filterSequence) getTokens() []string { return fs.tokens } +func (fs *filterSequence) getTokensHashes() []uint64 { + fs.tokensOnce.Do(fs.initTokens) + return fs.tokensHashes +} + func (fs *filterSequence) initTokens() { phrases := fs.getNonEmptyPhrases() - tokens := tokenizeStrings(nil, phrases) - fs.tokens = tokens + fs.tokens = tokenizeStrings(nil, phrases) + fs.tokensHashes = appendTokensHashes(nil, fs.tokens) } func (fs *filterSequence) getNonEmptyPhrases() []string { @@ -100,7 +106,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) { return } - tokens := fs.getTokens() + tokens := fs.getTokensHashes() switch ch.valueType { case valueTypeString: @@ -126,7 +132,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } } -func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) == 1 { matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens) return @@ -145,7 +151,7 @@ func matchTimestampISO8601BySequence(bs 
*blockSearch, ch *columnHeader, bm *bitm bbPool.Put(bb) } -func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) == 1 { matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens) return @@ -166,7 +172,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, bbPool.Put(bb) } -func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -197,7 +203,7 @@ func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, ph bbPool.Put(bb) } -func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) { +func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if !matchBloomFilterAllTokens(bs, ch, tokens) { bm.resetBits() return @@ -207,7 +213,7 @@ func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase }) } -func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) > 1 { bm.resetBits() return @@ -215,7 +221,7 @@ func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens) } -func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) > 1 { bm.resetBits() return @@ -223,7 +229,7 @@ func matchUint16BySequence(bs 
*blockSearch, ch *columnHeader, bm *bitmap, phrase matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens) } -func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) > 1 { bm.resetBits() return @@ -231,7 +237,7 @@ func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens) } -func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) { +func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) { if len(phrases) > 1 { bm.resetBits() return