This commit is contained in:
Aliaksandr Valialkin 2024-04-29 07:42:40 +02:00
parent e876b99b59
commit 65dfdda14b
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 74 additions and 71 deletions

View file

@ -1,7 +1,6 @@
package logstorage
import (
"bytes"
"math"
"strconv"
"sync"
@ -68,76 +67,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) {
}
}
// matchValuesDictByAnyValue gathers the indexes of the ch.valuesDict.values entries
// that are present in values, stores every index as a single byte and passes
// the resulting bytes to matchEncodedValuesDict together with bs, ch and bm.
//
// The scratch buffer is obtained from bbPool and handed back after the call.
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
	buf := bbPool.Get()
	for idx, dictValue := range ch.valuesDict.values {
		if _, found := values[dictValue]; !found {
			continue
		}
		buf.B = append(buf.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, buf.B)
	bbPool.Put(buf)
}
// matchEncodedValuesDict resets all the bits in bm when encodedValues is empty.
// Otherwise it runs visitValues over bs, ch and bm with a callback, which reports
// whether the single byte of the visited value is present in encodedValues.
//
// A visited value with length other than 1 is reported via logger.Panicf.
func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) {
	if len(encodedValues) == 0 {
		// Fast path - there are no encoded values to look for
		bm.resetBits()
		return
	}

	// Slow path - check every visited value against encodedValues
	isWanted := func(encoded string) bool {
		if len(encoded) != 1 {
			logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(encoded))
		}
		return bytes.IndexByte(encodedValues, encoded[0]) >= 0
	}
	visitValues(bs, ch, bm, isWanted)
}
// matchMinMaxValueLen compares the lengths of the decimal representations
// of ch.minValue and ch.maxValue with the [minLen, maxLen] range.
//
// It returns false if the decimal form of ch.minValue is longer than maxLen.
// Otherwise it returns true only if the decimal form of ch.maxValue
// contains at least minLen bytes.
func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
	buf := bbPool.Get()
	defer bbPool.Put(buf)

	// The same scratch buffer is reused for formatting both values.
	buf.B = strconv.AppendUint(buf.B[:0], ch.minValue, 10)
	minValueStr := bytesutil.ToUnsafeString(buf.B)
	if uint64(len(minValueStr)) > maxLen {
		return false
	}

	buf.B = strconv.AppendUint(buf.B[:0], ch.maxValue, 10)
	maxValueStr := bytesutil.ToUnsafeString(buf.B)
	return uint64(len(maxValueStr)) >= minLen
}
// matchBloomFilterAllTokens returns true for an empty tokens list.
// Otherwise it returns the result of containsAll(tokens) on the bloom filter,
// which bs.getBloomFilterForColumn returns for ch.
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
	if len(tokens) > 0 {
		filter := bs.getBloomFilterForColumn(ch)
		return filter.containsAll(tokens)
	}
	// There is nothing to check, so the bloom filter isn't consulted.
	return true
}
// visitValues calls f for the column values at the positions of the bits set in bm.
//
// The values are obtained via bs.getValuesForColumn(ch) only when bm isn't zero.
// The result of f is returned to bm.forEachSetBit for every visited bit.
func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value string) bool) {
	if bm.isZero() {
		// Fast path - there are no set bits, so the values aren't needed
		return
	}

	columnValues := bs.getValuesForColumn(ch)
	visitBit := func(bitIdx int) bool {
		return f(columnValues[bitIdx])
	}
	bm.forEachSetBit(visitBit)
}
// isASCIILowercase returns true if s consists solely of ASCII bytes
// and none of them is an uppercase letter in the range 'A'..'Z'.
//
// ASCII digits, punctuation and control chars are accepted,
// and the empty string yields true.
func isASCIILowercase(s string) bool {
	for _, b := range []byte(s) {
		switch {
		case b >= utf8.RuneSelf:
			// Non-ASCII byte
			return false
		case 'A' <= b && b <= 'Z':
			// Uppercase ASCII letter
			return false
		}
	}
	return true
}
// stringBucket wraps a slice of strings.
type stringBucket struct {
// a is the wrapped slice of strings.
a []string
}

View file

@ -4,6 +4,7 @@ import (
"fmt"
"strings"
"sync"
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@ -137,3 +138,13 @@ func matchAnyCasePhrase(s, phraseLowercase string) bool {
return ok
}
// isASCIILowercase returns true if s consists solely of ASCII bytes
// and none of them is an uppercase letter in the range 'A'..'Z'.
//
// ASCII digits, punctuation and control chars are accepted,
// and the empty string yields true.
func isASCIILowercase(s string) bool {
	for _, b := range []byte(s) {
		switch {
		case b >= utf8.RuneSelf:
			// Non-ASCII byte
			return false
		case 'A' <= b && b <= 'Z':
			// Uppercase ASCII letter
			return false
		}
	}
	return true
}

View file

@ -341,3 +341,14 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets []
}
return false
}
// matchValuesDictByAnyValue gathers the indexes of the ch.valuesDict.values entries
// that are present in values, stores every index as a single byte and passes
// the resulting bytes to matchEncodedValuesDict together with bs, ch and bm.
//
// The scratch buffer is obtained from bbPool and handed back after the call.
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
	buf := bbPool.Get()
	for idx, dictValue := range ch.valuesDict.values {
		if _, found := values[dictValue]; !found {
			continue
		}
		buf.B = append(buf.B, byte(idx))
	}
	matchEncodedValuesDict(bs, ch, bm, buf.B)
	bbPool.Put(buf)
}

View file

@ -1,8 +1,10 @@
package logstorage
import (
"strconv"
"unicode/utf8"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
@ -185,3 +187,17 @@ func matchLenRange(s string, minLen, maxLen uint64) bool {
sLen := uint64(utf8.RuneCountInString(s))
return sLen >= minLen && sLen <= maxLen
}
// matchMinMaxValueLen compares the lengths of the decimal representations
// of ch.minValue and ch.maxValue with the [minLen, maxLen] range.
//
// It returns false if the decimal form of ch.minValue is longer than maxLen.
// Otherwise it returns true only if the decimal form of ch.maxValue
// contains at least minLen bytes.
func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
	buf := bbPool.Get()
	defer bbPool.Put(buf)

	// The same scratch buffer is reused for formatting both values.
	buf.B = strconv.AppendUint(buf.B[:0], ch.minValue, 10)
	minValueStr := bytesutil.ToUnsafeString(buf.B)
	if uint64(len(minValueStr)) > maxLen {
		return false
	}

	buf.B = strconv.AppendUint(buf.B[:0], ch.maxValue, 10)
	maxValueStr := bytesutil.ToUnsafeString(buf.B)
	return uint64(len(maxValueStr)) >= minLen
}

View file

@ -1,6 +1,7 @@
package logstorage
import (
"bytes"
"strings"
"sync"
"unicode/utf8"
@ -245,3 +246,38 @@ func getPhrasePos(s, phrase string) int {
return pos
}
}
// matchEncodedValuesDict resets all the bits in bm when encodedValues is empty.
// Otherwise it runs visitValues over bs, ch and bm with a callback, which reports
// whether the single byte of the visited value is present in encodedValues.
//
// A visited value with length other than 1 is reported via logger.Panicf.
func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) {
	if len(encodedValues) == 0 {
		// Fast path - there are no encoded values to look for
		bm.resetBits()
		return
	}

	// Slow path - check every visited value against encodedValues
	isWanted := func(encoded string) bool {
		if len(encoded) != 1 {
			logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(encoded))
		}
		return bytes.IndexByte(encodedValues, encoded[0]) >= 0
	}
	visitValues(bs, ch, bm, isWanted)
}
// visitValues calls f for the column values at the positions of the bits set in bm.
//
// The values are obtained via bs.getValuesForColumn(ch) only when bm isn't zero.
// The result of f is returned to bm.forEachSetBit for every visited bit.
func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value string) bool) {
	if bm.isZero() {
		// Fast path - there are no set bits, so the values aren't needed
		return
	}

	columnValues := bs.getValuesForColumn(ch)
	visitBit := func(bitIdx int) bool {
		return f(columnValues[bitIdx])
	}
	bm.forEachSetBit(visitBit)
}
// matchBloomFilterAllTokens returns true for an empty tokens list.
// Otherwise it returns the result of containsAll(tokens) on the bloom filter,
// which bs.getBloomFilterForColumn returns for ch.
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
	if len(tokens) > 0 {
		filter := bs.getBloomFilterForColumn(ch)
		return filter.containsAll(tokens)
	}
	// There is nothing to check, so the bloom filter isn't consulted.
	return true
}