diff --git a/lib/logstorage/filter.go b/lib/logstorage/filter.go index f4df75487..b8f0ba4c6 100644 --- a/lib/logstorage/filter.go +++ b/lib/logstorage/filter.go @@ -1,7 +1,6 @@ package logstorage import ( - "bytes" "math" "strconv" "sync" @@ -68,76 +67,6 @@ func (fs *streamFilter) apply(bs *blockSearch, bm *bitmap) { } } -func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { - bb := bbPool.Get() - for i, v := range ch.valuesDict.values { - if _, ok := values[v]; ok { - bb.B = append(bb.B, byte(i)) - } - } - matchEncodedValuesDict(bs, ch, bm, bb.B) - bbPool.Put(bb) -} - -func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) { - if len(encodedValues) == 0 { - // Fast path - the phrase is missing in the valuesDict - bm.resetBits() - return - } - // Slow path - iterate over values - visitValues(bs, ch, bm, func(v string) bool { - if len(v) != 1 { - logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v)) - } - n := bytes.IndexByte(encodedValues, v[0]) - return n >= 0 - }) -} - -func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { - bb := bbPool.Get() - defer bbPool.Put(bb) - - bb.B = strconv.AppendUint(bb.B[:0], ch.minValue, 10) - s := bytesutil.ToUnsafeString(bb.B) - if maxLen < uint64(len(s)) { - return false - } - bb.B = strconv.AppendUint(bb.B[:0], ch.maxValue, 10) - s = bytesutil.ToUnsafeString(bb.B) - return minLen <= uint64(len(s)) -} - -func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool { - if len(tokens) == 0 { - return true - } - bf := bs.getBloomFilterForColumn(ch) - return bf.containsAll(tokens) -} - -func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value string) bool) { - if bm.isZero() { - // Fast path - nothing to visit - return - } - values := bs.getValuesForColumn(ch) - bm.forEachSetBit(func(idx int) bool { - return f(values[idx]) - }) -} - -func isASCIILowercase(s string) bool { - for i := 0; i < len(s); i++ { - c := s[i] - if c >= utf8.RuneSelf || (c >= 'A' && c <= 'Z') { - return false - } - } - return true -} - type stringBucket struct { a []string } diff --git a/lib/logstorage/filter_any_case_phrase.go b/lib/logstorage/filter_any_case_phrase.go index 0febdf341..6498c651b 100644 --- a/lib/logstorage/filter_any_case_phrase.go +++ b/lib/logstorage/filter_any_case_phrase.go @@ -4,6 +4,7 @@ import ( "fmt" "strings" "sync" + "unicode/utf8" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -137,3 +138,13 @@ func matchAnyCasePhrase(s, phraseLowercase string) bool { return ok } + +func isASCIILowercase(s string) bool { + for i := 0; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf || (c >= 'A' && c <= 'Z') { + return false + } + } + return true +} diff --git a/lib/logstorage/filter_in.go b/lib/logstorage/filter_in.go index cd5742a6c..36ca0c5c4 100644 --- a/lib/logstorage/filter_in.go +++ b/lib/logstorage/filter_in.go @@ -341,3 +341,14 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets [] } return false } + +func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) { + bb := bbPool.Get() + for i, v := range ch.valuesDict.values { + if _, ok := values[v]; ok { + bb.B = append(bb.B, byte(i)) + } + } + matchEncodedValuesDict(bs, ch, bm, bb.B) + bbPool.Put(bb) +} diff --git a/lib/logstorage/filter_len_range.go b/lib/logstorage/filter_len_range.go index e329f5ab4..85050814c 100644 --- a/lib/logstorage/filter_len_range.go +++ b/lib/logstorage/filter_len_range.go @@ -1,8 +1,10 @@ package logstorage import ( + "strconv" "unicode/utf8" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -185,3 +187,17 @@ func matchLenRange(s string, minLen, maxLen uint64) bool { sLen := uint64(utf8.RuneCountInString(s)) return sLen >= minLen && sLen <= maxLen } + +func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool { + bb := bbPool.Get() + defer bbPool.Put(bb) + + bb.B = strconv.AppendUint(bb.B[:0], ch.minValue, 10) + s := bytesutil.ToUnsafeString(bb.B) + if maxLen < uint64(len(s)) { + return false + } + bb.B = strconv.AppendUint(bb.B[:0], ch.maxValue, 10) + s = bytesutil.ToUnsafeString(bb.B) + return minLen <= uint64(len(s)) +} diff --git a/lib/logstorage/filter_phrase.go b/lib/logstorage/filter_phrase.go index 10cd34024..b2f2d5eb1 100644 --- a/lib/logstorage/filter_phrase.go +++ b/lib/logstorage/filter_phrase.go @@ -1,6 +1,7 @@ package logstorage import ( + "bytes" "strings" "sync" "unicode/utf8" @@ -245,3 +246,38 @@ func getPhrasePos(s, phrase string) int { return pos } } + +func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) { + if len(encodedValues) == 0 { + // Fast path - the phrase is missing in the valuesDict + bm.resetBits() + return + } + // Slow path - iterate over values + visitValues(bs, ch, bm, func(v string) bool { + if len(v) != 1 { + logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v)) + } + n := bytes.IndexByte(encodedValues, v[0]) + return n >= 0 + }) +} + +func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value string) bool) { + if bm.isZero() { + // Fast path - nothing to visit + return + } + values := bs.getValuesForColumn(ch) + bm.forEachSetBit(func(idx int) bool { + return f(values[idx]) + }) +} + +func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool { + if len(tokens) == 0 { + return true + } + bf := bs.getBloomFilterForColumn(ch) + return bf.containsAll(tokens) +}