mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 15:16:42 +00:00
wip
This commit is contained in:
parent
e8e49405ef
commit
bd70c94a27
3 changed files with 134 additions and 33 deletions
|
@ -3,6 +3,7 @@ package logstorage
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
@ -27,7 +28,8 @@ type filterIn struct {
|
||||||
// qFieldName must be set to field name for obtaining values from if q is non-nil.
|
// qFieldName must be set to field name for obtaining values from if q is non-nil.
|
||||||
qFieldName string
|
qFieldName string
|
||||||
|
|
||||||
tokenSetsOnce sync.Once
|
tokensOnce sync.Once
|
||||||
|
commonTokens []string
|
||||||
tokenSets [][]string
|
tokenSets [][]string
|
||||||
|
|
||||||
stringValuesOnce sync.Once
|
stringValuesOnce sync.Once
|
||||||
|
@ -74,28 +76,15 @@ func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
|
||||||
neededFields.add(fi.fieldName)
|
neededFields.add(fi.fieldName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fi *filterIn) getTokenSets() [][]string {
|
func (fi *filterIn) getTokens() ([]string, [][]string) {
|
||||||
fi.tokenSetsOnce.Do(fi.initTokenSets)
|
fi.tokensOnce.Do(fi.initTokens)
|
||||||
return fi.tokenSets
|
return fi.commonTokens, fi.tokenSets
|
||||||
}
|
}
|
||||||
|
|
||||||
// It is faster to match every row in the block instead of checking too big number of tokenSets against bloom filter.
|
func (fi *filterIn) initTokens() {
|
||||||
const maxTokenSetsToInit = 1000
|
commonTokens, tokenSets := getCommonTokensAndTokenSets(fi.values)
|
||||||
|
|
||||||
func (fi *filterIn) initTokenSets() {
|
fi.commonTokens = commonTokens
|
||||||
values := fi.values
|
|
||||||
tokenSetsLen := len(values)
|
|
||||||
if tokenSetsLen > maxTokenSetsToInit {
|
|
||||||
tokenSetsLen = maxTokenSetsToInit
|
|
||||||
}
|
|
||||||
tokenSets := make([][]string, 0, tokenSetsLen+1)
|
|
||||||
for _, v := range values {
|
|
||||||
tokens := tokenizeStrings(nil, []string{v})
|
|
||||||
tokenSets = append(tokenSets, tokens)
|
|
||||||
if len(tokens) > maxTokenSetsToInit {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fi.tokenSets = tokenSets
|
fi.tokenSets = tokenSets
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -385,47 +374,47 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenSets := fi.getTokenSets()
|
commonTokens, tokenSets := fi.getTokens()
|
||||||
|
|
||||||
switch ch.valueType {
|
switch ch.valueType {
|
||||||
case valueTypeString:
|
case valueTypeString:
|
||||||
stringValues := fi.getStringValues()
|
stringValues := fi.getStringValues()
|
||||||
matchAnyValue(bs, ch, bm, stringValues, tokenSets)
|
matchAnyValue(bs, ch, bm, stringValues, commonTokens, tokenSets)
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
stringValues := fi.getStringValues()
|
stringValues := fi.getStringValues()
|
||||||
matchValuesDictByAnyValue(bs, ch, bm, stringValues)
|
matchValuesDictByAnyValue(bs, ch, bm, stringValues)
|
||||||
case valueTypeUint8:
|
case valueTypeUint8:
|
||||||
binValues := fi.getUint8Values()
|
binValues := fi.getUint8Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeUint16:
|
case valueTypeUint16:
|
||||||
binValues := fi.getUint16Values()
|
binValues := fi.getUint16Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeUint32:
|
case valueTypeUint32:
|
||||||
binValues := fi.getUint32Values()
|
binValues := fi.getUint32Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeUint64:
|
case valueTypeUint64:
|
||||||
binValues := fi.getUint64Values()
|
binValues := fi.getUint64Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeFloat64:
|
case valueTypeFloat64:
|
||||||
binValues := fi.getFloat64Values()
|
binValues := fi.getFloat64Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeIPv4:
|
case valueTypeIPv4:
|
||||||
binValues := fi.getIPv4Values()
|
binValues := fi.getIPv4Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
case valueTypeTimestampISO8601:
|
case valueTypeTimestampISO8601:
|
||||||
binValues := fi.getTimestampISO8601Values()
|
binValues := fi.getTimestampISO8601Values()
|
||||||
matchAnyValue(bs, ch, bm, binValues, tokenSets)
|
matchAnyValue(bs, ch, bm, binValues, commonTokens, tokenSets)
|
||||||
default:
|
default:
|
||||||
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) {
|
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []string, tokenSets [][]string) {
|
||||||
if len(values) == 0 {
|
if len(values) == 0 {
|
||||||
bm.resetBits()
|
bm.resetBits()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) {
|
if !matchBloomFilterAnyTokenSet(bs, ch, commonTokens, tokenSets) {
|
||||||
bm.resetBits()
|
bm.resetBits()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -435,7 +424,10 @@ func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[str
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets [][]string) bool {
|
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []string, tokenSets [][]string) bool {
|
||||||
|
if len(commonTokens) > 0 {
|
||||||
|
return matchBloomFilterAllTokens(bs, ch, commonTokens)
|
||||||
|
}
|
||||||
if len(tokenSets) == 0 {
|
if len(tokenSets) == 0 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -453,6 +445,9 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets []
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// It is faster to match every row in the block instead of checking too big number of tokenSets against bloom filter.
|
||||||
|
const maxTokenSetsToInit = 1000
|
||||||
|
|
||||||
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
|
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
for _, v := range ch.valuesDict.values {
|
for _, v := range ch.valuesDict.values {
|
||||||
|
@ -465,3 +460,44 @@ func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, va
|
||||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCommonTokensAndTokenSets(values []string) ([]string, [][]string) {
|
||||||
|
tokenSets := make([][]string, len(values))
|
||||||
|
for i, v := range values {
|
||||||
|
tokenSets[i] = tokenizeStrings(nil, []string{v})
|
||||||
|
}
|
||||||
|
|
||||||
|
commonTokens := getCommonTokens(tokenSets)
|
||||||
|
if len(commonTokens) == 0 {
|
||||||
|
return nil, tokenSets
|
||||||
|
}
|
||||||
|
return commonTokens, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCommonTokens(tokenSets [][]string) []string {
|
||||||
|
if len(tokenSets) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
m := make(map[string]struct{}, len(tokenSets[0]))
|
||||||
|
for _, token := range tokenSets[0] {
|
||||||
|
m[token] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tokens := range tokenSets[1:] {
|
||||||
|
if len(m) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for token := range m {
|
||||||
|
if !slices.Contains(tokens, token) {
|
||||||
|
delete(m, token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens := make([]string, 0, len(m))
|
||||||
|
for token := range m {
|
||||||
|
tokens = append(tokens, token)
|
||||||
|
}
|
||||||
|
return tokens
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
package logstorage
|
package logstorage
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
|
"slices"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -688,3 +690,32 @@ func TestFilterIn(t *testing.T) {
|
||||||
testFilterMatchForColumns(t, columns, fi, "_msg", nil)
|
testFilterMatchForColumns(t, columns, fi, "_msg", nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetCommonTokensAndTokenSets(t *testing.T) {
|
||||||
|
f := func(values []string, commonTokensExpected []string, tokenSetsExpected [][]string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
commonTokens, tokenSets := getCommonTokensAndTokenSets(values)
|
||||||
|
slices.Sort(commonTokens)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(commonTokens, commonTokensExpected) {
|
||||||
|
t.Fatalf("unexpected commonTokens for values=%q\ngot\n%q\nwant\n%q", values, commonTokens, commonTokensExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, tokens := range tokenSets {
|
||||||
|
slices.Sort(tokens)
|
||||||
|
tokensExpected := tokenSetsExpected[i]
|
||||||
|
if !reflect.DeepEqual(tokens, tokensExpected) {
|
||||||
|
t.Fatalf("unexpected tokens for value=%q\ngot\n%q\nwant\n%q", values[i], tokens, tokensExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f(nil, nil, nil)
|
||||||
|
f([]string{"foo"}, []string{"foo"}, nil)
|
||||||
|
f([]string{"foo", "foo"}, []string{"foo"}, nil)
|
||||||
|
f([]string{"foo", "bar", "bar", "foo"}, nil, [][]string{{"foo"}, {"bar"}, {"bar"}, {"foo"}})
|
||||||
|
f([]string{"foo", "foo bar", "bar foo"}, []string{"foo"}, nil)
|
||||||
|
f([]string{"a foo bar", "bar abc foo", "foo abc a bar"}, []string{"bar", "foo"}, nil)
|
||||||
|
f([]string{"a xfoo bar", "xbar abc foo", "foo abc a bar"}, nil, [][]string{{"a", "bar", "xfoo"}, {"abc", "foo", "xbar"}, {"a", "abc", "bar", "foo"}})
|
||||||
|
}
|
||||||
|
|
|
@ -167,3 +167,37 @@ func TestPipeMath(t *testing.T) {
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPipeMathUpdateNeededFields(t *testing.T) {
|
||||||
|
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||||
|
t.Helper()
|
||||||
|
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
// all the needed fields
|
||||||
|
f("math (x + 1) as y", "*", "", "*", "y")
|
||||||
|
|
||||||
|
// all the needed fields, unneeded fields do not intersect with src and dst
|
||||||
|
f("math (x + 1) as y", "*", "f1,f2", "*", "f1,f2,y")
|
||||||
|
|
||||||
|
// all the needed fields, unneeded fields intersect with src
|
||||||
|
f("math (x + 1) as y", "*", "f1,x", "*", "f1,y")
|
||||||
|
|
||||||
|
// all the needed fields, unneeded fields intersect with dst
|
||||||
|
f("math (x + 1) as y", "*", "f1,y", "*", "f1,y")
|
||||||
|
|
||||||
|
// all the needed fields, unneeded fields intersect with src and dst
|
||||||
|
f("math (x + 1) as y", "*", "f1,x,y", "*", "f1,x,y")
|
||||||
|
|
||||||
|
// needed fields do not intersect with src and dst
|
||||||
|
f("math (x + 1) as y", "f1,f2", "", "f1,f2", "")
|
||||||
|
|
||||||
|
// needed fields intersect with src
|
||||||
|
f("math (x + 1) as y", "f1,x", "", "f1,x", "")
|
||||||
|
|
||||||
|
// needed fields intersect with dst
|
||||||
|
f("math (x + 1) as y", "f1,y", "", "f1,x", "")
|
||||||
|
|
||||||
|
// needed fields intersect with src and dst
|
||||||
|
f("math (x + 1) as y", "f1,x,y", "", "f1,x", "")
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue