mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 15:16:42 +00:00
wip
This commit is contained in:
parent
892afe25d1
commit
03b9d7977d
4 changed files with 156 additions and 16 deletions
|
@ -2,6 +2,8 @@ package logstorage
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||||
|
@ -13,6 +15,9 @@ import (
|
||||||
type filterRegexp struct {
|
type filterRegexp struct {
|
||||||
fieldName string
|
fieldName string
|
||||||
re *regexutil.Regex
|
re *regexutil.Regex
|
||||||
|
|
||||||
|
tokens []string
|
||||||
|
tokensOnce sync.Once
|
||||||
}
|
}
|
||||||
|
|
||||||
func (fr *filterRegexp) String() string {
|
func (fr *filterRegexp) String() string {
|
||||||
|
@ -23,6 +28,37 @@ func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
|
||||||
neededFields.add(fr.fieldName)
|
neededFields.add(fr.fieldName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (fr *filterRegexp) getTokens() []string {
|
||||||
|
fr.tokensOnce.Do(fr.initTokens)
|
||||||
|
return fr.tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
func (fr *filterRegexp) initTokens() {
|
||||||
|
literals := fr.re.GetLiterals()
|
||||||
|
for i, literal := range literals {
|
||||||
|
literals[i] = skipFirstLastToken(literal)
|
||||||
|
}
|
||||||
|
fr.tokens = tokenizeStrings(nil, literals)
|
||||||
|
}
|
||||||
|
|
||||||
|
func skipFirstLastToken(s string) string {
|
||||||
|
for {
|
||||||
|
r, runeSize := utf8.DecodeRuneInString(s)
|
||||||
|
if !isTokenRune(r) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s = s[runeSize:]
|
||||||
|
}
|
||||||
|
for {
|
||||||
|
r, runeSize := utf8.DecodeLastRuneInString(s)
|
||||||
|
if !isTokenRune(r) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s = s[:len(s)-runeSize]
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
|
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||||
re := fr.re
|
re := fr.re
|
||||||
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
|
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
|
||||||
|
@ -53,31 +89,37 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tokens := fr.getTokens()
|
||||||
|
|
||||||
switch ch.valueType {
|
switch ch.valueType {
|
||||||
case valueTypeString:
|
case valueTypeString:
|
||||||
matchStringByRegexp(bs, ch, bm, re)
|
matchStringByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeDict:
|
case valueTypeDict:
|
||||||
matchValuesDictByRegexp(bs, ch, bm, re)
|
matchValuesDictByRegexp(bs, ch, bm, re)
|
||||||
case valueTypeUint8:
|
case valueTypeUint8:
|
||||||
matchUint8ByRegexp(bs, ch, bm, re)
|
matchUint8ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeUint16:
|
case valueTypeUint16:
|
||||||
matchUint16ByRegexp(bs, ch, bm, re)
|
matchUint16ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeUint32:
|
case valueTypeUint32:
|
||||||
matchUint32ByRegexp(bs, ch, bm, re)
|
matchUint32ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeUint64:
|
case valueTypeUint64:
|
||||||
matchUint64ByRegexp(bs, ch, bm, re)
|
matchUint64ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeFloat64:
|
case valueTypeFloat64:
|
||||||
matchFloat64ByRegexp(bs, ch, bm, re)
|
matchFloat64ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeIPv4:
|
case valueTypeIPv4:
|
||||||
matchIPv4ByRegexp(bs, ch, bm, re)
|
matchIPv4ByRegexp(bs, ch, bm, re, tokens)
|
||||||
case valueTypeTimestampISO8601:
|
case valueTypeTimestampISO8601:
|
||||||
matchTimestampISO8601ByRegexp(bs, ch, bm, re)
|
matchTimestampISO8601ByRegexp(bs, ch, bm, re, tokens)
|
||||||
default:
|
default:
|
||||||
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toTimestampISO8601String(bs, bb, v)
|
s := toTimestampISO8601String(bs, bb, v)
|
||||||
|
@ -86,7 +128,11 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toIPv4String(bs, bb, v)
|
s := toIPv4String(bs, bb, v)
|
||||||
|
@ -95,7 +141,11 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toFloat64String(bs, bb, v)
|
s := toFloat64String(bs, bb, v)
|
||||||
|
@ -117,13 +167,21 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
return re.MatchString(v)
|
return re.MatchString(v)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toUint8String(bs, bb, v)
|
s := toUint8String(bs, bb, v)
|
||||||
|
@ -132,7 +190,11 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toUint16String(bs, bb, v)
|
s := toUint16String(bs, bb, v)
|
||||||
|
@ -141,7 +203,11 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toUint32String(bs, bb, v)
|
s := toUint32String(bs, bb, v)
|
||||||
|
@ -150,7 +216,11 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||||
|
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||||
|
bm.resetBits()
|
||||||
|
return
|
||||||
|
}
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
visitValues(bs, ch, bm, func(v string) bool {
|
visitValues(bs, ch, bm, func(v string) bool {
|
||||||
s := toUint64String(bs, bb, v)
|
s := toUint64String(bs, bb, v)
|
||||||
|
|
|
@ -370,6 +370,25 @@ func TestFilterRegexp(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSkipFirstLastToken(t *testing.T) {
|
||||||
|
f := func(s, resultExpected string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
result := skipFirstLastToken(s)
|
||||||
|
if result != resultExpected {
|
||||||
|
t.Fatalf("unexpected result in skipFirstLastToken(%q); got %q; want %q", s, result, resultExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f("", "")
|
||||||
|
f("foobar", "")
|
||||||
|
f("foo bar", " ")
|
||||||
|
f("foo bar baz", " bar ")
|
||||||
|
f(" foo bar baz", " foo bar ")
|
||||||
|
f(",foo bar baz!", ",foo bar baz!")
|
||||||
|
f("фыад длоа д!", " длоа д!")
|
||||||
|
}
|
||||||
|
|
||||||
func mustCompileRegex(expr string) *regexutil.Regex {
|
func mustCompileRegex(expr string) *regexutil.Regex {
|
||||||
re, err := regexutil.NewRegex(expr)
|
re, err := regexutil.NewRegex(expr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -94,6 +94,32 @@ func (r *Regex) MatchString(s string) bool {
|
||||||
return r.matchStringWithPrefix(s)
|
return r.matchStringWithPrefix(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetLiterals returns literals for r.
|
||||||
|
func (r *Regex) GetLiterals() []string {
|
||||||
|
sre := mustParseRegexp(r.exprStr)
|
||||||
|
for sre.Op == syntax.OpCapture {
|
||||||
|
sre = sre.Sub[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
v, ok := getLiteral(sre)
|
||||||
|
if ok {
|
||||||
|
return []string{v}
|
||||||
|
}
|
||||||
|
|
||||||
|
if sre.Op != syntax.OpConcat {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var a []string
|
||||||
|
for _, sub := range sre.Sub {
|
||||||
|
v, ok := getLiteral(sub)
|
||||||
|
if ok {
|
||||||
|
a = append(a, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
|
||||||
// String returns string representation for r
|
// String returns string representation for r
|
||||||
func (r *Regex) String() string {
|
func (r *Regex) String() string {
|
||||||
return r.exprStr
|
return r.exprStr
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package regexutil
|
package regexutil
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -144,3 +145,27 @@ func TestRegexMatchString(t *testing.T) {
|
||||||
f("foo(bar|baz)", "a fooxfooban a", false)
|
f("foo(bar|baz)", "a fooxfooban a", false)
|
||||||
f("foo(bar|baz)", "a fooxfooban foobar a", true)
|
f("foo(bar|baz)", "a fooxfooban foobar a", true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetLiterals(t *testing.T) {
|
||||||
|
f := func(expr string, literalsExpected []string) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
r, err := NewRegex(expr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("cannot parse %q: %s", expr, err)
|
||||||
|
}
|
||||||
|
literals := r.GetLiterals()
|
||||||
|
if !reflect.DeepEqual(literals, literalsExpected) {
|
||||||
|
t.Fatalf("unexpected literals; got %q; want %q", literals, literalsExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f("", nil)
|
||||||
|
f("foo bar baz", []string{"foo bar baz"})
|
||||||
|
f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"})
|
||||||
|
f("(foo[ab](?:bar))", []string{"foo", "bar"})
|
||||||
|
f("foo|bar", nil)
|
||||||
|
f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"})
|
||||||
|
f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"})
|
||||||
|
f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue