mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
892afe25d1
commit
03b9d7977d
4 changed files with 156 additions and 16 deletions
|
@ -2,6 +2,8 @@ package logstorage
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||
|
@ -13,6 +15,9 @@ import (
|
|||
type filterRegexp struct {
|
||||
fieldName string
|
||||
re *regexutil.Regex
|
||||
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) String() string {
|
||||
|
@ -23,6 +28,37 @@ func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
|
|||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) getTokens() []string {
|
||||
fr.tokensOnce.Do(fr.initTokens)
|
||||
return fr.tokens
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) initTokens() {
|
||||
literals := fr.re.GetLiterals()
|
||||
for i, literal := range literals {
|
||||
literals[i] = skipFirstLastToken(literal)
|
||||
}
|
||||
fr.tokens = tokenizeStrings(nil, literals)
|
||||
}
|
||||
|
||||
func skipFirstLastToken(s string) string {
|
||||
for {
|
||||
r, runeSize := utf8.DecodeRuneInString(s)
|
||||
if !isTokenRune(r) {
|
||||
break
|
||||
}
|
||||
s = s[runeSize:]
|
||||
}
|
||||
for {
|
||||
r, runeSize := utf8.DecodeLastRuneInString(s)
|
||||
if !isTokenRune(r) {
|
||||
break
|
||||
}
|
||||
s = s[:len(s)-runeSize]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
re := fr.re
|
||||
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
|
||||
|
@ -53,31 +89,37 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
return
|
||||
}
|
||||
|
||||
tokens := fr.getTokens()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
matchStringByRegexp(bs, ch, bm, re)
|
||||
matchStringByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeDict:
|
||||
matchValuesDictByRegexp(bs, ch, bm, re)
|
||||
case valueTypeUint8:
|
||||
matchUint8ByRegexp(bs, ch, bm, re)
|
||||
matchUint8ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeUint16:
|
||||
matchUint16ByRegexp(bs, ch, bm, re)
|
||||
matchUint16ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeUint32:
|
||||
matchUint32ByRegexp(bs, ch, bm, re)
|
||||
matchUint32ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeUint64:
|
||||
matchUint64ByRegexp(bs, ch, bm, re)
|
||||
matchUint64ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeFloat64:
|
||||
matchFloat64ByRegexp(bs, ch, bm, re)
|
||||
matchFloat64ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeIPv4:
|
||||
matchIPv4ByRegexp(bs, ch, bm, re)
|
||||
matchIPv4ByRegexp(bs, ch, bm, re, tokens)
|
||||
case valueTypeTimestampISO8601:
|
||||
matchTimestampISO8601ByRegexp(bs, ch, bm, re)
|
||||
matchTimestampISO8601ByRegexp(bs, ch, bm, re, tokens)
|
||||
default:
|
||||
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
|
@ -86,7 +128,11 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4String(bs, bb, v)
|
||||
|
@ -95,7 +141,11 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64String(bs, bb, v)
|
||||
|
@ -117,13 +167,21 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
return re.MatchString(v)
|
||||
})
|
||||
}
|
||||
|
||||
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toUint8String(bs, bb, v)
|
||||
|
@ -132,7 +190,11 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toUint16String(bs, bb, v)
|
||||
|
@ -141,7 +203,11 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toUint32String(bs, bb, v)
|
||||
|
@ -150,7 +216,11 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
|||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex) {
|
||||
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toUint64String(bs, bb, v)
|
||||
|
|
|
@ -370,6 +370,25 @@ func TestFilterRegexp(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestSkipFirstLastToken(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
|
||||
result := skipFirstLastToken(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result in skipFirstLastToken(%q); got %q; want %q", s, result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("", "")
|
||||
f("foobar", "")
|
||||
f("foo bar", " ")
|
||||
f("foo bar baz", " bar ")
|
||||
f(" foo bar baz", " foo bar ")
|
||||
f(",foo bar baz!", ",foo bar baz!")
|
||||
f("фыад длоа д!", " длоа д!")
|
||||
}
|
||||
|
||||
func mustCompileRegex(expr string) *regexutil.Regex {
|
||||
re, err := regexutil.NewRegex(expr)
|
||||
if err != nil {
|
||||
|
|
|
@ -94,6 +94,32 @@ func (r *Regex) MatchString(s string) bool {
|
|||
return r.matchStringWithPrefix(s)
|
||||
}
|
||||
|
||||
// GetLiterals returns literals for r.
|
||||
func (r *Regex) GetLiterals() []string {
|
||||
sre := mustParseRegexp(r.exprStr)
|
||||
for sre.Op == syntax.OpCapture {
|
||||
sre = sre.Sub[0]
|
||||
}
|
||||
|
||||
v, ok := getLiteral(sre)
|
||||
if ok {
|
||||
return []string{v}
|
||||
}
|
||||
|
||||
if sre.Op != syntax.OpConcat {
|
||||
return nil
|
||||
}
|
||||
|
||||
var a []string
|
||||
for _, sub := range sre.Sub {
|
||||
v, ok := getLiteral(sub)
|
||||
if ok {
|
||||
a = append(a, v)
|
||||
}
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
// String returns string representation for r
|
||||
func (r *Regex) String() string {
|
||||
return r.exprStr
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package regexutil
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -144,3 +145,27 @@ func TestRegexMatchString(t *testing.T) {
|
|||
f("foo(bar|baz)", "a fooxfooban a", false)
|
||||
f("foo(bar|baz)", "a fooxfooban foobar a", true)
|
||||
}
|
||||
|
||||
func TestGetLiterals(t *testing.T) {
|
||||
f := func(expr string, literalsExpected []string) {
|
||||
t.Helper()
|
||||
|
||||
r, err := NewRegex(expr)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %q: %s", expr, err)
|
||||
}
|
||||
literals := r.GetLiterals()
|
||||
if !reflect.DeepEqual(literals, literalsExpected) {
|
||||
t.Fatalf("unexpected literals; got %q; want %q", literals, literalsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("", nil)
|
||||
f("foo bar baz", []string{"foo bar baz"})
|
||||
f("foo.*bar(a|b)baz.+", []string{"foo", "bar", "baz"})
|
||||
f("(foo[ab](?:bar))", []string{"foo", "bar"})
|
||||
f("foo|bar", nil)
|
||||
f("((foo|bar)baz xxx(?:yzabc))", []string{"baz xxxyzabc"})
|
||||
f("((foo|bar)baz xxx(?:yzabc)*)", []string{"baz xxx"})
|
||||
f("((foo|bar)baz? xxx(?:yzabc)*)", []string{"ba", " xxx"})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue