mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
evaluate the execution cost of all tag filters (#824)
* evaluate the execution cost of all tag filters * fix suffixes typo
This commit is contained in:
parent
35791d9b29
commit
8ddf089deb
3 changed files with 251 additions and 23 deletions
|
@ -2186,7 +2186,7 @@ func (is *indexSearch) getMetricIDsForTagFilter(tf *tagFilter, filter *uint64set
|
|||
}
|
||||
metricIDs := &uint64set.Set{}
|
||||
if len(tf.orSuffixes) > 0 {
|
||||
// Fast path for orSuffixes - seek for rows for each value from orSuffxies.
|
||||
// Fast path for orSuffixes - seek for rows for each value from orSuffixes.
|
||||
if err := is.updateMetricIDsForOrSuffixesNoFilter(tf, maxMetrics, metricIDs); err != nil {
|
||||
if err == errFallbackToMetricNameMatch {
|
||||
return nil, err
|
||||
|
@ -2596,6 +2596,7 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
|
|||
// This way we limit the amount of work below by applying more specific filters at first.
|
||||
type tagFilterWithCount struct {
|
||||
tf *tagFilter
|
||||
cost uint64
|
||||
count uint64
|
||||
}
|
||||
tfsWithCount := make([]tagFilterWithCount, len(tfs.tfs))
|
||||
|
@ -2611,13 +2612,14 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
|
|||
}
|
||||
tfsWithCount[i] = tagFilterWithCount{
|
||||
tf: tf,
|
||||
cost: count * tf.matchCost,
|
||||
count: count,
|
||||
}
|
||||
}
|
||||
sort.Slice(tfsWithCount, func(i, j int) bool {
|
||||
a, b := &tfsWithCount[i], &tfsWithCount[j]
|
||||
if a.count != b.count {
|
||||
return a.count < b.count
|
||||
if a.cost != b.cost {
|
||||
return a.cost < b.cost
|
||||
}
|
||||
return a.tf.Less(b.tf)
|
||||
})
|
||||
|
|
|
@ -165,6 +165,7 @@ type tagFilter struct {
|
|||
value []byte
|
||||
isNegative bool
|
||||
isRegexp bool
|
||||
matchCost uint64
|
||||
|
||||
// Prefix always contains {nsPrefixTagToMetricIDs, AccountID, ProjectID, key}.
|
||||
// Additionally it contains:
|
||||
|
@ -285,6 +286,7 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
|
|||
// during the search for matching metricIDs.
|
||||
tf.orSuffixes = append(tf.orSuffixes[:0], "")
|
||||
tf.isEmptyMatch = len(prefix) == 0
|
||||
tf.matchCost = defaultCost
|
||||
return nil
|
||||
}
|
||||
rcv, err := getRegexpFromCache(expr)
|
||||
|
@ -293,6 +295,7 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
|
|||
}
|
||||
tf.orSuffixes = append(tf.orSuffixes[:0], rcv.orValues...)
|
||||
tf.reSuffixMatch = rcv.reMatch
|
||||
tf.matchCost = rcv.reCost
|
||||
tf.isEmptyMatch = len(prefix) == 0 && tf.reSuffixMatch(nil)
|
||||
if !tf.isNegative && len(key) == 0 && strings.IndexByte(rcv.literalSuffix, '.') >= 0 {
|
||||
// Reverse suffix is needed only for non-negative regexp filters on __name__ that contains dots.
|
||||
|
@ -357,6 +360,7 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
|
|||
sExpr := string(expr)
|
||||
orValues := getOrValues(sExpr)
|
||||
var reMatch func(b []byte) bool
|
||||
var reCost uint64
|
||||
var literalSuffix string
|
||||
if len(orValues) > 0 {
|
||||
if len(orValues) == 1 {
|
||||
|
@ -364,6 +368,7 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
|
|||
reMatch = func(b []byte) bool {
|
||||
return string(b) == v
|
||||
}
|
||||
reCost = defaultLiteralCost
|
||||
} else {
|
||||
reMatch = func(b []byte) bool {
|
||||
for _, v := range orValues {
|
||||
|
@ -373,14 +378,16 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
|
|||
}
|
||||
return false
|
||||
}
|
||||
reCost = uint64(len(orValues)) * defaultLiteralCost
|
||||
}
|
||||
} else {
|
||||
reMatch, literalSuffix = getOptimizedReMatchFunc(re.Match, sExpr)
|
||||
reMatch, literalSuffix, reCost = getOptimizedReMatchFunc(re.Match, sExpr)
|
||||
}
|
||||
|
||||
// Put the reMatch in the cache.
|
||||
rcv.orValues = orValues
|
||||
rcv.reMatch = reMatch
|
||||
rcv.reCost = reCost
|
||||
rcv.literalSuffix = literalSuffix
|
||||
|
||||
regexpCacheLock.Lock()
|
||||
|
@ -415,32 +422,40 @@ func getRegexpFromCache(expr []byte) (regexpCacheValue, error) {
|
|||
// It returns reMatch if it cannot find optimized function.
|
||||
//
|
||||
// It also returns literal suffix from the expr and the estimated cost of the returned match function.
|
||||
func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b []byte) bool, string) {
|
||||
func getOptimizedReMatchFunc(reMatch func(b []byte) bool, expr string) (func(b []byte) bool, string, uint64) {
|
||||
sre, err := syntax.Parse(expr, syntax.Perl)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err)
|
||||
}
|
||||
if matchFunc, literalSuffix := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
|
||||
if matchFunc, literalSuffix, reCost := getOptimizedReMatchFuncExt(reMatch, sre); matchFunc != nil {
|
||||
// Found optimized function for matching the expr.
|
||||
suffixUnescaped := tagCharsReverseRegexpEscaper.Replace(literalSuffix)
|
||||
return matchFunc, suffixUnescaped
|
||||
return matchFunc, suffixUnescaped, reCost
|
||||
}
|
||||
// Fall back to un-optimized reMatch.
|
||||
return reMatch, ""
|
||||
return reMatch, "", defaultReCost
|
||||
}
|
||||
|
||||
func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) (func(b []byte) bool, string) {
|
||||
// The default cost values below are derived from the results of BenchmarkOptimizedReMatchCost.
|
||||
|
||||
var (
|
||||
defaultCost uint64 = 1
|
||||
defaultLiteralCost uint64 = 3
|
||||
defaultReCost uint64 = 140
|
||||
)
|
||||
|
||||
func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp) (func(b []byte) bool, string, uint64) {
|
||||
if isDotStar(sre) {
|
||||
// '.*'
|
||||
return func(b []byte) bool {
|
||||
return true
|
||||
}, ""
|
||||
}, "", 1
|
||||
}
|
||||
if isDotPlus(sre) {
|
||||
// '.+'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > 0
|
||||
}, ""
|
||||
}, "", 1
|
||||
}
|
||||
switch sre.Op {
|
||||
case syntax.OpCapture:
|
||||
|
@ -448,13 +463,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
return getOptimizedReMatchFuncExt(reMatch, sre.Sub[0])
|
||||
case syntax.OpLiteral:
|
||||
if !isLiteral(sre) {
|
||||
return nil, ""
|
||||
return nil, "", 0
|
||||
}
|
||||
s := string(sre.Rune)
|
||||
// Literal match
|
||||
return func(b []byte) bool {
|
||||
return string(b) == s
|
||||
}, s
|
||||
}, s, defaultLiteralCost
|
||||
case syntax.OpConcat:
|
||||
if len(sre.Sub) == 2 {
|
||||
if isLiteral(sre.Sub[0]) {
|
||||
|
@ -463,13 +478,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
// 'prefix.*'
|
||||
return func(b []byte) bool {
|
||||
return bytes.HasPrefix(b, prefix)
|
||||
}, ""
|
||||
}, "", 2
|
||||
}
|
||||
if isDotPlus(sre.Sub[1]) {
|
||||
// 'prefix.+'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > len(prefix) && bytes.HasPrefix(b, prefix)
|
||||
}, ""
|
||||
}, "", 2
|
||||
}
|
||||
}
|
||||
if isLiteral(sre.Sub[1]) {
|
||||
|
@ -478,13 +493,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
// '.*suffix'
|
||||
return func(b []byte) bool {
|
||||
return bytes.HasSuffix(b, suffix)
|
||||
}, string(suffix)
|
||||
}, string(suffix), 3
|
||||
}
|
||||
if isDotPlus(sre.Sub[0]) {
|
||||
// '.+suffix'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > len(suffix) && bytes.HasSuffix(b[1:], suffix)
|
||||
}, string(suffix)
|
||||
}, string(suffix), 3
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -495,13 +510,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
// '.*middle.*'
|
||||
return func(b []byte) bool {
|
||||
return bytes.Contains(b, middle)
|
||||
}, ""
|
||||
}, "", 5
|
||||
}
|
||||
if isDotPlus(sre.Sub[2]) {
|
||||
// '.*middle.+'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > len(middle) && bytes.Contains(b[:len(b)-1], middle)
|
||||
}, ""
|
||||
}, "", 5
|
||||
}
|
||||
}
|
||||
if isDotPlus(sre.Sub[0]) {
|
||||
|
@ -509,13 +524,13 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
// '.+middle.*'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > len(middle) && bytes.Contains(b[1:], middle)
|
||||
}, ""
|
||||
}, "", 5
|
||||
}
|
||||
if isDotPlus(sre.Sub[2]) {
|
||||
// '.+middle.+'
|
||||
return func(b []byte) bool {
|
||||
return len(b) > len(middle)+1 && bytes.Contains(b[1:len(b)-1], middle)
|
||||
}, ""
|
||||
}, "", 5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -549,9 +564,9 @@ func getOptimizedReMatchFuncExt(reMatch func(b []byte) bool, sre *syntax.Regexp)
|
|||
}
|
||||
// Fall back to slow path.
|
||||
return reMatch(bOrig)
|
||||
}, string(suffix)
|
||||
}, string(suffix), defaultReCost
|
||||
default:
|
||||
return nil, ""
|
||||
return nil, "", 0
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -738,6 +753,7 @@ var (
|
|||
type regexpCacheValue struct {
|
||||
orValues []string
|
||||
reMatch func(b []byte) bool
|
||||
reCost uint64
|
||||
literalSuffix string
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
|
@ -307,3 +309,211 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) {
|
|||
})
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkOptimizedReMatchCost measures the execution cost of every match
// function shape used for tag filter suffixes, plus a generic regexp match.
//
// Run the following commands to get the execution cost of all matches:
//
//	go test -run=none -bench=BenchmarkOptimizedReMatchCost -count 20 | tee cost.txt
//	benchstat ./cost.txt
//
// Then express the overhead of each match as a multiple of the "default"
// sub-benchmark result.
//
// Every sub-benchmark has a unique name. The generic regexp case is named
// "regexp" rather than reusing "default", since the testing package would
// otherwise disambiguate the duplicate with a sequence suffix and the two
// results would be easy to mix up in the benchstat output.
func BenchmarkOptimizedReMatchCost(b *testing.B) {
	// sample is the value matched by most of the cases below.
	const sample = "foo1.bar.baz.sss.ddd"

	literal := sample
	orValues := []string{"foo", "bar", "baz"}
	prefix := []byte("foo1.bar")
	suffix := []byte("sss.ddd")
	middle := []byte("bar.baz")
	re := regexp.MustCompile(`foo[^.]*?\.bar\.baz\.[^.]*?\.ddd`)

	benchCases := []struct {
		// name is the sub-benchmark name; it must be unique.
		name string
		// input is the value passed to match on every iteration.
		input string
		// match is the function under measurement.
		match func(b []byte) bool
	}{
		{
			// Baseline: the cheapest possible check.
			name:  "default",
			input: sample,
			match: func(b []byte) bool {
				return len(b) == 0
			},
		},
		{
			name:  "literal match",
			input: sample,
			match: func(b []byte) bool {
				return string(b) == literal
			},
		},
		{
			// None of the alternatives match "ddd", so all of them are compared.
			name:  "foo|bar|baz",
			input: "ddd",
			match: func(b []byte) bool {
				for _, v := range orValues {
					if string(b) == v {
						return true
					}
				}
				return false
			},
		},
		{
			name:  ".*",
			input: sample,
			match: func(b []byte) bool {
				return true
			},
		},
		{
			name:  ".+",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > 0
			},
		},
		{
			name:  "prefix.*",
			input: sample,
			match: func(b []byte) bool {
				return bytes.HasPrefix(b, prefix)
			},
		},
		{
			name:  "prefix.+",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > len(prefix) && bytes.HasPrefix(b, prefix)
			},
		},
		{
			name:  ".*suffix",
			input: sample,
			match: func(b []byte) bool {
				return bytes.HasSuffix(b, suffix)
			},
		},
		{
			name:  ".+suffix",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > len(suffix) && bytes.HasSuffix(b[1:], suffix)
			},
		},
		{
			name:  ".*middle.*",
			input: sample,
			match: func(b []byte) bool {
				return bytes.Contains(b, middle)
			},
		},
		{
			name:  ".*middle.+",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > len(middle) && bytes.Contains(b[:len(b)-1], middle)
			},
		},
		{
			name:  ".+middle.*",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > len(middle) && bytes.Contains(b[1:], middle)
			},
		},
		{
			name:  ".+middle.+",
			input: sample,
			match: func(b []byte) bool {
				return len(b) > len(middle)+1 && bytes.Contains(b[1:len(b)-1], middle)
			},
		},
		{
			// Un-optimized fallback: a full regexp match.
			name:  "regexp",
			input: sample,
			match: func(b []byte) bool {
				return re.Match(b)
			},
		},
	}

	for i := range benchCases {
		bc := &benchCases[i]
		b.Run(bc.name, func(b *testing.B) {
			// Copy into locals so the hot loop does not dereference bc.
			input := []byte(bc.input)
			match := bc.match
			b.ReportAllocs()
			b.SetBytes(1)
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					match(input)
				}
			})
		})
	}
}
|
||||
|
|
Loading…
Reference in a new issue