mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-02-09 15:27:11 +00:00
lib/regexutil: cache MatchString results for unoptimized regexps
This increases relabeling performance by 3x for unoptimized regexps.
This commit is contained in:
parent
e220bc3cd5
commit
17289ff481
6 changed files with 132 additions and 20 deletions
48
lib/bytesutil/fast_string_matcher.go
Normal file
48
lib/bytesutil/fast_string_matcher.go
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FastStringMatcher implements fast matcher for strings.
//
// It caches string match results and returns them back on the next calls
// without calling the matchFunc, which may be expensive.
//
// FastStringMatcher must be created via NewFastStringMatcher, since the zero value
// has neither the cache map nor the matchFunc set.
type FastStringMatcher struct {
	// m holds the current cache as *sync.Map, which maps the input string to its bool match result.
	// The whole map is replaced with an empty one when too many entries are added to it.
	m atomic.Value

	// mLen counts the entries added to the current cache. It is updated atomically.
	mLen uint64

	// matchFunc is the function, whose results are cached.
	matchFunc func(s string) bool
}

// NewFastStringMatcher creates new matcher, which applies matchFunc to strings passed to Match()
//
// matchFunc must return the same result for the same input.
func NewFastStringMatcher(matchFunc func(s string) bool) *FastStringMatcher {
	fsm := &FastStringMatcher{
		matchFunc: matchFunc,
	}
	fsm.m.Store(&sync.Map{})
	return fsm
}

// Match applies matchFunc to s and returns the result.
//
// The result is taken from the cache if s has been already seen.
// Otherwise matchFunc is called and its result is put into the cache.
func (fsm *FastStringMatcher) Match(s string) bool {
	// maxCacheLen is the number of added entries after which the cache is dropped
	// in order to limit its size.
	const maxCacheLen = 100e3

	m := fsm.m.Load().(*sync.Map)
	if v, ok := m.Load(s); ok {
		// Fast path - s match result is found in the cache.
		return v.(bool)
	}

	// Slow path - run matchFunc for s and store the result in the cache.
	// The bool is stored directly instead of a pointer to it, so no separate bool
	// is allocated on the heap for every cached entry.
	b := fsm.matchFunc(s)
	if v, loaded := m.LoadOrStore(s, b); loaded {
		// Another goroutine has already cached the result for s.
		// Do not count the entry twice, so mLen doesn't run ahead of the real cache size.
		return v.(bool)
	}

	if n := atomic.AddUint64(&fsm.mLen, 1); n > maxCacheLen {
		// Too many entries were added - start from an empty cache.
		atomic.StoreUint64(&fsm.mLen, 0)
		fsm.m.Store(&sync.Map{})
	}
	return b
}
|
25
lib/bytesutil/fast_string_matcher_test.go
Normal file
25
lib/bytesutil/fast_string_matcher_test.go
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFastStringMatcher(t *testing.T) {
|
||||||
|
fsm := NewFastStringMatcher(func(s string) bool {
|
||||||
|
return strings.HasPrefix(s, "foo")
|
||||||
|
})
|
||||||
|
f := func(s string, resultExpected bool) {
|
||||||
|
t.Helper()
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
result := fsm.Match(s)
|
||||||
|
if result != resultExpected {
|
||||||
|
t.Fatalf("unexpected result for Match(%q) at iteration %d; got %v; want %v", s, i, result, resultExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f("", false)
|
||||||
|
f("foo", true)
|
||||||
|
f("a_b-C", false)
|
||||||
|
f("foobar", true)
|
||||||
|
}
|
33
lib/bytesutil/fast_string_matcher_timing_test.go
Normal file
33
lib/bytesutil/fast_string_matcher_timing_test.go
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkFastStringMatcher(b *testing.B) {
|
||||||
|
for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
|
||||||
|
b.Run(s, func(b *testing.B) {
|
||||||
|
benchmarkFastStringMatcher(b, s)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkFastStringMatcher(b *testing.B, s string) {
|
||||||
|
fsm := NewFastStringMatcher(func(s string) bool {
|
||||||
|
return strings.HasPrefix(s, "foo")
|
||||||
|
})
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.SetBytes(1)
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
n := uint64(0)
|
||||||
|
for pb.Next() {
|
||||||
|
v := fsm.Match(s)
|
||||||
|
if v {
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
atomic.AddUint64(&GlobalSink, n)
|
||||||
|
})
|
||||||
|
}
|
|
@ -2,6 +2,7 @@ package bytesutil
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -18,11 +19,13 @@ func benchmarkFastStringTransformer(b *testing.B, s string) {
|
||||||
b.ReportAllocs()
|
b.ReportAllocs()
|
||||||
b.SetBytes(1)
|
b.SetBytes(1)
|
||||||
b.RunParallel(func(pb *testing.PB) {
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
n := uint64(0)
|
||||||
for pb.Next() {
|
for pb.Next() {
|
||||||
sTransformed := fst.Transform(s)
|
sTransformed := fst.Transform(s)
|
||||||
GlobalSink += len(sTransformed)
|
n += uint64(len(sTransformed))
|
||||||
}
|
}
|
||||||
|
atomic.AddUint64(&GlobalSink, n)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
var GlobalSink int
|
var GlobalSink uint64
|
||||||
|
|
|
@ -674,8 +674,9 @@ scrape_config_files:
|
||||||
}
|
}
|
||||||
|
|
||||||
func resetNonEssentialFields(sws []*ScrapeWork) {
|
func resetNonEssentialFields(sws []*ScrapeWork) {
|
||||||
for i := range sws {
|
for _, sw := range sws {
|
||||||
sws[i].OriginalLabels = nil
|
sw.OriginalLabels = nil
|
||||||
|
sw.MetricRelabelConfigs = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1446,10 +1447,6 @@ scrape_configs:
|
||||||
},
|
},
|
||||||
AuthConfig: &promauth.Config{},
|
AuthConfig: &promauth.Config{},
|
||||||
ProxyAuthConfig: &promauth.Config{},
|
ProxyAuthConfig: &promauth.Config{},
|
||||||
MetricRelabelConfigs: mustParseRelabelConfigs(`
|
|
||||||
- source_labels: [foo]
|
|
||||||
target_label: abc
|
|
||||||
`),
|
|
||||||
jobNameOriginal: "foo",
|
jobNameOriginal: "foo",
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
@ -1847,8 +1844,10 @@ func TestScrapeConfigClone(t *testing.T) {
|
||||||
f := func(sc *ScrapeConfig) {
|
f := func(sc *ScrapeConfig) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
scCopy := sc.clone()
|
scCopy := sc.clone()
|
||||||
if !reflect.DeepEqual(sc, scCopy) {
|
scJSON := sc.marshalJSON()
|
||||||
t.Fatalf("unexpected result after unmarshalJSON() for JSON:\n%s", sc.marshalJSON())
|
scCopyJSON := scCopy.marshalJSON()
|
||||||
|
if !reflect.DeepEqual(scJSON, scCopyJSON) {
|
||||||
|
t.Fatalf("unexpected cloned result:\ngot\n%s\nwant\n%s", scCopyJSON, scJSON)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,8 @@ package regexutil
|
||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
// PromRegex implements an optimized string matching for Prometheus-like regex.
|
// PromRegex implements an optimized string matching for Prometheus-like regex.
|
||||||
|
@ -13,6 +15,8 @@ import (
|
||||||
// - alternate strings such as "foo|bar|baz"
|
// - alternate strings such as "foo|bar|baz"
|
||||||
// - prefix match such as "foo.*" or "foo.+"
|
// - prefix match such as "foo.*" or "foo.+"
|
||||||
// - substring match such as ".*foo.*" or ".+bar.+"
|
// - substring match such as ".*foo.*" or ".+bar.+"
|
||||||
|
//
|
||||||
|
// The rest of regexps are also optimized by returning cached match results for the same input strings.
|
||||||
type PromRegex struct {
|
type PromRegex struct {
|
||||||
// prefix contains literal prefix for regex.
|
// prefix contains literal prefix for regex.
|
||||||
// For example, prefix="foo" for regex="foo(a|b)"
|
// For example, prefix="foo" for regex="foo(a|b)"
|
||||||
|
@ -32,9 +36,8 @@ type PromRegex struct {
|
||||||
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
|
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
|
||||||
orValues []string
|
orValues []string
|
||||||
|
|
||||||
// reSuffix contains an anchored regexp built from suffix:
|
// reSuffixMatcher contains fast matcher for "^suffix$"
|
||||||
// "^(?:suffix)$"
|
reSuffixMatcher *bytesutil.FastStringMatcher
|
||||||
reSuffix *regexp.Regexp
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewPromRegex returns PromRegex for the given expr.
|
// NewPromRegex returns PromRegex for the given expr.
|
||||||
|
@ -50,13 +53,14 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
||||||
// Anchor suffix to the beginning and the end of the matching string.
|
// Anchor suffix to the beginning and the end of the matching string.
|
||||||
suffixExpr := "^(?:" + suffix + ")$"
|
suffixExpr := "^(?:" + suffix + ")$"
|
||||||
reSuffix := regexp.MustCompile(suffixExpr)
|
reSuffix := regexp.MustCompile(suffixExpr)
|
||||||
|
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
||||||
pr := &PromRegex{
|
pr := &PromRegex{
|
||||||
prefix: prefix,
|
prefix: prefix,
|
||||||
suffix: suffix,
|
suffix: suffix,
|
||||||
substrDotStar: substrDotStar,
|
substrDotStar: substrDotStar,
|
||||||
substrDotPlus: substrDotPlus,
|
substrDotPlus: substrDotPlus,
|
||||||
orValues: orValues,
|
orValues: orValues,
|
||||||
reSuffix: reSuffix,
|
reSuffixMatcher: reSuffixMatcher,
|
||||||
}
|
}
|
||||||
return pr, nil
|
return pr, nil
|
||||||
}
|
}
|
||||||
|
@ -106,7 +110,7 @@ func (pr *PromRegex) MatchString(s string) bool {
|
||||||
return len(s) > 0
|
return len(s) > 0
|
||||||
}
|
}
|
||||||
// Fall back to slow path by matching the original regexp.
|
// Fall back to slow path by matching the original regexp.
|
||||||
return pr.reSuffix.MatchString(s)
|
return pr.reSuffixMatcher.Match(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSubstringLiteral(expr, prefixSuffix string) string {
|
func getSubstringLiteral(expr, prefixSuffix string) string {
|
||||||
|
|
Loading…
Reference in a new issue