mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/regexutil: cache MatchString results for unoptimized regexps
This increases relabeling performance by 3x for unoptimized regexps.
This commit is contained in:
parent
e220bc3cd5
commit
17289ff481
6 changed files with 132 additions and 20 deletions
48
lib/bytesutil/fast_string_matcher.go
Normal file
48
lib/bytesutil/fast_string_matcher.go
Normal file
|
@ -0,0 +1,48 @@
|
|||
package bytesutil
|
||||
|
||||
import (
	"strings"
	"sync"
	"sync/atomic"
)
|
||||
|
||||
// FastStringMatcher implements fast matcher for strings.
//
// It caches string match results and returns them back on the next calls
// without calling the matchFunc, which may be expensive.
//
// A FastStringMatcher must be created with NewFastStringMatcher.
// The cache is built on sync.Map and atomic operations, so Match can be called
// from multiple goroutines as long as matchFunc itself is safe for concurrent use.
type FastStringMatcher struct {
	// m holds the current cache as a *sync.Map, which maps input strings
	// to *bool match results. The whole map is replaced with an empty one
	// when too many results have been stored.
	m atomic.Value

	// mLen is the number of results stored since the last cache reset.
	// It must be accessed only via atomic operations.
	mLen uint64

	// matchFunc is the function whose results are cached.
	matchFunc func(s string) bool
}

// NewFastStringMatcher creates new matcher, which applies matchFunc to strings passed to Match()
//
// matchFunc must return the same result for the same input.
func NewFastStringMatcher(matchFunc func(s string) bool) *FastStringMatcher {
	var fsm FastStringMatcher
	fsm.m.Store(&sync.Map{})
	fsm.matchFunc = matchFunc
	return &fsm
}

// Match applies matchFunc to s and returns the result.
//
// The result is taken from the cache when s has been seen since the last cache reset.
// Otherwise matchFunc is called and its result is cached for subsequent calls.
func (fsm *FastStringMatcher) Match(s string) bool {
	// maxCachedResults limits the number of stored results before the cache is dropped.
	const maxCachedResults = 100e3

	m := fsm.m.Load().(*sync.Map)
	if v, ok := m.Load(s); ok {
		// Fast path - s match result is found in the cache.
		return *v.(*bool)
	}

	// Slow path - run matchFunc for s and store the result in the cache.
	b := fsm.matchFunc(s)

	// Use a private copy of s as the cache key. The caller's s may be a slice
	// of a much bigger string, which would be kept alive by the cache,
	// or it may point to a temporary buffer, which is overwritten later.
	m.Store(strings.Clone(s), &b)

	if atomic.AddUint64(&fsm.mLen, 1) > maxCachedResults {
		// Too many results have been stored. Start over with an empty cache
		// in order to limit memory usage.
		atomic.StoreUint64(&fsm.mLen, 0)
		fsm.m.Store(&sync.Map{})
	}
	return b
}
|
25
lib/bytesutil/fast_string_matcher_test.go
Normal file
25
lib/bytesutil/fast_string_matcher_test.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
package bytesutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFastStringMatcher(t *testing.T) {
|
||||
fsm := NewFastStringMatcher(func(s string) bool {
|
||||
return strings.HasPrefix(s, "foo")
|
||||
})
|
||||
f := func(s string, resultExpected bool) {
|
||||
t.Helper()
|
||||
for i := 0; i < 10; i++ {
|
||||
result := fsm.Match(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for Match(%q) at iteration %d; got %v; want %v", s, i, result, resultExpected)
|
||||
}
|
||||
}
|
||||
}
|
||||
f("", false)
|
||||
f("foo", true)
|
||||
f("a_b-C", false)
|
||||
f("foobar", true)
|
||||
}
|
33
lib/bytesutil/fast_string_matcher_timing_test.go
Normal file
33
lib/bytesutil/fast_string_matcher_timing_test.go
Normal file
|
@ -0,0 +1,33 @@
|
|||
package bytesutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkFastStringMatcher(b *testing.B) {
|
||||
for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
|
||||
b.Run(s, func(b *testing.B) {
|
||||
benchmarkFastStringMatcher(b, s)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkFastStringMatcher(b *testing.B, s string) {
|
||||
fsm := NewFastStringMatcher(func(s string) bool {
|
||||
return strings.HasPrefix(s, "foo")
|
||||
})
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(1)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
n := uint64(0)
|
||||
for pb.Next() {
|
||||
v := fsm.Match(s)
|
||||
if v {
|
||||
n++
|
||||
}
|
||||
}
|
||||
atomic.AddUint64(&GlobalSink, n)
|
||||
})
|
||||
}
|
|
@ -2,6 +2,7 @@ package bytesutil
|
|||
|
||||
import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -18,11 +19,13 @@ func benchmarkFastStringTransformer(b *testing.B, s string) {
|
|||
b.ReportAllocs()
|
||||
b.SetBytes(1)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
n := uint64(0)
|
||||
for pb.Next() {
|
||||
sTransformed := fst.Transform(s)
|
||||
GlobalSink += len(sTransformed)
|
||||
n += uint64(len(sTransformed))
|
||||
}
|
||||
atomic.AddUint64(&GlobalSink, n)
|
||||
})
|
||||
}
|
||||
|
||||
var GlobalSink int
|
||||
var GlobalSink uint64
|
||||
|
|
|
@ -674,8 +674,9 @@ scrape_config_files:
|
|||
}
|
||||
|
||||
func resetNonEssentialFields(sws []*ScrapeWork) {
|
||||
for i := range sws {
|
||||
sws[i].OriginalLabels = nil
|
||||
for _, sw := range sws {
|
||||
sw.OriginalLabels = nil
|
||||
sw.MetricRelabelConfigs = nil
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1446,10 +1447,6 @@ scrape_configs:
|
|||
},
|
||||
AuthConfig: &promauth.Config{},
|
||||
ProxyAuthConfig: &promauth.Config{},
|
||||
MetricRelabelConfigs: mustParseRelabelConfigs(`
|
||||
- source_labels: [foo]
|
||||
target_label: abc
|
||||
`),
|
||||
jobNameOriginal: "foo",
|
||||
},
|
||||
})
|
||||
|
@ -1847,8 +1844,10 @@ func TestScrapeConfigClone(t *testing.T) {
|
|||
f := func(sc *ScrapeConfig) {
|
||||
t.Helper()
|
||||
scCopy := sc.clone()
|
||||
if !reflect.DeepEqual(sc, scCopy) {
|
||||
t.Fatalf("unexpected result after unmarshalJSON() for JSON:\n%s", sc.marshalJSON())
|
||||
scJSON := sc.marshalJSON()
|
||||
scCopyJSON := scCopy.marshalJSON()
|
||||
if !reflect.DeepEqual(scJSON, scCopyJSON) {
|
||||
t.Fatalf("unexpected cloned result:\ngot\n%s\nwant\n%s", scCopyJSON, scJSON)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,8 @@ package regexutil
|
|||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
// PromRegex implements an optimized string matching for Prometheus-like regex.
|
||||
|
@ -13,6 +15,8 @@ import (
|
|||
// - alternate strings such as "foo|bar|baz"
|
||||
// - prefix match such as "foo.*" or "foo.+"
|
||||
// - substring match such as ".*foo.*" or ".+bar.+"
|
||||
//
|
||||
// The rest of regexps are also optimized by returning cached match results for the same input strings.
|
||||
type PromRegex struct {
|
||||
// prefix contains literal prefix for regex.
|
||||
// For example, prefix="foo" for regex="foo(a|b)"
|
||||
|
@ -32,9 +36,8 @@ type PromRegex struct {
|
|||
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
|
||||
orValues []string
|
||||
|
||||
// reSuffix contains an anchored regexp built from suffix:
|
||||
// "^(?:suffix)$"
|
||||
reSuffix *regexp.Regexp
|
||||
// reSuffixMatcher contains fast matcher for "^suffix$"
|
||||
reSuffixMatcher *bytesutil.FastStringMatcher
|
||||
}
|
||||
|
||||
// NewPromRegex returns PromRegex for the given expr.
|
||||
|
@ -50,13 +53,14 @@ func NewPromRegex(expr string) (*PromRegex, error) {
|
|||
// Anchor suffix to the beginning and the end of the matching string.
|
||||
suffixExpr := "^(?:" + suffix + ")$"
|
||||
reSuffix := regexp.MustCompile(suffixExpr)
|
||||
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
|
||||
pr := &PromRegex{
|
||||
prefix: prefix,
|
||||
suffix: suffix,
|
||||
substrDotStar: substrDotStar,
|
||||
substrDotPlus: substrDotPlus,
|
||||
orValues: orValues,
|
||||
reSuffix: reSuffix,
|
||||
prefix: prefix,
|
||||
suffix: suffix,
|
||||
substrDotStar: substrDotStar,
|
||||
substrDotPlus: substrDotPlus,
|
||||
orValues: orValues,
|
||||
reSuffixMatcher: reSuffixMatcher,
|
||||
}
|
||||
return pr, nil
|
||||
}
|
||||
|
@ -106,7 +110,7 @@ func (pr *PromRegex) MatchString(s string) bool {
|
|||
return len(s) > 0
|
||||
}
|
||||
// Fall back to slow path by matching the original regexp.
|
||||
return pr.reSuffix.MatchString(s)
|
||||
return pr.reSuffixMatcher.Match(s)
|
||||
}
|
||||
|
||||
func getSubstringLiteral(expr, prefixSuffix string) string {
|
||||
|
|
Loading…
Reference in a new issue