lib/regexutil: cache MatchString results for unoptimized regexps

This increases relabeling performance by 3x for unoptimized regexps.
This commit is contained in:
Aliaksandr Valialkin 2022-09-30 10:38:44 +03:00
parent d0a9fff70c
commit b4bb1477fe
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
6 changed files with 132 additions and 20 deletions

View file

@ -0,0 +1,48 @@
package bytesutil
import (
	"strings"
	"sync"
	"sync/atomic"
)
// FastStringMatcher implements fast matcher for strings.
//
// It caches string match results and returns them back on the next calls
// without calling the matchFunc, which may be expensive.
//
// The cache is replaced with an empty one after more than 100K cache misses
// have been recorded, so its size stays bounded.
type FastStringMatcher struct {
	// m holds the current cache as *sync.Map, which maps the input string
	// to *bool with the match result.
	m atomic.Value

	// mLen counts the cache misses recorded since the last cache reset.
	// It must be accessed only via sync/atomic functions.
	mLen uint64

	// matchFunc is the user-supplied function, which is called on cache miss.
	matchFunc func(s string) bool
}

// NewFastStringMatcher creates new matcher, which applies matchFunc to strings passed to Match()
//
// matchFunc must return the same result for the same input.
func NewFastStringMatcher(matchFunc func(s string) bool) *FastStringMatcher {
	var fsm FastStringMatcher
	fsm.m.Store(&sync.Map{})
	fsm.matchFunc = matchFunc
	return &fsm
}

// Match applies matchFunc to s and returns the result.
//
// The result is taken from the cache when it is already known for s.
// Otherwise matchFunc is called and its result is put into the cache.
func (fsm *FastStringMatcher) Match(s string) bool {
	m := fsm.m.Load().(*sync.Map)
	v, ok := m.Load(s)
	if ok {
		// Fast path - s match result is found in the cache.
		bp := v.(*bool)
		return *bp
	}
	// Slow path - run matchFunc for s and store the result in the cache.
	b := fsm.matchFunc(s)
	bp := &b
	// Store a private copy of s as the cache key. Otherwise the cache would pin
	// the whole parent string when s is a substring of a bigger string,
	// and the key could be silently modified if the caller passed a string,
	// which shares memory with a byte buffer it reuses later.
	s = strings.Clone(s)
	m.Store(s, bp)
	n := atomic.AddUint64(&fsm.mLen, 1)
	if n > 100e3 {
		// Too many misses have been recorded - start from an empty cache
		// in order to limit memory usage.
		atomic.StoreUint64(&fsm.mLen, 0)
		fsm.m.Store(&sync.Map{})
	}
	return b
}

View file

@ -0,0 +1,25 @@
package bytesutil
import (
"strings"
"testing"
)
// TestFastStringMatcher verifies that Match() returns the expected result
// for a matcher built on top of a "starts with foo" check.
func TestFastStringMatcher(t *testing.T) {
	hasFooPrefix := func(s string) bool {
		return strings.HasPrefix(s, "foo")
	}
	fsm := NewFastStringMatcher(hasFooPrefix)

	check := func(s string, resultExpected bool) {
		t.Helper()
		// Call Match several times for the same input,
		// so the results returned on repeated calls are verified too.
		for i := 0; i < 10; i++ {
			if result := fsm.Match(s); result != resultExpected {
				t.Fatalf("unexpected result for Match(%q) at iteration %d; got %v; want %v", s, i, result, resultExpected)
			}
		}
	}

	testCases := []struct {
		input string
		want  bool
	}{
		{"", false},
		{"foo", true},
		{"a_b-C", false},
		{"foobar", true},
	}
	for _, tc := range testCases {
		check(tc.input, tc.want)
	}
}

View file

@ -0,0 +1,33 @@
package bytesutil
import (
"strings"
"sync/atomic"
"testing"
)
// BenchmarkFastStringMatcher runs benchmarkFastStringMatcher as a sub-benchmark
// for every sample input string.
func BenchmarkFastStringMatcher(b *testing.B) {
	inputs := []string{"", "foo", "foo-bar-baz", "http_requests_total"}
	for i := range inputs {
		input := inputs[i]
		b.Run(input, func(b *testing.B) {
			benchmarkFastStringMatcher(b, input)
		})
	}
}
// benchmarkFastStringMatcher measures parallel Match(s) calls
// on a matcher, which checks whether the string starts with "foo".
func benchmarkFastStringMatcher(b *testing.B, s string) {
	hasFooPrefix := func(v string) bool {
		return strings.HasPrefix(v, "foo")
	}
	fsm := NewFastStringMatcher(hasFooPrefix)

	b.ReportAllocs()
	b.SetBytes(1)
	b.RunParallel(func(pb *testing.PB) {
		// Count matches locally and publish them to GlobalSink once per goroutine,
		// so the Match results are actually used.
		var matches uint64
		for pb.Next() {
			if fsm.Match(s) {
				matches++
			}
		}
		atomic.AddUint64(&GlobalSink, matches)
	})
}

View file

@ -2,6 +2,7 @@ package bytesutil
import (
"strings"
"sync/atomic"
"testing"
)
@ -18,11 +19,13 @@ func benchmarkFastStringTransformer(b *testing.B, s string) {
b.ReportAllocs()
b.SetBytes(1)
b.RunParallel(func(pb *testing.PB) {
n := uint64(0)
for pb.Next() {
sTransformed := fst.Transform(s)
GlobalSink += len(sTransformed)
n += uint64(len(sTransformed))
}
atomic.AddUint64(&GlobalSink, n)
})
}
var GlobalSink int
var GlobalSink uint64

View file

@ -674,8 +674,9 @@ scrape_config_files:
}
func resetNonEssentialFields(sws []*ScrapeWork) {
for i := range sws {
sws[i].OriginalLabels = nil
for _, sw := range sws {
sw.OriginalLabels = nil
sw.MetricRelabelConfigs = nil
}
}
@ -1446,10 +1447,6 @@ scrape_configs:
},
AuthConfig: &promauth.Config{},
ProxyAuthConfig: &promauth.Config{},
MetricRelabelConfigs: mustParseRelabelConfigs(`
- source_labels: [foo]
target_label: abc
`),
jobNameOriginal: "foo",
},
})
@ -1847,8 +1844,10 @@ func TestScrapeConfigClone(t *testing.T) {
f := func(sc *ScrapeConfig) {
t.Helper()
scCopy := sc.clone()
if !reflect.DeepEqual(sc, scCopy) {
t.Fatalf("unexpected result after unmarshalJSON() for JSON:\n%s", sc.marshalJSON())
scJSON := sc.marshalJSON()
scCopyJSON := scCopy.marshalJSON()
if !reflect.DeepEqual(scJSON, scCopyJSON) {
t.Fatalf("unexpected cloned result:\ngot\n%s\nwant\n%s", scCopyJSON, scJSON)
}
}

View file

@ -3,6 +3,8 @@ package regexutil
import (
"regexp"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
)
// PromRegex implements an optimized string matching for Prometheus-like regex.
@ -13,6 +15,8 @@ import (
// - alternate strings such as "foo|bar|baz"
// - prefix match such as "foo.*" or "foo.+"
// - substring match such as ".*foo.*" or ".+bar.+"
//
// The rest of regexps are also optimized by returning cached match results for the same input strings.
type PromRegex struct {
// prefix contains literal prefix for regex.
// For example, prefix="foo" for regex="foo(a|b)"
@ -32,9 +36,8 @@ type PromRegex struct {
// For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz"
orValues []string
// reSuffix contains an anchored regexp built from suffix:
// "^(?:suffix)$"
reSuffix *regexp.Regexp
// reSuffixMatcher contains fast matcher for "^suffix$"
reSuffixMatcher *bytesutil.FastStringMatcher
}
// NewPromRegex returns PromRegex for the given expr.
@ -50,13 +53,14 @@ func NewPromRegex(expr string) (*PromRegex, error) {
// Anchor suffix to the beginning and the end of the matching string.
suffixExpr := "^(?:" + suffix + ")$"
reSuffix := regexp.MustCompile(suffixExpr)
reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString)
pr := &PromRegex{
prefix: prefix,
suffix: suffix,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
reSuffix: reSuffix,
prefix: prefix,
suffix: suffix,
substrDotStar: substrDotStar,
substrDotPlus: substrDotPlus,
orValues: orValues,
reSuffixMatcher: reSuffixMatcher,
}
return pr, nil
}
@ -106,7 +110,7 @@ func (pr *PromRegex) MatchString(s string) bool {
return len(s) > 0
}
// Fall back to slow path by matching the original regexp.
return pr.reSuffix.MatchString(s)
return pr.reSuffixMatcher.Match(s)
}
func getSubstringLiteral(expr, prefixSuffix string) string {