From 17289ff4813d255eef72bd6704ad21fea66d1b01 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 30 Sep 2022 10:38:44 +0300 Subject: [PATCH] lib/regexutil: cache MatchString results for unoptimized regexps This increases relabeling performance by 3x for unoptimized regexs --- lib/bytesutil/fast_string_matcher.go | 48 +++++++++++++++++++ lib/bytesutil/fast_string_matcher_test.go | 25 ++++++++++ .../fast_string_matcher_timing_test.go | 33 +++++++++++++ .../fast_string_transformer_timing_test.go | 7 ++- lib/promscrape/config_test.go | 15 +++--- lib/regexutil/promregex.go | 24 ++++++---- 6 files changed, 132 insertions(+), 20 deletions(-) create mode 100644 lib/bytesutil/fast_string_matcher.go create mode 100644 lib/bytesutil/fast_string_matcher_test.go create mode 100644 lib/bytesutil/fast_string_matcher_timing_test.go diff --git a/lib/bytesutil/fast_string_matcher.go b/lib/bytesutil/fast_string_matcher.go new file mode 100644 index 000000000..40658e499 --- /dev/null +++ b/lib/bytesutil/fast_string_matcher.go @@ -0,0 +1,48 @@ +package bytesutil + +import ( + "sync" + "sync/atomic" +) + +// FastStringMatcher implements fast matcher for strings. +// +// It caches string match results and returns them back on the next calls +// without calling the matchFunc, which may be expensive. +type FastStringMatcher struct { + m atomic.Value + mLen uint64 + + matchFunc func(s string) bool +} + +// NewFastStringMatcher creates new matcher, which applies matchFunc to strings passed to Match() +// +// matchFunc must return the same result for the same input. +func NewFastStringMatcher(matchFunc func(s string) bool) *FastStringMatcher { + var fsm FastStringMatcher + fsm.m.Store(&sync.Map{}) + fsm.matchFunc = matchFunc + return &fsm +} + +// Match applies matchFunc to s and returns the result. +func (fsm *FastStringMatcher) Match(s string) bool { + m := fsm.m.Load().(*sync.Map) + v, ok := m.Load(s) + if ok { + // Fast path - s match result is found in the cache. + bp := v.(*bool) + return *bp + } + // Slow path - run matchFunc for s and store the result in the cache. + b := fsm.matchFunc(s) + bp := &b + m.Store(s, bp) + n := atomic.AddUint64(&fsm.mLen, 1) + if n > 100e3 { + atomic.StoreUint64(&fsm.mLen, 0) + fsm.m.Store(&sync.Map{}) + } + return b +} diff --git a/lib/bytesutil/fast_string_matcher_test.go b/lib/bytesutil/fast_string_matcher_test.go new file mode 100644 index 000000000..7ea7925ce --- /dev/null +++ b/lib/bytesutil/fast_string_matcher_test.go @@ -0,0 +1,25 @@ +package bytesutil + +import ( + "strings" + "testing" +) + +func TestFastStringMatcher(t *testing.T) { + fsm := NewFastStringMatcher(func(s string) bool { + return strings.HasPrefix(s, "foo") + }) + f := func(s string, resultExpected bool) { + t.Helper() + for i := 0; i < 10; i++ { + result := fsm.Match(s) + if result != resultExpected { + t.Fatalf("unexpected result for Match(%q) at iteration %d; got %v; want %v", s, i, result, resultExpected) + } + } + } + f("", false) + f("foo", true) + f("a_b-C", false) + f("foobar", true) +} diff --git a/lib/bytesutil/fast_string_matcher_timing_test.go b/lib/bytesutil/fast_string_matcher_timing_test.go new file mode 100644 index 000000000..7ccd51eed --- /dev/null +++ b/lib/bytesutil/fast_string_matcher_timing_test.go @@ -0,0 +1,33 @@ +package bytesutil + +import ( + "strings" + "sync/atomic" + "testing" +) + +func BenchmarkFastStringMatcher(b *testing.B) { + for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} { + b.Run(s, func(b *testing.B) { + benchmarkFastStringMatcher(b, s) + }) + } +} + +func benchmarkFastStringMatcher(b *testing.B, s string) { + fsm := NewFastStringMatcher(func(s string) bool { + return strings.HasPrefix(s, "foo") + }) + b.ReportAllocs() + b.SetBytes(1) + b.RunParallel(func(pb *testing.PB) { + n := uint64(0) + for pb.Next() { + v := fsm.Match(s) + if v { + n++ + } + } + atomic.AddUint64(&GlobalSink, n) + }) +} diff --git a/lib/bytesutil/fast_string_transformer_timing_test.go b/lib/bytesutil/fast_string_transformer_timing_test.go index ed2324fb1..b23fdbc05 100644 --- a/lib/bytesutil/fast_string_transformer_timing_test.go +++ b/lib/bytesutil/fast_string_transformer_timing_test.go @@ -2,6 +2,7 @@ package bytesutil import ( "strings" + "sync/atomic" "testing" ) @@ -18,11 +19,13 @@ func benchmarkFastStringTransformer(b *testing.B, s string) { b.ReportAllocs() b.SetBytes(1) b.RunParallel(func(pb *testing.PB) { + n := uint64(0) for pb.Next() { sTransformed := fst.Transform(s) - GlobalSink += len(sTransformed) + n += uint64(len(sTransformed)) } + atomic.AddUint64(&GlobalSink, n) }) } -var GlobalSink int +var GlobalSink uint64 diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 7a728b428..7f256f90f 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -674,8 +674,9 @@ scrape_config_files: } func resetNonEssentialFields(sws []*ScrapeWork) { - for i := range sws { - sws[i].OriginalLabels = nil + for _, sw := range sws { + sw.OriginalLabels = nil + sw.MetricRelabelConfigs = nil } } @@ -1446,10 +1447,6 @@ scrape_configs: }, AuthConfig: &promauth.Config{}, ProxyAuthConfig: &promauth.Config{}, - MetricRelabelConfigs: mustParseRelabelConfigs(` -- source_labels: [foo] - target_label: abc -`), jobNameOriginal: "foo", }, }) @@ -1847,8 +1844,10 @@ func TestScrapeConfigClone(t *testing.T) { f := func(sc *ScrapeConfig) { t.Helper() scCopy := sc.clone() - if !reflect.DeepEqual(sc, scCopy) { - t.Fatalf("unexpected result after unmarshalJSON() for JSON:\n%s", sc.marshalJSON()) + scJSON := sc.marshalJSON() + scCopyJSON := scCopy.marshalJSON() + if !reflect.DeepEqual(scJSON, scCopyJSON) { + t.Fatalf("unexpected cloned result:\ngot\n%s\nwant\n%s", scCopyJSON, scJSON) } } diff --git a/lib/regexutil/promregex.go b/lib/regexutil/promregex.go index 96e39ec96..29ec622f3 100644 --- a/lib/regexutil/promregex.go +++ b/lib/regexutil/promregex.go @@ -3,6 +3,8 @@ package regexutil import ( "regexp" "strings" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" ) // PromRegex implements an optimized string matching for Prometheus-like regex. @@ -13,6 +15,8 @@ import ( // - alternate strings such as "foo|bar|baz" // - prefix match such as "foo.*" or "foo.+" // - substring match such as ".*foo.*" or ".+bar.+" +// +// The rest of regexps are also optimized by returning cached match results for the same input strings. type PromRegex struct { // prefix contains literal prefix for regex. // For example, prefix="foo" for regex="foo(a|b)" @@ -32,9 +36,8 @@ type PromRegex struct { // For example, orValues contain ["foo","bar","baz"] for regex suffix="foo|bar|baz" orValues []string - // reSuffix contains an anchored regexp built from suffix: - // "^(?:suffix)$" - reSuffix *regexp.Regexp + // reSuffixMatcher contains fast matcher for "^suffix$" + reSuffixMatcher *bytesutil.FastStringMatcher } // NewPromRegex returns PromRegex for the given expr. @@ -50,13 +53,14 @@ func NewPromRegex(expr string) (*PromRegex, error) { // Anchor suffix to the beginning and the end of the matching string. suffixExpr := "^(?:" + suffix + ")$" reSuffix := regexp.MustCompile(suffixExpr) + reSuffixMatcher := bytesutil.NewFastStringMatcher(reSuffix.MatchString) pr := &PromRegex{ - prefix: prefix, - suffix: suffix, - substrDotStar: substrDotStar, - substrDotPlus: substrDotPlus, - orValues: orValues, - reSuffix: reSuffix, + prefix: prefix, + suffix: suffix, + substrDotStar: substrDotStar, + substrDotPlus: substrDotPlus, + orValues: orValues, + reSuffixMatcher: reSuffixMatcher, } return pr, nil } @@ -106,7 +110,7 @@ func (pr *PromRegex) MatchString(s string) bool { return len(s) > 0 } // Fall back to slow path by matching the original regexp. - return pr.reSuffix.MatchString(s) + return pr.reSuffixMatcher.Match(s) } func getSubstringLiteral(expr, prefixSuffix string) string {