From 6a32a640736d3a7770a52cd8b35b8ca25dc9d5e8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 28 Sep 2022 10:39:01 +0300 Subject: [PATCH] lib/bytesutil: add FastStringTransformer and use it in the rest of the code where needed --- lib/bytesutil/fast_string_transformer.go | 57 +++++++++++++++++++ lib/bytesutil/fast_string_transformer_test.go | 22 +++++++ .../fast_string_transformer_timing_test.go | 28 +++++++++ lib/promrelabel/relabel.go | 41 ++----------- lib/promscrape/discoveryutils/utils.go | 42 ++------------ lib/protoparser/datadog/streamparser.go | 43 +++----------- 6 files changed, 126 insertions(+), 107 deletions(-) create mode 100644 lib/bytesutil/fast_string_transformer.go create mode 100644 lib/bytesutil/fast_string_transformer_test.go create mode 100644 lib/bytesutil/fast_string_transformer_timing_test.go diff --git a/lib/bytesutil/fast_string_transformer.go b/lib/bytesutil/fast_string_transformer.go new file mode 100644 index 000000000..60549f6eb --- /dev/null +++ b/lib/bytesutil/fast_string_transformer.go @@ -0,0 +1,57 @@ +package bytesutil + +import ( + "strings" + "sync" + "sync/atomic" +) + +// FastStringTransformer implements fast transformer for strings. +// +// It caches transformed strings and returns them back on the next calls +// without calling the transformFunc, which may be expensive. +type FastStringTransformer struct { + m atomic.Value + mLen uint64 + + transformFunc func(s string) string +} + +// NewFastStringTransformer creates new transformer, which applies transformFunc to strings passed to Transform() +// +// transformFunc must return the same result for the same input. +func NewFastStringTransformer(transformFunc func(s string) string) *FastStringTransformer { + var fst FastStringTransformer + fst.m.Store(&sync.Map{}) + fst.transformFunc = transformFunc + return &fst +} + +// Transform applies transformFunc to s and returns the result. +func (fst *FastStringTransformer) Transform(s string) string { + m := fst.m.Load().(*sync.Map) + v, ok := m.Load(s) + if ok { + // Fast path - the transformed s is found in the cache. + sp := v.(*string) + return *sp + } + // Slow path - transform s and store it in the cache. + sTransformed := fst.transformFunc(s) + // Make a copy of s in order to limit memory usage to the s length, + // since the s may point to bigger string. + s = strings.Clone(s) + if sTransformed == s { + // point sTransformed to just allocated s, since it may point to s, + // which, in turn, can point to bigger string. + sTransformed = s + } + sp := &sTransformed + m.Store(s, sp) + n := atomic.AddUint64(&fst.mLen, 1) + if n > 100e3 { + atomic.StoreUint64(&fst.mLen, 0) + fst.m.Store(&sync.Map{}) + } + return sTransformed +} diff --git a/lib/bytesutil/fast_string_transformer_test.go b/lib/bytesutil/fast_string_transformer_test.go new file mode 100644 index 000000000..62d002914 --- /dev/null +++ b/lib/bytesutil/fast_string_transformer_test.go @@ -0,0 +1,22 @@ +package bytesutil + +import ( + "strings" + "testing" +) + +func TestFastStringTransformer(t *testing.T) { + fst := NewFastStringTransformer(strings.ToUpper) + f := func(s, resultExpected string) { + t.Helper() + for i := 0; i < 10; i++ { + result := fst.Transform(s) + if result != resultExpected { + t.Fatalf("unexpected result for Transform(%q) at iteration %d; got %q; want %q", s, i, result, resultExpected) + } + } + } + f("", "") + f("foo", "FOO") + f("a_b-C", "A_B-C") +} diff --git a/lib/bytesutil/fast_string_transformer_timing_test.go b/lib/bytesutil/fast_string_transformer_timing_test.go new file mode 100644 index 000000000..ed2324fb1 --- /dev/null +++ b/lib/bytesutil/fast_string_transformer_timing_test.go @@ -0,0 +1,28 @@ +package bytesutil + +import ( + "strings" + "testing" +) + +func BenchmarkFastStringTransformer(b *testing.B) { + for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} { + b.Run(s, func(b *testing.B) { + benchmarkFastStringTransformer(b, s) + }) + } +} + +func benchmarkFastStringTransformer(b *testing.B, s string) { + fst := NewFastStringTransformer(strings.ToUpper) + b.ReportAllocs() + b.SetBytes(1) + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + sTransformed := fst.Transform(s) + GlobalSink += len(sTransformed) + } + }) +} + +var GlobalSink int diff --git a/lib/promrelabel/relabel.go b/lib/promrelabel/relabel.go index adc6938af..b8d293aa8 100644 --- a/lib/promrelabel/relabel.go +++ b/lib/promrelabel/relabel.go @@ -5,8 +5,6 @@ import ( "regexp" "strconv" "strings" - "sync" - "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -565,40 +563,11 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal. // // See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels func SanitizeName(name string) string { - m := sanitizedNames.Load().(*sync.Map) - v, ok := m.Load(name) - if ok { - // Fast path - the sanitized name is found in the cache. - sp := v.(*string) - return *sp - } - // Slow path - sanitize name and store it in the cache. - sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_") - // Make a copy of name in order to limit memory usage to the name length, - // since the name may point to bigger string. - s := string(append([]byte{}, name...)) - if sanitizedName == name { - // point sanitizedName to just allocated s, since it may point to name, - // which, in turn, can point to bigger string. - sanitizedName = s - } - sp := &sanitizedName - m.Store(s, sp) - n := atomic.AddUint64(&sanitizedNamesLen, 1) - if n > 100e3 { - atomic.StoreUint64(&sanitizedNamesLen, 0) - sanitizedNames.Store(&sync.Map{}) - } - return sanitizedName + return promSanitizer.Transform(name) } -var ( - sanitizedNames atomic.Value - sanitizedNamesLen uint64 +var promSanitizer = bytesutil.NewFastStringTransformer(func(s string) string { + return unsupportedPromChars.ReplaceAllString(s, "_") +}) - unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`) -) - -func init() { - sanitizedNames.Store(&sync.Map{}) -} +var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`) diff --git a/lib/promscrape/discoveryutils/utils.go b/lib/promscrape/discoveryutils/utils.go index 9ee10d0cc..e0dd7fd78 100644 --- a/lib/promscrape/discoveryutils/utils.go +++ b/lib/promscrape/discoveryutils/utils.go @@ -6,9 +6,8 @@ import ( "regexp" "sort" "strconv" - "sync" - "sync/atomic" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" ) @@ -17,43 +16,14 @@ import ( // // This has been copied from Prometheus sources at util/strutil/strconv.go func SanitizeLabelName(name string) string { - m := sanitizedLabelNames.Load().(*sync.Map) - v, ok := m.Load(name) - if ok { - // Fast path - the sanitized label name is found in the cache. - sp := v.(*string) - return *sp - } - // Slow path - sanitize name and store it in the cache. - sanitizedName := invalidLabelCharRE.ReplaceAllString(name, "_") - // Make a copy of name in order to limit memory usage to the name length, - // since the name may point to bigger string. - s := string(append([]byte{}, name...)) - if sanitizedName == name { - // point sanitizedName to just allocated s, since it may point to name, - // which, in turn, can point to bigger string. - sanitizedName = s - } - sp := &sanitizedName - m.Store(s, sp) - n := atomic.AddUint64(&sanitizedLabelNamesLen, 1) - if n > 100e3 { - atomic.StoreUint64(&sanitizedLabelNamesLen, 0) - sanitizedLabelNames.Store(&sync.Map{}) - } - return sanitizedName + return labelNamesSanitizer.Transform(name) } -var ( - sanitizedLabelNames atomic.Value - sanitizedLabelNamesLen uint64 +var labelNamesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string { + return invalidLabelCharRE.ReplaceAllString(s, "_") +}) - invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) -) - -func init() { - sanitizedLabelNames.Store(&sync.Map{}) -} +var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // JoinHostPort returns host:port. // diff --git a/lib/protoparser/datadog/streamparser.go b/lib/protoparser/datadog/streamparser.go index 35c29567f..ef1877071 100644 --- a/lib/protoparser/datadog/streamparser.go +++ b/lib/protoparser/datadog/streamparser.go @@ -7,7 +7,6 @@ import ( "io" "regexp" "sync" - "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" @@ -157,44 +156,18 @@ var requestPool sync.Pool // // See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics func sanitizeName(name string) string { - m := sanitizedNames.Load().(*sync.Map) - v, ok := m.Load(name) - if ok { - // Fast path - the sanitized name is found in the cache. - sp := v.(*string) - return *sp - } - // Slow path - sanitize name and store it in the cache. - sanitizedName := unsupportedDatadogChars.ReplaceAllString(name, "_") - sanitizedName = multiUnderscores.ReplaceAllString(sanitizedName, "_") - sanitizedName = underscoresWithDots.ReplaceAllString(sanitizedName, ".") - // Make a copy of name in order to limit memory usage to the name length, - // since the name may point to bigger string. - s := string(append([]byte{}, name...)) - if sanitizedName == name { - // point sanitizedName to just allocated s, since it may point to name, - // which, in turn, can point to bigger string. - sanitizedName = s - } - sp := &sanitizedName - m.Store(s, sp) - n := atomic.AddUint64(&sanitizedNamesLen, 1) - if n > 100e3 { - atomic.StoreUint64(&sanitizedNamesLen, 0) - sanitizedNames.Store(&sync.Map{}) - } - return sanitizedName + return namesSanitizer.Transform(name) } -var ( - sanitizedNames atomic.Value - sanitizedNamesLen uint64 +var namesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string { + s = unsupportedDatadogChars.ReplaceAllString(s, "_") + s = multiUnderscores.ReplaceAllString(s, "_") + s = underscoresWithDots.ReplaceAllString(s, ".") + return s +}) +var ( unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`) multiUnderscores = regexp.MustCompile(`_+`) underscoresWithDots = regexp.MustCompile(`_?\._?`) ) - -func init() { - sanitizedNames.Store(&sync.Map{}) -}