lib/bytesutil: add FastStringTransformer and use it in the rest of the code where needed

This commit is contained in:
Aliaksandr Valialkin 2022-09-28 10:39:01 +03:00
parent 92b3622253
commit 6a32a64073
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
6 changed files with 126 additions and 107 deletions

View file

@ -0,0 +1,57 @@
package bytesutil
import (
"strings"
"sync"
"sync/atomic"
)
// FastStringTransformer memoizes the results of a string transformation.
//
// Transform() looks the input up in an internal cache first and invokes
// transformFunc only when the input isn't found there.
// All the shared state is accessed via sync.Map and sync/atomic.
type FastStringTransformer struct {
	// m holds the current cache as *sync.Map.
	// The cache maps the input string to *string pointing to the transformed value.
	m atomic.Value

	// mLen counts the stores into the cache since its last reset.
	// It is accessed only via sync/atomic functions.
	mLen uint64

	// transformFunc is the transformation applied to strings missing in the cache.
	transformFunc func(s string) string
}

// NewFastStringTransformer returns a transformer with an empty cache,
// which applies transformFunc to the strings passed to Transform().
//
// transformFunc must be deterministic, e.g. it must return the same result
// for the same input, since the results are cached.
func NewFastStringTransformer(transformFunc func(s string) string) *FastStringTransformer {
	fst := &FastStringTransformer{
		transformFunc: transformFunc,
	}
	fst.m.Store(&sync.Map{})
	return fst
}

// Transform returns the result of transformFunc applied to s.
//
// The result is taken from the cache when s has been already seen there.
// Otherwise transformFunc is called and its result is put into the cache.
func (fst *FastStringTransformer) Transform(s string) string {
	// maxStores is the number of stores into the cache, after which the cache is dropped.
	const maxStores = 100e3

	cache := fst.m.Load().(*sync.Map)
	if cached, found := cache.Load(s); found {
		// Cache hit - there is no need in calling transformFunc.
		return *(cached.(*string))
	}

	// Cache miss - calculate the result from the original s.
	result := fst.transformFunc(s)

	// The cache key must own its memory, since s may reference a part of a bigger string,
	// which would be kept alive by the cache otherwise.
	key := strings.Clone(s)
	if result == key {
		// transformFunc could return s itself, so switch the result to the cloned key
		// in order to avoid holding a reference to the memory behind s.
		result = key
	}
	cache.Store(key, &result)

	// Replace the cache with an empty one after too many stores in order to limit its size.
	if atomic.AddUint64(&fst.mLen, 1) > maxStores {
		atomic.StoreUint64(&fst.mLen, 0)
		fst.m.Store(&sync.Map{})
	}
	return result
}

View file

@ -0,0 +1,22 @@
package bytesutil
import (
"strings"
"testing"
)
// TestFastStringTransformer verifies that Transform() returns the expected result
// on the first call as well as on the subsequent calls for the same input.
func TestFastStringTransformer(t *testing.T) {
	fst := NewFastStringTransformer(strings.ToUpper)

	testCases := []struct {
		input string
		want  string
	}{
		{"", ""},
		{"foo", "FOO"},
		{"a_b-C", "A_B-C"},
	}
	for _, tc := range testCases {
		// Repeat the call multiple times, so both the first call
		// and the repeated calls for the same input are checked.
		for i := 0; i < 10; i++ {
			got := fst.Transform(tc.input)
			if got != tc.want {
				t.Fatalf("unexpected result for Transform(%q) at iteration %d; got %q; want %q", tc.input, i, got, tc.want)
			}
		}
	}
}

View file

@ -0,0 +1,28 @@
package bytesutil
import (
	"strings"
	"sync/atomic"
	"testing"
)
// BenchmarkFastStringTransformer runs a sub-benchmark per every sample input.
// The sub-benchmark is named after the input string.
func BenchmarkFastStringTransformer(b *testing.B) {
	inputs := []string{"", "foo", "foo-bar-baz", "http_requests_total"}
	for i := range inputs {
		input := inputs[i]
		b.Run(input, func(b *testing.B) {
			benchmarkFastStringTransformer(b, input)
		})
	}
}
// benchmarkFastStringTransformer measures parallel Transform() calls for the given s
// on a transformer, which wraps strings.ToUpper.
func benchmarkFastStringTransformer(b *testing.B, s string) {
	fst := NewFastStringTransformer(strings.ToUpper)
	b.ReportAllocs()
	b.SetBytes(1)

	// Every goroutine started by RunParallel sums the result lengths in its own local counter
	// and publishes the sum once via an atomic add. Updating GlobalSink directly from
	// the benchmark loop is a data race, since it is a plain int shared by all the goroutines.
	var total int64
	b.RunParallel(func(pb *testing.PB) {
		n := 0
		for pb.Next() {
			n += len(fst.Transform(s))
		}
		atomic.AddInt64(&total, int64(n))
	})

	// GlobalSink is updated from the goroutine that called RunParallel,
	// after the parallel part has completed.
	GlobalSink += int(atomic.LoadInt64(&total))
}
// GlobalSink accumulates the lengths of the strings produced in benchmarks.
//
// Presumably it exists so that the compiler cannot discard the benchmarked calls
// as unused - TODO confirm.
var GlobalSink int

View file

@ -5,8 +5,6 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@ -565,40 +563,11 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
//
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
func SanitizeName(name string) string {
m := sanitizedNames.Load().(*sync.Map)
v, ok := m.Load(name)
if ok {
// Fast path - the sanitized name is found in the cache.
sp := v.(*string)
return *sp
}
// Slow path - sanitize name and store it in the cache.
sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_")
// Make a copy of name in order to limit memory usage to the name length,
// since the name may point to bigger string.
s := string(append([]byte{}, name...))
if sanitizedName == name {
// point sanitizedName to just allocated s, since it may point to name,
// which, in turn, can point to bigger string.
sanitizedName = s
}
sp := &sanitizedName
m.Store(s, sp)
n := atomic.AddUint64(&sanitizedNamesLen, 1)
if n > 100e3 {
atomic.StoreUint64(&sanitizedNamesLen, 0)
sanitizedNames.Store(&sync.Map{})
}
return sanitizedName
return promSanitizer.Transform(name)
}
var (
sanitizedNames atomic.Value
sanitizedNamesLen uint64
var promSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
return unsupportedPromChars.ReplaceAllString(s, "_")
})
unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
)
func init() {
sanitizedNames.Store(&sync.Map{})
}
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)

View file

@ -6,9 +6,8 @@ import (
"regexp"
"sort"
"strconv"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
@ -17,43 +16,14 @@ import (
//
// This has been copied from Prometheus sources at util/strutil/strconv.go
func SanitizeLabelName(name string) string {
m := sanitizedLabelNames.Load().(*sync.Map)
v, ok := m.Load(name)
if ok {
// Fast path - the sanitized label name is found in the cache.
sp := v.(*string)
return *sp
}
// Slow path - sanitize name and store it in the cache.
sanitizedName := invalidLabelCharRE.ReplaceAllString(name, "_")
// Make a copy of name in order to limit memory usage to the name length,
// since the name may point to bigger string.
s := string(append([]byte{}, name...))
if sanitizedName == name {
// point sanitizedName to just allocated s, since it may point to name,
// which, in turn, can point to bigger string.
sanitizedName = s
}
sp := &sanitizedName
m.Store(s, sp)
n := atomic.AddUint64(&sanitizedLabelNamesLen, 1)
if n > 100e3 {
atomic.StoreUint64(&sanitizedLabelNamesLen, 0)
sanitizedLabelNames.Store(&sync.Map{})
}
return sanitizedName
return labelNamesSanitizer.Transform(name)
}
var (
sanitizedLabelNames atomic.Value
sanitizedLabelNamesLen uint64
var labelNamesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
return invalidLabelCharRE.ReplaceAllString(s, "_")
})
invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
)
func init() {
sanitizedLabelNames.Store(&sync.Map{})
}
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// JoinHostPort returns host:port.
//

View file

@ -7,7 +7,6 @@ import (
"io"
"regexp"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
@ -157,44 +156,18 @@ var requestPool sync.Pool
//
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
func sanitizeName(name string) string {
m := sanitizedNames.Load().(*sync.Map)
v, ok := m.Load(name)
if ok {
// Fast path - the sanitized name is found in the cache.
sp := v.(*string)
return *sp
}
// Slow path - sanitize name and store it in the cache.
sanitizedName := unsupportedDatadogChars.ReplaceAllString(name, "_")
sanitizedName = multiUnderscores.ReplaceAllString(sanitizedName, "_")
sanitizedName = underscoresWithDots.ReplaceAllString(sanitizedName, ".")
// Make a copy of name in order to limit memory usage to the name length,
// since the name may point to bigger string.
s := string(append([]byte{}, name...))
if sanitizedName == name {
// point sanitizedName to just allocated s, since it may point to name,
// which, in turn, can point to bigger string.
sanitizedName = s
}
sp := &sanitizedName
m.Store(s, sp)
n := atomic.AddUint64(&sanitizedNamesLen, 1)
if n > 100e3 {
atomic.StoreUint64(&sanitizedNamesLen, 0)
sanitizedNames.Store(&sync.Map{})
}
return sanitizedName
return namesSanitizer.Transform(name)
}
var namesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
s = unsupportedDatadogChars.ReplaceAllString(s, "_")
s = multiUnderscores.ReplaceAllString(s, "_")
s = underscoresWithDots.ReplaceAllString(s, ".")
return s
})
var (
sanitizedNames atomic.Value
sanitizedNamesLen uint64
unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)
multiUnderscores = regexp.MustCompile(`_+`)
underscoresWithDots = regexp.MustCompile(`_?\._?`)
)
func init() {
sanitizedNames.Store(&sync.Map{})
}