mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/bytesutil: add FastStringTransformer and use it in the rest of the code where needed
This commit is contained in:
parent
9c6c691471
commit
4afa25fb38
6 changed files with 126 additions and 107 deletions
57
lib/bytesutil/fast_string_transformer.go
Normal file
57
lib/bytesutil/fast_string_transformer.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
package bytesutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// fastStringTransformerCacheLimit is the number of entries the cache may accumulate.
// The whole cache is dropped and re-created as soon as this number is exceeded.
const fastStringTransformerCacheLimit = 100 * 1000

// FastStringTransformer implements fast transformer for strings.
//
// It caches transformed strings and returns them back on the next calls
// without calling the transformFunc, which may be expensive.
//
// FastStringTransformer must be created via NewFastStringTransformer.
type FastStringTransformer struct {
	// m holds *sync.Map, which maps the original string to *string pointing to the transformed string.
	// It is atomically replaced with an empty map when too many entries are added to it.
	m atomic.Value

	// mLen is the number of entries added to m since the last reset.
	// It must be accessed via sync/atomic functions.
	mLen uint64

	// transformFunc is the function applied to strings missing in the cache.
	transformFunc func(s string) string
}

// NewFastStringTransformer creates new transformer, which applies transformFunc to strings passed to Transform()
//
// transformFunc must return the same result for the same input.
func NewFastStringTransformer(transformFunc func(s string) string) *FastStringTransformer {
	var fst FastStringTransformer
	fst.m.Store(&sync.Map{})
	fst.transformFunc = transformFunc
	return &fst
}

// Transform applies transformFunc to s and returns the result.
//
// The result is cached, so the next calls for the same s return it without calling transformFunc.
func (fst *FastStringTransformer) Transform(s string) string {
	m := fst.m.Load().(*sync.Map)
	if v, ok := m.Load(s); ok {
		// Fast path - the transformed s is found in the cache.
		return *v.(*string)
	}

	// Slow path - transform s and store it in the cache.
	sTransformed := fst.transformFunc(s)
	// Make a copy of s in order to limit memory usage to the s length,
	// since the s may point to bigger string.
	s = strings.Clone(s)
	if sTransformed == s {
		// point sTransformed to just allocated s, since it may point to s,
		// which, in turn, can point to bigger string.
		sTransformed = s
	}

	// Use LoadOrStore instead of Store, so only the goroutine, which actually adds
	// the entry, increments mLen. Otherwise concurrent misses for the same s
	// would overcount the cache size and trigger premature cache resets.
	v, loaded := m.LoadOrStore(s, &sTransformed)
	if loaded {
		// The entry has been added by a concurrent goroutine. Return the already cached string.
		return *v.(*string)
	}

	if n := atomic.AddUint64(&fst.mLen, 1); n > fastStringTransformerCacheLimit {
		// The cache became too big. Start from an empty cache in order to limit memory usage.
		atomic.StoreUint64(&fst.mLen, 0)
		fst.m.Store(&sync.Map{})
	}
	return sTransformed
}
|
22
lib/bytesutil/fast_string_transformer_test.go
Normal file
22
lib/bytesutil/fast_string_transformer_test.go
Normal file
|
@ -0,0 +1,22 @@
|
|||
package bytesutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFastStringTransformer(t *testing.T) {
|
||||
fst := NewFastStringTransformer(strings.ToUpper)
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
for i := 0; i < 10; i++ {
|
||||
result := fst.Transform(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for Transform(%q) at iteration %d; got %q; want %q", s, i, result, resultExpected)
|
||||
}
|
||||
}
|
||||
}
|
||||
f("", "")
|
||||
f("foo", "FOO")
|
||||
f("a_b-C", "A_B-C")
|
||||
}
|
28
lib/bytesutil/fast_string_transformer_timing_test.go
Normal file
28
lib/bytesutil/fast_string_transformer_timing_test.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
package bytesutil
|
||||
|
||||
import (
	"strings"
	"sync"
	"testing"
)
|
||||
|
||||
func BenchmarkFastStringTransformer(b *testing.B) {
|
||||
for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
|
||||
b.Run(s, func(b *testing.B) {
|
||||
benchmarkFastStringTransformer(b, s)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkFastStringTransformer(b *testing.B, s string) {
|
||||
fst := NewFastStringTransformer(strings.ToUpper)
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(1)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
sTransformed := fst.Transform(s)
|
||||
GlobalSink += len(sTransformed)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
var GlobalSink int
|
|
@ -5,8 +5,6 @@ import (
|
|||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
|
@ -565,40 +563,11 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
|
|||
//
|
||||
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
func SanitizeName(name string) string {
|
||||
m := sanitizedNames.Load().(*sync.Map)
|
||||
v, ok := m.Load(name)
|
||||
if ok {
|
||||
// Fast path - the sanitized name is found in the cache.
|
||||
sp := v.(*string)
|
||||
return *sp
|
||||
}
|
||||
// Slow path - sanitize name and store it in the cache.
|
||||
sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_")
|
||||
// Make a copy of name in order to limit memory usage to the name length,
|
||||
// since the name may point to bigger string.
|
||||
s := string(append([]byte{}, name...))
|
||||
if sanitizedName == name {
|
||||
// point sanitizedName to just allocated s, since it may point to name,
|
||||
// which, in turn, can point to bigger string.
|
||||
sanitizedName = s
|
||||
}
|
||||
sp := &sanitizedName
|
||||
m.Store(s, sp)
|
||||
n := atomic.AddUint64(&sanitizedNamesLen, 1)
|
||||
if n > 100e3 {
|
||||
atomic.StoreUint64(&sanitizedNamesLen, 0)
|
||||
sanitizedNames.Store(&sync.Map{})
|
||||
}
|
||||
return sanitizedName
|
||||
return promSanitizer.Transform(name)
|
||||
}
|
||||
|
||||
var (
|
||||
sanitizedNames atomic.Value
|
||||
sanitizedNamesLen uint64
|
||||
var promSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||
return unsupportedPromChars.ReplaceAllString(s, "_")
|
||||
})
|
||||
|
||||
unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
||||
)
|
||||
|
||||
func init() {
|
||||
sanitizedNames.Store(&sync.Map{})
|
||||
}
|
||||
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
||||
|
|
|
@ -6,9 +6,8 @@ import (
|
|||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
|
@ -17,43 +16,14 @@ import (
|
|||
//
|
||||
// This has been copied from Prometheus sources at util/strutil/strconv.go
|
||||
func SanitizeLabelName(name string) string {
|
||||
m := sanitizedLabelNames.Load().(*sync.Map)
|
||||
v, ok := m.Load(name)
|
||||
if ok {
|
||||
// Fast path - the sanitized label name is found in the cache.
|
||||
sp := v.(*string)
|
||||
return *sp
|
||||
}
|
||||
// Slow path - sanitize name and store it in the cache.
|
||||
sanitizedName := invalidLabelCharRE.ReplaceAllString(name, "_")
|
||||
// Make a copy of name in order to limit memory usage to the name length,
|
||||
// since the name may point to bigger string.
|
||||
s := string(append([]byte{}, name...))
|
||||
if sanitizedName == name {
|
||||
// point sanitizedName to just allocated s, since it may point to name,
|
||||
// which, in turn, can point to bigger string.
|
||||
sanitizedName = s
|
||||
}
|
||||
sp := &sanitizedName
|
||||
m.Store(s, sp)
|
||||
n := atomic.AddUint64(&sanitizedLabelNamesLen, 1)
|
||||
if n > 100e3 {
|
||||
atomic.StoreUint64(&sanitizedLabelNamesLen, 0)
|
||||
sanitizedLabelNames.Store(&sync.Map{})
|
||||
}
|
||||
return sanitizedName
|
||||
return labelNamesSanitizer.Transform(name)
|
||||
}
|
||||
|
||||
var (
|
||||
sanitizedLabelNames atomic.Value
|
||||
sanitizedLabelNamesLen uint64
|
||||
var labelNamesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||
return invalidLabelCharRE.ReplaceAllString(s, "_")
|
||||
})
|
||||
|
||||
invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
||||
)
|
||||
|
||||
func init() {
|
||||
sanitizedLabelNames.Store(&sync.Map{})
|
||||
}
|
||||
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
||||
|
||||
// JoinHostPort returns host:port.
|
||||
//
|
||||
|
|
|
@ -7,7 +7,6 @@ import (
|
|||
"io"
|
||||
"regexp"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
|
@ -157,44 +156,18 @@ var requestPool sync.Pool
|
|||
//
|
||||
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
|
||||
func sanitizeName(name string) string {
|
||||
m := sanitizedNames.Load().(*sync.Map)
|
||||
v, ok := m.Load(name)
|
||||
if ok {
|
||||
// Fast path - the sanitized name is found in the cache.
|
||||
sp := v.(*string)
|
||||
return *sp
|
||||
}
|
||||
// Slow path - sanitize name and store it in the cache.
|
||||
sanitizedName := unsupportedDatadogChars.ReplaceAllString(name, "_")
|
||||
sanitizedName = multiUnderscores.ReplaceAllString(sanitizedName, "_")
|
||||
sanitizedName = underscoresWithDots.ReplaceAllString(sanitizedName, ".")
|
||||
// Make a copy of name in order to limit memory usage to the name length,
|
||||
// since the name may point to bigger string.
|
||||
s := string(append([]byte{}, name...))
|
||||
if sanitizedName == name {
|
||||
// point sanitizedName to just allocated s, since it may point to name,
|
||||
// which, in turn, can point to bigger string.
|
||||
sanitizedName = s
|
||||
}
|
||||
sp := &sanitizedName
|
||||
m.Store(s, sp)
|
||||
n := atomic.AddUint64(&sanitizedNamesLen, 1)
|
||||
if n > 100e3 {
|
||||
atomic.StoreUint64(&sanitizedNamesLen, 0)
|
||||
sanitizedNames.Store(&sync.Map{})
|
||||
}
|
||||
return sanitizedName
|
||||
return namesSanitizer.Transform(name)
|
||||
}
|
||||
|
||||
var namesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||
s = unsupportedDatadogChars.ReplaceAllString(s, "_")
|
||||
s = multiUnderscores.ReplaceAllString(s, "_")
|
||||
s = underscoresWithDots.ReplaceAllString(s, ".")
|
||||
return s
|
||||
})
|
||||
|
||||
var (
|
||||
sanitizedNames atomic.Value
|
||||
sanitizedNamesLen uint64
|
||||
|
||||
unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)
|
||||
multiUnderscores = regexp.MustCompile(`_+`)
|
||||
underscoresWithDots = regexp.MustCompile(`_?\._?`)
|
||||
)
|
||||
|
||||
func init() {
|
||||
sanitizedNames.Store(&sync.Map{})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue