mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/bytesutil: add FastStringTransformer and use it in the rest of the code where needed
This commit is contained in:
parent
92b3622253
commit
6a32a64073
6 changed files with 126 additions and 107 deletions
57
lib/bytesutil/fast_string_transformer.go
Normal file
57
lib/bytesutil/fast_string_transformer.go
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FastStringTransformer implements fast transformer for strings.
//
// It caches transformed strings and returns them back on the next calls
// without calling the transformFunc, which may be expensive.
//
// Instances must be created with NewFastStringTransformer: the zero value
// has no cache stored in m, so Transform would panic on it.
type FastStringTransformer struct {
	// m holds the current cache as *sync.Map, which maps the original string
	// to a *string pointing to the transformed result.
	m atomic.Value

	// mLen counts slow-path stores into the current cache.
	// It must be accessed only via sync/atomic functions.
	mLen uint64

	// transformFunc is the transformation applied on cache misses.
	transformFunc func(s string) string
}

// NewFastStringTransformer creates new transformer, which applies transformFunc to strings passed to Transform()
//
// transformFunc must return the same result for the same input.
func NewFastStringTransformer(transformFunc func(s string) string) *FastStringTransformer {
	fst := &FastStringTransformer{
		transformFunc: transformFunc,
	}
	fst.m.Store(&sync.Map{})
	return fst
}

// Transform applies transformFunc to s and returns the result.
//
// The result is served from the cache when s has been seen before.
// Neither the cache nor the returned string shares memory with s
// when the result is produced on a cache miss.
func (fst *FastStringTransformer) Transform(s string) string {
	// maxCacheLen is the number of slow-path stores after which
	// the cache is dropped and replaced with an empty one.
	const maxCacheLen = 100e3

	m := fst.m.Load().(*sync.Map)
	if v, ok := m.Load(s); ok {
		// Fast path - the transformed s is found in the cache.
		return *v.(*string)
	}

	// Slow path - transform s and store it in the cache.
	sTransformed := fst.transformFunc(s)
	// Make a copy of s in order to limit memory usage to the s length,
	// since the s may point to bigger string.
	s = strings.Clone(s)
	if sTransformed == s {
		// Point sTransformed to just allocated s, since it may point to the original s,
		// which, in turn, can point to bigger string.
		sTransformed = s
	} else {
		// transformFunc may return a part of the original s (for example, a trimmed string).
		// Copy it, so the cache doesn't hold the caller's bigger string.
		sTransformed = strings.Clone(sTransformed)
	}
	m.Store(s, &sTransformed)

	if n := atomic.AddUint64(&fst.mLen, 1); n > maxCacheLen {
		// Limit memory usage by starting from an empty cache.
		atomic.StoreUint64(&fst.mLen, 0)
		fst.m.Store(&sync.Map{})
	}
	return sTransformed
}
|
22
lib/bytesutil/fast_string_transformer_test.go
Normal file
22
lib/bytesutil/fast_string_transformer_test.go
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFastStringTransformer(t *testing.T) {
|
||||||
|
fst := NewFastStringTransformer(strings.ToUpper)
|
||||||
|
f := func(s, resultExpected string) {
|
||||||
|
t.Helper()
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
result := fst.Transform(s)
|
||||||
|
if result != resultExpected {
|
||||||
|
t.Fatalf("unexpected result for Transform(%q) at iteration %d; got %q; want %q", s, i, result, resultExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f("", "")
|
||||||
|
f("foo", "FOO")
|
||||||
|
f("a_b-C", "A_B-C")
|
||||||
|
}
|
28
lib/bytesutil/fast_string_transformer_timing_test.go
Normal file
28
lib/bytesutil/fast_string_transformer_timing_test.go
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
package bytesutil
|
||||||
|
|
||||||
|
import (
	"strings"
	"sync"
	"testing"
)
|
||||||
|
|
||||||
|
func BenchmarkFastStringTransformer(b *testing.B) {
|
||||||
|
for _, s := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
|
||||||
|
b.Run(s, func(b *testing.B) {
|
||||||
|
benchmarkFastStringTransformer(b, s)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkFastStringTransformer(b *testing.B, s string) {
|
||||||
|
fst := NewFastStringTransformer(strings.ToUpper)
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.SetBytes(1)
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
for pb.Next() {
|
||||||
|
sTransformed := fst.Transform(s)
|
||||||
|
GlobalSink += len(sTransformed)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// GlobalSink is incremented by benchmarks with the lengths of the obtained results.
//
// NOTE(review): presumably it exists to prevent the compiler from eliminating
// the benchmarked calls - confirm.
var GlobalSink int
|
|
@ -5,8 +5,6 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
|
@ -565,40 +563,11 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
|
||||||
//
|
//
|
||||||
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||||
func SanitizeName(name string) string {
|
func SanitizeName(name string) string {
|
||||||
m := sanitizedNames.Load().(*sync.Map)
|
return promSanitizer.Transform(name)
|
||||||
v, ok := m.Load(name)
|
|
||||||
if ok {
|
|
||||||
// Fast path - the sanitized name is found in the cache.
|
|
||||||
sp := v.(*string)
|
|
||||||
return *sp
|
|
||||||
}
|
|
||||||
// Slow path - sanitize name and store it in the cache.
|
|
||||||
sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_")
|
|
||||||
// Make a copy of name in order to limit memory usage to the name length,
|
|
||||||
// since the name may point to bigger string.
|
|
||||||
s := string(append([]byte{}, name...))
|
|
||||||
if sanitizedName == name {
|
|
||||||
// point sanitizedName to just allocated s, since it may point to name,
|
|
||||||
// which, in turn, can point to bigger string.
|
|
||||||
sanitizedName = s
|
|
||||||
}
|
|
||||||
sp := &sanitizedName
|
|
||||||
m.Store(s, sp)
|
|
||||||
n := atomic.AddUint64(&sanitizedNamesLen, 1)
|
|
||||||
if n > 100e3 {
|
|
||||||
atomic.StoreUint64(&sanitizedNamesLen, 0)
|
|
||||||
sanitizedNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
return sanitizedName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var promSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||||
sanitizedNames atomic.Value
|
return unsupportedPromChars.ReplaceAllString(s, "_")
|
||||||
sanitizedNamesLen uint64
|
})
|
||||||
|
|
||||||
unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
sanitizedNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
|
|
|
@ -6,9 +6,8 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,43 +16,14 @@ import (
|
||||||
//
|
//
|
||||||
// This has been copied from Prometheus sources at util/strutil/strconv.go
|
// This has been copied from Prometheus sources at util/strutil/strconv.go
|
||||||
func SanitizeLabelName(name string) string {
|
func SanitizeLabelName(name string) string {
|
||||||
m := sanitizedLabelNames.Load().(*sync.Map)
|
return labelNamesSanitizer.Transform(name)
|
||||||
v, ok := m.Load(name)
|
|
||||||
if ok {
|
|
||||||
// Fast path - the sanitized label name is found in the cache.
|
|
||||||
sp := v.(*string)
|
|
||||||
return *sp
|
|
||||||
}
|
|
||||||
// Slow path - sanitize name and store it in the cache.
|
|
||||||
sanitizedName := invalidLabelCharRE.ReplaceAllString(name, "_")
|
|
||||||
// Make a copy of name in order to limit memory usage to the name length,
|
|
||||||
// since the name may point to bigger string.
|
|
||||||
s := string(append([]byte{}, name...))
|
|
||||||
if sanitizedName == name {
|
|
||||||
// point sanitizedName to just allocated s, since it may point to name,
|
|
||||||
// which, in turn, can point to bigger string.
|
|
||||||
sanitizedName = s
|
|
||||||
}
|
|
||||||
sp := &sanitizedName
|
|
||||||
m.Store(s, sp)
|
|
||||||
n := atomic.AddUint64(&sanitizedLabelNamesLen, 1)
|
|
||||||
if n > 100e3 {
|
|
||||||
atomic.StoreUint64(&sanitizedLabelNamesLen, 0)
|
|
||||||
sanitizedLabelNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
return sanitizedName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var labelNamesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||||
sanitizedLabelNames atomic.Value
|
return invalidLabelCharRE.ReplaceAllString(s, "_")
|
||||||
sanitizedLabelNamesLen uint64
|
})
|
||||||
|
|
||||||
invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
sanitizedLabelNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
|
|
||||||
// JoinHostPort returns host:port.
|
// JoinHostPort returns host:port.
|
||||||
//
|
//
|
||||||
|
|
|
@ -7,7 +7,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||||
|
@ -157,44 +156,18 @@ var requestPool sync.Pool
|
||||||
//
|
//
|
||||||
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
|
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
|
||||||
func sanitizeName(name string) string {
|
func sanitizeName(name string) string {
|
||||||
m := sanitizedNames.Load().(*sync.Map)
|
return namesSanitizer.Transform(name)
|
||||||
v, ok := m.Load(name)
|
|
||||||
if ok {
|
|
||||||
// Fast path - the sanitized name is found in the cache.
|
|
||||||
sp := v.(*string)
|
|
||||||
return *sp
|
|
||||||
}
|
|
||||||
// Slow path - sanitize name and store it in the cache.
|
|
||||||
sanitizedName := unsupportedDatadogChars.ReplaceAllString(name, "_")
|
|
||||||
sanitizedName = multiUnderscores.ReplaceAllString(sanitizedName, "_")
|
|
||||||
sanitizedName = underscoresWithDots.ReplaceAllString(sanitizedName, ".")
|
|
||||||
// Make a copy of name in order to limit memory usage to the name length,
|
|
||||||
// since the name may point to bigger string.
|
|
||||||
s := string(append([]byte{}, name...))
|
|
||||||
if sanitizedName == name {
|
|
||||||
// point sanitizedName to just allocated s, since it may point to name,
|
|
||||||
// which, in turn, can point to bigger string.
|
|
||||||
sanitizedName = s
|
|
||||||
}
|
|
||||||
sp := &sanitizedName
|
|
||||||
m.Store(s, sp)
|
|
||||||
n := atomic.AddUint64(&sanitizedNamesLen, 1)
|
|
||||||
if n > 100e3 {
|
|
||||||
atomic.StoreUint64(&sanitizedNamesLen, 0)
|
|
||||||
sanitizedNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
return sanitizedName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var namesSanitizer = bytesutil.NewFastStringTransformer(func(s string) string {
|
||||||
sanitizedNames atomic.Value
|
s = unsupportedDatadogChars.ReplaceAllString(s, "_")
|
||||||
sanitizedNamesLen uint64
|
s = multiUnderscores.ReplaceAllString(s, "_")
|
||||||
|
s = underscoresWithDots.ReplaceAllString(s, ".")
|
||||||
|
return s
|
||||||
|
})
|
||||||
|
|
||||||
|
var (
|
||||||
unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)
|
unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)
|
||||||
multiUnderscores = regexp.MustCompile(`_+`)
|
multiUnderscores = regexp.MustCompile(`_+`)
|
||||||
underscoresWithDots = regexp.MustCompile(`_?\._?`)
|
underscoresWithDots = regexp.MustCompile(`_?\._?`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
|
||||||
sanitizedNames.Store(&sync.Map{})
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue