From 7f0b95b50a18ec0d742a24e003a830ac0d589e8c Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Wed, 28 Sep 2022 09:59:36 +0300
Subject: [PATCH] lib/promrelabel: add SanitizeName() function for sanitizing Prometheus metric names and label names

Optimize this function by using results cache for input strings.
Use this function all over the code.

This is a follow-up for fcffdba9dcbc6a696d328cc8f4268349d54218e3

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113
---
 app/vmagent/remotewrite/relabel.go     |  8 ++---
 app/vmctl/opentsdb/parser.go           | 17 +++-------
 app/vminsert/relabel/relabel.go        |  8 ++---
 lib/promrelabel/relabel.go             | 44 ++++++++++++++++++++++++++
 lib/promrelabel/relabel_test.go        | 16 ++++++++++
 lib/promrelabel/relabel_timing_test.go | 21 ++++++++++++
 6 files changed, 89 insertions(+), 25 deletions(-)

diff --git a/app/vmagent/remotewrite/relabel.go b/app/vmagent/remotewrite/relabel.go
index abcdbb0de..4374ff2bc 100644
--- a/app/vmagent/remotewrite/relabel.go
+++ b/app/vmagent/remotewrite/relabel.go
@@ -3,7 +3,6 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
-	"regexp"
 	"strings"
 	"sync"

@@ -118,9 +117,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
 			for j := range tmpLabels {
 				label := &tmpLabels[j]
 				if label.Name == "__name__" {
-					label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
+					label.Value = promrelabel.SanitizeName(label.Value)
 				} else {
-					label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
+					label.Name = promrelabel.SanitizeName(label.Name)
 				}
 			}
 		}
@@ -138,9 +137,6 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
 	return tssDst
 }

-// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
-var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
-
 type relabelCtx struct {
 	// pool for labels, which are used during the relabeling.
 	labels []prompbmarshal.Label
diff --git a/app/vmctl/opentsdb/parser.go b/app/vmctl/opentsdb/parser.go
index 6a1fa8baf..6993e8d13 100644
--- a/app/vmctl/opentsdb/parser.go
+++ b/app/vmctl/opentsdb/parser.go
@@ -6,13 +6,12 @@ import (
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
 )

 var (
-	allowedNames     = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_:]*$")
 	allowedFirstChar = regexp.MustCompile("^[a-zA-Z]")
-	replaceChars     = regexp.MustCompile("[^a-zA-Z0-9_:]")
-	allowedTagKeys   = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_]*$")
 )

 func convertDuration(duration string) (time.Duration, error) {
@@ -180,13 +179,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
 	}
 	/*
 		replace bad characters in metric name with _ per the data model
-		only replace if needed to reduce string processing time
 	*/
-	if !allowedNames.MatchString(name) {
-		finalMsg.Metric = replaceChars.ReplaceAllString(name, "_")
-	} else {
-		finalMsg.Metric = name
-	}
+	finalMsg.Metric = promrelabel.SanitizeName(name)
 	// replace bad characters in tag keys with _ per the data model
 	for key, value := range msg.Tags {
 		// if normalization requested, lowercase the key and value
@@ -196,11 +190,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
 		}
 		/*
 			replace all explicitly bad characters with _
-			only replace if needed to reduce string processing time
 		*/
-		if !allowedTagKeys.MatchString(key) {
-			key = replaceChars.ReplaceAllString(key, "_")
-		}
+		key = promrelabel.SanitizeName(key)
 		// tags that start with __ are considered custom stats for internal prometheus stuff, we should drop them
 		if !strings.HasPrefix(key, "__") {
 			finalMsg.Tags[key] = value
diff --git a/app/vminsert/relabel/relabel.go b/app/vminsert/relabel/relabel.go
index bb02836c5..991a131fb 100644
--- a/app/vminsert/relabel/relabel.go
+++ b/app/vminsert/relabel/relabel.go
@@ -3,7 +3,6 @@ package relabel
 import (
 	"flag"
 	"fmt"
-	"regexp"
 	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -115,9 +114,9 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
 		for i := range tmpLabels {
 			label := &tmpLabels[i]
 			if label.Name == "__name__" {
-				label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
+				label.Value = promrelabel.SanitizeName(label.Value)
 			} else {
-				label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
+				label.Name = promrelabel.SanitizeName(label.Name)
 			}
 		}
 	}
@@ -149,6 +148,3 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
 }

 var metricsDropped = metrics.NewCounter(`vm_relabel_metrics_dropped_total`)
-
-// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
-var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
diff --git a/lib/promrelabel/relabel.go b/lib/promrelabel/relabel.go
index f03f29ba2..adc6938af 100644
--- a/lib/promrelabel/relabel.go
+++ b/lib/promrelabel/relabel.go
@@ -5,6 +5,8 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
+	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -558,3 +560,45 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
 	}
 	return dst
 }
+
+// SanitizeName replaces unsupported by Prometheus chars in metric names and label names with _.
+//
+// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
+func SanitizeName(name string) string {
+	m := sanitizedNames.Load().(*sync.Map)
+	v, ok := m.Load(name)
+	if ok {
+		// Fast path - the sanitized name is found in the cache.
+		sp := v.(*string)
+		return *sp
+	}
+	// Slow path - sanitize name and store it in the cache.
+	sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_")
+	// Make a copy of name in order to limit memory usage to the name length,
+	// since the name may point to bigger string.
+	s := string(append([]byte{}, name...))
+	if sanitizedName == name {
+		// point sanitizedName to just allocated s, since it may point to name,
+		// which, in turn, can point to bigger string.
+		sanitizedName = s
+	}
+	sp := &sanitizedName
+	m.Store(s, sp)
+	n := atomic.AddUint64(&sanitizedNamesLen, 1)
+	if n > 100e3 {
+		atomic.StoreUint64(&sanitizedNamesLen, 0)
+		sanitizedNames.Store(&sync.Map{})
+	}
+	return sanitizedName
+}
+
+var (
+	sanitizedNames    atomic.Value
+	sanitizedNamesLen uint64
+
+	unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
+)
+
+func init() {
+	sanitizedNames.Store(&sync.Map{})
+}
diff --git a/lib/promrelabel/relabel_test.go b/lib/promrelabel/relabel_test.go
index ba40e5f26..bc4bb7c3f 100644
--- a/lib/promrelabel/relabel_test.go
+++ b/lib/promrelabel/relabel_test.go
@@ -7,6 +7,22 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

+func TestSanitizeName(t *testing.T) {
+	f := func(s, resultExpected string) {
+		t.Helper()
+		for i := 0; i < 5; i++ {
+			result := SanitizeName(s)
+			if result != resultExpected {
+				t.Fatalf("unexpected result for SanitizeName(%q) at iteration %d; got %q; want %q", s, i, result, resultExpected)
+			}
+		}
+	}
+	f("", "")
+	f("a", "a")
+	f("foo.bar/baz:a", "foo_bar_baz:a")
+	f("foo...bar", "foo___bar")
+}
+
 func TestLabelsToString(t *testing.T) {
 	f := func(labels []prompbmarshal.Label, sExpected string) {
 		t.Helper()
diff --git a/lib/promrelabel/relabel_timing_test.go b/lib/promrelabel/relabel_timing_test.go
index 6e7d55d19..eaba87047 100644
--- a/lib/promrelabel/relabel_timing_test.go
+++ b/lib/promrelabel/relabel_timing_test.go
@@ -8,6 +8,27 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

+func BenchmarkSanitizeName(b *testing.B) {
+	for _, name := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
+		b.Run(name, func(b *testing.B) {
+			benchmarkSanitizeName(b, name)
+		})
+	}
+}
+
+func benchmarkSanitizeName(b *testing.B, name string) {
+	b.ReportAllocs()
+	b.SetBytes(1)
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			sanitizedName := SanitizeName(name)
+			GlobalSink += len(sanitizedName)
+		}
+	})
+}
+
+var GlobalSink int
+
 func BenchmarkMatchRegexPrefixDotPlusMatchOptimized(b *testing.B) {
 	const pattern = "^foo.+$"
 	const s = "foobar"