lib/promrelabel: add SanitizeName() function for sanitizing Prometheus metric names and label names

Optimize this function by caching the results for input strings.
Use this function all over the code.

This is a follow-up for fcffdba9dc

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113
This commit is contained in:
Aliaksandr Valialkin 2022-09-28 09:59:36 +03:00
parent 41882222d3
commit 7f0b95b50a
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
6 changed files with 89 additions and 25 deletions

View file

@ -3,7 +3,6 @@ package remotewrite
import (
"flag"
"fmt"
"regexp"
"strings"
"sync"
@ -118,9 +117,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
for j := range tmpLabels {
label := &tmpLabels[j]
if label.Name == "__name__" {
label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
label.Value = promrelabel.SanitizeName(label.Value)
} else {
label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
label.Name = promrelabel.SanitizeName(label.Name)
}
}
}
@ -138,9 +137,6 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
return tssDst
}
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
type relabelCtx struct {
// pool for labels, which are used during the relabeling.
labels []prompbmarshal.Label

View file

@ -6,13 +6,12 @@ import (
"strconv"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
var (
allowedNames = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_:]*$")
allowedFirstChar = regexp.MustCompile("^[a-zA-Z]")
replaceChars = regexp.MustCompile("[^a-zA-Z0-9_:]")
allowedTagKeys = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_]*$")
)
func convertDuration(duration string) (time.Duration, error) {
@ -180,13 +179,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
}
/*
replace bad characters in metric name with _ per the data model
only replace if needed to reduce string processing time
*/
if !allowedNames.MatchString(name) {
finalMsg.Metric = replaceChars.ReplaceAllString(name, "_")
} else {
finalMsg.Metric = name
}
finalMsg.Metric = promrelabel.SanitizeName(name)
// replace bad characters in tag keys with _ per the data model
for key, value := range msg.Tags {
// if normalization requested, lowercase the key and value
@ -196,11 +190,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
}
/*
replace all explicitly bad characters with _
only replace if needed to reduce string processing time
*/
if !allowedTagKeys.MatchString(key) {
key = replaceChars.ReplaceAllString(key, "_")
}
key = promrelabel.SanitizeName(key)
// tags that start with __ are considered custom stats for internal prometheus stuff, we should drop them
if !strings.HasPrefix(key, "__") {
finalMsg.Tags[key] = value

View file

@ -3,7 +3,6 @@ package relabel
import (
"flag"
"fmt"
"regexp"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -115,9 +114,9 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
for i := range tmpLabels {
label := &tmpLabels[i]
if label.Name == "__name__" {
label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
label.Value = promrelabel.SanitizeName(label.Value)
} else {
label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
label.Name = promrelabel.SanitizeName(label.Name)
}
}
}
@ -149,6 +148,3 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
}
var metricsDropped = metrics.NewCounter(`vm_relabel_metrics_dropped_total`)
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)

View file

@ -5,6 +5,8 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@ -558,3 +560,45 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
}
return dst
}
// SanitizeName returns name with all the chars unsupported by Prometheus
// in metric names and label names replaced with '_'.
//
// Results are cached per input string, so repeated calls with the same name
// skip the regexp-based replacement.
//
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
func SanitizeName(name string) string {
	cache := sanitizedNames.Load().(*sync.Map)
	if cached, found := cache.Load(name); found {
		// Fast path - the result for this name has been already cached.
		return *cached.(*string)
	}

	// Slow path - sanitize the name and remember the result.
	result := unsupportedPromChars.ReplaceAllString(name, "_")

	// Use a private copy of name as the cache key, so the cache entry
	// holds only len(name) bytes even if name is a part of a bigger string.
	key := string(append([]byte{}, name...))
	if result == name {
		// Nothing has been replaced. The result may share memory with name
		// in this case, so switch it to the private copy as well.
		result = key
	}
	cache.Store(key, &result)

	// Drop the whole cache after more than 100e3 insertions
	// in order to keep its size bounded.
	if atomic.AddUint64(&sanitizedNamesLen, 1) > 100e3 {
		atomic.StoreUint64(&sanitizedNamesLen, 0)
		sanitizedNames.Store(&sync.Map{})
	}
	return result
}

var (
	// sanitizedNames holds *sync.Map with `name -> *string` entries,
	// where the value points to the sanitized name.
	sanitizedNames atomic.Value

	// sanitizedNamesLen counts insertions into the current sanitizedNames map.
	sanitizedNamesLen uint64

	// unsupportedPromChars matches any char other than a-z, A-Z, 0-9, '_' and ':'.
	unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
)

func init() {
	// SanitizeName type-asserts the loaded value to *sync.Map,
	// so sanitizedNames must be seeded before the first call.
	sanitizedNames.Store(&sync.Map{})
}

View file

@ -7,6 +7,22 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// TestSanitizeName verifies SanitizeName output for names with and without
// unsupported chars.
func TestSanitizeName(t *testing.T) {
	check := func(name, want string) {
		t.Helper()
		// Call SanitizeName multiple times for the same input,
		// so the result is verified on repeated calls too.
		const attempts = 5
		for attempt := 0; attempt < attempts; attempt++ {
			if got := SanitizeName(name); got != want {
				t.Fatalf("unexpected result for SanitizeName(%q) at iteration %d; got %q; want %q", name, attempt, got, want)
			}
		}
	}

	// Names without unsupported chars must stay as is.
	check("", "")
	check("a", "a")

	// Every unsupported char must be replaced with a distinct '_'.
	check("foo.bar/baz:a", "foo_bar_baz:a")
	check("foo...bar", "foo___bar")
}
func TestLabelsToString(t *testing.T) {
f := func(labels []prompbmarshal.Label, sExpected string) {
t.Helper()

View file

@ -8,6 +8,27 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// BenchmarkSanitizeName runs a sub-benchmark of SanitizeName per sample name.
// The samples include the empty name, names without unsupported chars
// and a name with '-' chars, which must be replaced.
func BenchmarkSanitizeName(b *testing.B) {
	names := []string{"", "foo", "foo-bar-baz", "http_requests_total"}
	for i := range names {
		name := names[i]
		b.Run(name, func(b *testing.B) {
			benchmarkSanitizeName(b, name)
		})
	}
}
// benchmarkSanitizeName measures SanitizeName(name) performance
// when it is called from concurrently running goroutines.
//
// The total length of the returned strings is added to GlobalSink,
// so the SanitizeName results are actually used.
func benchmarkSanitizeName(b *testing.B, name string) {
	b.ReportAllocs()
	b.SetBytes(1)

	// sinkLock serializes GlobalSink updates across the goroutines started by RunParallel.
	// Unsynchronized `GlobalSink += ...` from these goroutines is a data race.
	// A one-slot channel is used as a lock in order to avoid new imports.
	sinkLock := make(chan struct{}, 1)

	b.RunParallel(func(pb *testing.PB) {
		// Accumulate into a goroutine-local counter, so the hot loop
		// doesn't touch shared memory other than SanitizeName internals.
		n := 0
		for pb.Next() {
			sanitizedName := SanitizeName(name)
			n += len(sanitizedName)
		}

		// Publish the local result once per goroutine.
		sinkLock <- struct{}{}
		GlobalSink += n
		<-sinkLock
	})
}
// GlobalSink accumulates values derived from benchmark results.
//
// NOTE(review): presumably it exists for preventing the compiler from
// eliminating the benchmarked calls - confirm.
var GlobalSink int
func BenchmarkMatchRegexPrefixDotPlusMatchOptimized(b *testing.B) {
const pattern = "^foo.+$"
const s = "foobar"