mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/promrelabel: add SanitizeName() function for sanitizing Prometheus metric names and label names
Optimize this function by caching the results for previously seen input strings.
Use this function all over the code.
This is a follow-up for fcffdba9dc
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113
This commit is contained in:
parent
41882222d3
commit
7f0b95b50a
6 changed files with 89 additions and 25 deletions
|
@ -3,7 +3,6 @@ package remotewrite
|
|||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
|
@ -118,9 +117,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
|
|||
for j := range tmpLabels {
|
||||
label := &tmpLabels[j]
|
||||
if label.Name == "__name__" {
|
||||
label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
|
||||
label.Value = promrelabel.SanitizeName(label.Value)
|
||||
} else {
|
||||
label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
|
||||
label.Name = promrelabel.SanitizeName(label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -138,9 +137,6 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab
|
|||
return tssDst
|
||||
}
|
||||
|
||||
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
||||
|
||||
type relabelCtx struct {
|
||||
// pool for labels, which are used during the relabeling.
|
||||
labels []prompbmarshal.Label
|
||||
|
|
|
@ -6,13 +6,12 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedNames = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_:]*$")
|
||||
allowedFirstChar = regexp.MustCompile("^[a-zA-Z]")
|
||||
replaceChars = regexp.MustCompile("[^a-zA-Z0-9_:]")
|
||||
allowedTagKeys = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9_]*$")
|
||||
)
|
||||
|
||||
func convertDuration(duration string) (time.Duration, error) {
|
||||
|
@ -180,13 +179,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
|
|||
}
|
||||
/*
|
||||
replace bad characters in metric name with _ per the data model
|
||||
only replace if needed to reduce string processing time
|
||||
*/
|
||||
if !allowedNames.MatchString(name) {
|
||||
finalMsg.Metric = replaceChars.ReplaceAllString(name, "_")
|
||||
} else {
|
||||
finalMsg.Metric = name
|
||||
}
|
||||
finalMsg.Metric = promrelabel.SanitizeName(name)
|
||||
// replace bad characters in tag keys with _ per the data model
|
||||
for key, value := range msg.Tags {
|
||||
// if normalization requested, lowercase the key and value
|
||||
|
@ -196,11 +190,8 @@ func modifyData(msg Metric, normalize bool) (Metric, error) {
|
|||
}
|
||||
/*
|
||||
replace all explicitly bad characters with _
|
||||
only replace if needed to reduce string processing time
|
||||
*/
|
||||
if !allowedTagKeys.MatchString(key) {
|
||||
key = replaceChars.ReplaceAllString(key, "_")
|
||||
}
|
||||
key = promrelabel.SanitizeName(key)
|
||||
// tags that start with __ are considered custom stats for internal prometheus stuff, we should drop them
|
||||
if !strings.HasPrefix(key, "__") {
|
||||
finalMsg.Tags[key] = value
|
||||
|
|
|
@ -3,7 +3,6 @@ package relabel
|
|||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
|
@ -115,9 +114,9 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
|
|||
for i := range tmpLabels {
|
||||
label := &tmpLabels[i]
|
||||
if label.Name == "__name__" {
|
||||
label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_")
|
||||
label.Value = promrelabel.SanitizeName(label.Value)
|
||||
} else {
|
||||
label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_")
|
||||
label.Name = promrelabel.SanitizeName(label.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -149,6 +148,3 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label {
|
|||
}
|
||||
|
||||
var metricsDropped = metrics.NewCounter(`vm_relabel_metrics_dropped_total`)
|
||||
|
||||
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
|
||||
var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
|
||||
|
|
|
@ -5,6 +5,8 @@ import (
|
|||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
|
@ -558,3 +560,45 @@ func fillLabelReferences(dst []byte, replacement string, labels []prompbmarshal.
|
|||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// SanitizeName replaces chars unsupported by Prometheus in metric names and label names with _.
//
// Results are cached per input string, so repeated calls with the same name
// skip the regexp replacement. The cache is dropped and started from scratch
// once more than 100e3 entries have been added to it, which bounds its size.
//
// It is safe to call SanitizeName from concurrent goroutines.
//
// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
func SanitizeName(name string) string {
	m := sanitizedNames.Load().(*sync.Map)
	if v, ok := m.Load(name); ok {
		// Fast path - the sanitized name is found in the cache.
		return *v.(*string)
	}

	// Slow path - sanitize name and store it in the cache.
	sanitizedName := unsupportedPromChars.ReplaceAllString(name, "_")
	// Make a copy of name in order to limit memory usage to the name length,
	// since the name may point to bigger string.
	s := string(append([]byte{}, name...))
	if sanitizedName == name {
		// Point sanitizedName to just allocated s, since it may point to name,
		// which, in turn, can point to bigger string.
		sanitizedName = s
	}

	// LoadOrStore guarantees that only a single goroutine inserts the entry
	// when multiple goroutines miss the cache for the same name at once.
	// This keeps sanitizedNamesLen in sync with the real number of cache entries,
	// so the cache isn't reset earlier than needed.
	v, loaded := m.LoadOrStore(s, &sanitizedName)
	if loaded {
		// Another goroutine has already cached this name - return its result.
		return *v.(*string)
	}
	if n := atomic.AddUint64(&sanitizedNamesLen, 1); n > 100e3 {
		// Too many cached entries - start over with an empty cache.
		atomic.StoreUint64(&sanitizedNamesLen, 0)
		sanitizedNames.Store(&sync.Map{})
	}
	return sanitizedName
}

var (
	// sanitizedNames holds the current *sync.Map, which maps a name to a pointer to its sanitized form.
	sanitizedNames atomic.Value
	// sanitizedNamesLen is the number of entries added to the current sanitizedNames map.
	sanitizedNamesLen uint64

	// unsupportedPromChars matches every char outside the [a-zA-Z0-9_:] set.
	unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`)
)

func init() {
	// SanitizeName expects sanitizedNames to always contain a *sync.Map.
	sanitizedNames.Store(&sync.Map{})
}
|
||||
|
|
|
@ -7,6 +7,22 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func TestSanitizeName(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
for i := 0; i < 5; i++ {
|
||||
result := SanitizeName(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for SanitizeName(%q) at iteration %d; got %q; want %q", s, i, result, resultExpected)
|
||||
}
|
||||
}
|
||||
}
|
||||
f("", "")
|
||||
f("a", "a")
|
||||
f("foo.bar/baz:a", "foo_bar_baz:a")
|
||||
f("foo...bar", "foo___bar")
|
||||
}
|
||||
|
||||
func TestLabelsToString(t *testing.T) {
|
||||
f := func(labels []prompbmarshal.Label, sExpected string) {
|
||||
t.Helper()
|
||||
|
|
|
@ -8,6 +8,27 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func BenchmarkSanitizeName(b *testing.B) {
|
||||
for _, name := range []string{"", "foo", "foo-bar-baz", "http_requests_total"} {
|
||||
b.Run(name, func(b *testing.B) {
|
||||
benchmarkSanitizeName(b, name)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkSanitizeName(b *testing.B, name string) {
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(1)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
sanitizedName := SanitizeName(name)
|
||||
GlobalSink += len(sanitizedName)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// GlobalSink accumulates lengths of the results obtained in benchmarks.
//
// NOTE(review): presumably it exists to prevent the compiler from optimizing out
// the benchmarked calls - confirm.
var GlobalSink int
|
||||
|
||||
func BenchmarkMatchRegexPrefixDotPlusMatchOptimized(b *testing.B) {
|
||||
const pattern = "^foo.+$"
|
||||
const s = "foobar"
|
||||
|
|
Loading…
Reference in a new issue