From 476faf5578dee90e87296eda3bfdce4d14da3ad6 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Tue, 2 Jul 2024 15:56:41 +0300 Subject: [PATCH] lib/protoparser/graphite: added -graphite.sanitizeMetricName flag (#6489) ### Describe Your Changes Added flag to sanitize graphite metrics fixes #6077 ### Checklist The following checks are **mandatory**: - [ ] My change adheres [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). --------- Signed-off-by: hagen1778 Co-authored-by: hagen1778 --- README.md | 9 ++++++ docs/CHANGELOG.md | 1 + docs/README.md | 9 ++++++ docs/Single-server-VictoriaMetrics.md | 9 ++++++ lib/protoparser/graphite/parser.go | 29 +++++++++++++++++ lib/protoparser/graphite/parser_test.go | 42 +++++++++++++++++++------ 6 files changed, 89 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 853b21a94..7d4799bf2 100644 --- a/README.md +++ b/README.md @@ -774,6 +774,13 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` +To sanitize ingested metric names and labels according to Prometheus naming convention enable +`-graphite.sanitizeMetricName` cmd-line flag. When enabled, VictoriaMetrics will apply the following modifications: +- replace `/`,`@`,`*` with `_`; +- drop `\`; +- remove redundant dots, e.g. `metric..name` => `metric.name`; +- replace characters matching the expression `[^a-zA-Z0-9:._=\p{L}]` with `_`. + VictoriaMetrics sets the current time if the timestamp is omitted. An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. 
After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: @@ -2836,6 +2843,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() + -graphite.sanitizeMetricName + Sanitize metric names for the ingested Graphite data. See https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd -graphiteListenAddr string TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol -graphiteListenAddr.useProxyProtocol diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c9e5d33e2..0dd34b67a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -43,6 +43,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * `vm_streamaggr_stale_samples_total` - shows the number of time series that became [stale](https://docs.victoriametrics.com/stream-aggregation/#staleness) during aggregation; * metrics related to stream aggregation got additional labels `match` (matching param), `group` (`by` or `without` param), `url` (address of `remoteWrite.url` where aggregation is applied), `position` (the position of the aggregation rule in config file). 
* These and other metrics were reflected on the [vmagent dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmagent.json) in `stream aggregation` section. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): add `-graphite.sanitizeMetricName` cmd-line flag for sanitizing metrics ingested via [Graphite protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077). * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): do not retry RPC calls to vmstorage nodes if [complexity limits](https://docs.victoriametrics.com/#resource-usage-limits) were exceeded. * BUGFIX: [docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#docker-compose-environment-for-victoriametrics): fix incorrect link to vmui from [VictoriaMetrics plugin in Grafana](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#grafana). diff --git a/docs/README.md b/docs/README.md index ac8bec4d0..406e54e26 100644 --- a/docs/README.md +++ b/docs/README.md @@ -777,6 +777,13 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` +To sanitize ingested metric names and labels according to Prometheus naming convention enable +`-graphite.sanitizeMetricName` cmd-line flag. When enabled, VictoriaMetrics will apply the following modifications: +- replace `/`,`@`,`*` with `_`; +- drop `\`; +- remove redundant dots, e.g. `metric..name` => `metric.name`; +- replace characters matching the expression `[^a-zA-Z0-9:._=\p{L}]` with `_`. + VictoriaMetrics sets the current time if the timestamp is omitted. 
An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: @@ -2839,6 +2846,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() + -graphite.sanitizeMetricName + Sanitize metric names for the ingested Graphite data. See https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd -graphiteListenAddr string TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. See also -graphiteListenAddr.useProxyProtocol -graphiteListenAddr.useProxyProtocol diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 97b8108a1..09e35c8ef 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -785,6 +785,13 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` +To sanitize ingested metric names and labels according to Prometheus naming convention enable +`-graphite.sanitizeMetricName` cmd-line flag. 
When enabled, VictoriaMetrics will apply the following modifications: +- replace `/`,`@`,`*` with `_`; +- drop `\`; +- remove redundant dots, e.g. `metric..name` => `metric.name`; +- replace characters matching the expression `[^a-zA-Z0-9:._=\p{L}]` with `_`. + VictoriaMetrics sets the current time if the timestamp is omitted. An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: @@ -2847,6 +2854,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path -fs.disableMmap Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread() + -graphite.sanitizeMetricName + Sanitize metric names for the ingested Graphite data. See https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd -graphiteListenAddr string TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty. 
See also -graphiteListenAddr.useProxyProtocol -graphiteListenAddr.useProxyProtocol diff --git a/lib/protoparser/graphite/parser.go b/lib/protoparser/graphite/parser.go index 1198da50c..1bf524239 100644 --- a/lib/protoparser/graphite/parser.go +++ b/lib/protoparser/graphite/parser.go @@ -1,14 +1,22 @@ package graphite import ( + "flag" "fmt" + "regexp" "strings" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/metrics" "github.com/valyala/fastjson/fastfloat" ) +var ( + sanitizeMetricName = flag.Bool("graphite.sanitizeMetricName", false, "Sanitize metric names for the ingested Graphite data. "+ + "See https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd") +) + // graphite text line protocol may use white space or tab as separator // See https://github.com/grobian/carbon-c-relay/commit/f3ffe6cc2b52b07d14acbda649ad3fd6babdd528 const graphiteSeparators = " \t" @@ -76,6 +84,9 @@ func (r *Row) UnmarshalMetricAndTags(s string, tagsPool []Tag) ([]Tag, error) { if len(r.Metric) == 0 { return tagsPool, fmt.Errorf("metric cannot be empty") } + if *sanitizeMetricName { + r.Metric = sanitizer.Transform(r.Metric) + } return tagsPool, nil } @@ -202,6 +213,9 @@ func (t *Tag) reset() { func (t *Tag) unmarshal(s string) { t.reset() + if *sanitizeMetricName { + s = sanitizer.Transform(s) + } n := strings.IndexByte(s, '=') if n < 0 { // Empty tag value. 
@@ -240,3 +254,18 @@ func stripLeadingWhitespace(s string) string { } return "" } + +var sanitizer = bytesutil.NewFastStringTransformer(func(s string) string { + // Apply rule to drop some chars to preserve backwards compatibility + s = dropChars.Replace(s) + // Replace any remaining illegal chars + return allowedChars.ReplaceAllLiteralString(s, "_") +}) + +var ( + dropChars = strings.NewReplacer( + `\`, "", + "..", ".", + ) + allowedChars = regexp.MustCompile(`[^a-zA-Z0-9:._=\p{L}]`) +) diff --git a/lib/protoparser/graphite/parser_test.go b/lib/protoparser/graphite/parser_test.go index f4e6cce7b..f93fc7889 100644 --- a/lib/protoparser/graphite/parser_test.go +++ b/lib/protoparser/graphite/parser_test.go @@ -19,6 +19,8 @@ func TestUnmarshalMetricAndTagsFailure(t *testing.T) { } func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { + sanitizeFlagValue := *sanitizeMetricName + *sanitizeMetricName = true f := func(s string, rExpected *Row) { t.Helper() var r Row @@ -31,10 +33,10 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { } } f(" ", &Row{ - Metric: " ", + Metric: "_", }) f("foo ;bar=baz", &Row{ - Metric: "foo ", + Metric: "foo_", Tags: []Tag{ { Key: "bar", @@ -43,7 +45,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { }, }) f("f oo;bar=baz", &Row{ - Metric: "f oo", + Metric: "f_oo", Tags: []Tag{ { Key: "bar", @@ -56,7 +58,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: "baz ", + Value: "baz___", }, }, }) @@ -65,7 +67,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: " baz", + Value: "_baz", }, }, }) @@ -74,7 +76,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: "b az", + Value: "b_az", }, }, }) @@ -82,7 +84,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Metric: "foo", Tags: []Tag{ { - Key: "b ar", + Key: "b_ar", Value: "baz", }, }, @@ -103,9 +105,25 @@ func TestUnmarshalMetricAndTagsSuccess(t 
*testing.T) { }, }, }) + f("foo..bar;bar=123;baz=aa=bb", &Row{ + Metric: "foo.bar", + Tags: []Tag{ + { + Key: "bar", + Value: "123", + }, + { + Key: "baz", + Value: "aa=bb", + }, + }, + }) + *sanitizeMetricName = sanitizeFlagValue } func TestRowsUnmarshalFailure(t *testing.T) { + sanitizeFlagValue := *sanitizeMetricName + *sanitizeMetricName = true f := func(s string) { t.Helper() var rows Rows @@ -129,9 +147,12 @@ func TestRowsUnmarshalFailure(t *testing.T) { // invalid timestamp f("aa 123 bar") + *sanitizeMetricName = sanitizeFlagValue } func TestRowsUnmarshalSuccess(t *testing.T) { + sanitizeFlagValue := *sanitizeMetricName + *sanitizeMetricName = true f := func(s string, rowsExpected *Rows) { t.Helper() var rows Rows @@ -184,17 +205,17 @@ func TestRowsUnmarshalSuccess(t *testing.T) { // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3102 f("s a;ta g1=aaa1;tag2=bb b2;tag3 1 23", &Rows{ Rows: []Row{{ - Metric: "s a", + Metric: "s_a", Value: 1, Timestamp: 23, Tags: []Tag{ { - Key: "ta g1", + Key: "ta_g1", Value: "aaa1", }, { Key: "tag2", - Value: "bb b2", + Value: "bb_b2", }, }, }}, @@ -379,4 +400,5 @@ func TestRowsUnmarshalSuccess(t *testing.T) { Timestamp: 1789, }}, }) + *sanitizeMetricName = sanitizeFlagValue }