mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/protoparser/datadog: sanitize metric names by default in the same way as DataDog does
This commit is based on the pull request https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105 Thanks to @PerGon for the idea and initial implementation.
This commit is contained in:
parent
fcffdba9dc
commit
7c2474dac7
9 changed files with 69 additions and 2 deletions
|
@ -1997,6 +1997,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-dedup.minScrapeInterval duration
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
|
||||
-deleteAuthKey string
|
||||
|
|
|
@ -915,6 +915,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-dryRun
|
||||
|
|
|
@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
|||
|
||||
**Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) changes default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. The change is supposed to improve reliability of the rules evaluation when the evaluation interval is lower than the scraping interval.
|
||||
|
||||
* FEATURE: sanitize metric names for data ingested via [DataDog protocol](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) according to [DataDog metric naming](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics). The behaviour can be disabled by passing `-datadog.sanitizeMetricName=false` command-line flag. Thanks to @PerGon for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105).
|
||||
* FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113).
|
||||
* FEATURE: check the correctness of raw sample timestamps stored on disk when reading them. This reduces the probability of possible silent corruption of the data stored on disk. This should help [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2998) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3011).
|
||||
* FEATURE: atomically delete directories with snapshots, parts and partitions at [storage level](https://docs.victoriametrics.com/#storage). Previously such directories could be left in a partially deleted state when the deletion operation was interrupted by unclean shutdown. This may result in `cannot open file ...: no such file or directory` error on the next start. The probability of this error was quite high when NFS or EFS was used as persistent storage for VictoriaMetrics data. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3038).
|
||||
|
|
|
@ -639,6 +639,8 @@ Below is the output for `/path/to/vminsert -help`:
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-disableRerouting
|
||||
|
|
|
@ -1997,6 +1997,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-dedup.minScrapeInterval duration
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
|
||||
-deleteAuthKey string
|
||||
|
|
|
@ -2001,6 +2001,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-dedup.minScrapeInterval duration
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
|
||||
-deleteAuthKey string
|
||||
|
|
|
@ -919,6 +919,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
|||
-datadog.maxInsertRequestSize size
|
||||
The maximum size in bytes of a single DataDog POST request to /api/v1/series
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
|
||||
-datadog.sanitizeMetricName
|
||||
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
|
||||
-denyQueryTracing
|
||||
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
|
||||
-dryRun
|
||||
|
|
|
@ -2,8 +2,10 @@ package datadog
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
|
@ -14,8 +16,18 @@ import (
|
|||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
var maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series")
|
||||
var (
|
||||
// The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
|
||||
maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series")
|
||||
|
||||
// DataDog documents its metric naming rules for custom metrics at
|
||||
// https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
|
||||
// There is also some undocumented behaviour. In addition to what the docs state, the following is done:
|
||||
// - Consecutive underscores are replaced with a single underscore
|
||||
// - Underscores immediately before or after a dot are removed
|
||||
sanitizeMetricName = flag.Bool("datadog.sanitizeMetricName", true, "Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at "+
|
||||
"https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics")
|
||||
)
|
||||
|
||||
// ParseStream parses DataDog POST request for /api/v1/series from reader and calls callback for the parsed request.
|
||||
//
|
||||
|
@ -52,6 +64,9 @@ func ParseStream(r io.Reader, contentEncoding string, callback func(series []Ser
|
|||
series := req.Series
|
||||
for i := range series {
|
||||
rows += len(series[i].Points)
|
||||
if *sanitizeMetricName {
|
||||
series[i].Metric = sanitizeName(series[i].Metric)
|
||||
}
|
||||
}
|
||||
rowsRead.Add(rows)
|
||||
|
||||
|
@ -136,3 +151,19 @@ func putRequest(req *Request) {
|
|||
}
|
||||
|
||||
var requestPool sync.Pool
|
||||
|
||||
// sanitizeName rewrites the metric name s in a DataDog-compatible way.
//
// Three passes are applied in order:
//   - every run of chars other than ASCII letters, digits, `_` and `.` becomes a single `_`;
//   - every run of `_` is collapsed into a single `_`;
//   - a `.` together with an optional `_` directly before and after it becomes a bare `.`.
//
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
func sanitizeName(s string) string {
	replaced := unsupportedDatadogChars.ReplaceAllString(s, "_")
	collapsed := multiUnderscores.ReplaceAllString(replaced, "_")
	return underscoresWithDots.ReplaceAllString(collapsed, ".")
}

// Patterns used by sanitizeName. They are compiled once at package level.
var (
	// unsupportedDatadogChars matches a run of chars outside `0-9a-zA-Z_.`.
	unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)

	// multiUnderscores matches a run of one or more underscores.
	multiUnderscores = regexp.MustCompile(`_+`)

	// underscoresWithDots matches a dot with an optional underscore on each side.
	underscoresWithDots = regexp.MustCompile(`_?\._?`)
)
|
||||
|
|
23
lib/protoparser/datadog/streamparser_test.go
Normal file
23
lib/protoparser/datadog/streamparser_test.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
package datadog
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSanitizeName(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
result := sanitizeName(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result for sanitizeName(%q); got\n%q\nwant\n%q", s, result, resultExpected)
|
||||
}
|
||||
}
|
||||
f("before.dot.metric!.name", "before.dot.metric.name")
|
||||
f("after.dot.metric.!name", "after.dot.metric.name")
|
||||
f("in.the.middle.met!ric.name", "in.the.middle.met_ric.name")
|
||||
f("before.and.after.and.middle.met!ric!.!name", "before.and.after.and.middle.met_ric.name")
|
||||
f("many.consecutive.met!!!!ric!!.!!name", "many.consecutive.met_ric.name")
|
||||
f("many.non.consecutive.m!e!t!r!i!c!.!name", "many.non.consecutive.m_e_t_r_i_c.name")
|
||||
f("how.about.underscores_.!_metric!_!.__!!name", "how.about.underscores.metric.name")
|
||||
f("how.about.underscores.middle.met!_!_ric.name", "how.about.underscores.middle.met_ric.name")
|
||||
}
|
Loading…
Reference in a new issue