From fbde238cdcdf4e2c892d85a3e9e2be6e54e69cef Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 27 Sep 2024 18:21:09 +0800 Subject: [PATCH] =?UTF-8?q?stream=20aggregation:=20support=20configuring?= =?UTF-8?q?=20multiple=20labels=20per=20`remoteWrite=E2=80=A6=20(#7073)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ….url` using `-remoteWrite.streamAggr.dropInputLabels` Before, labels were set to all the `remoteWrite.url`. address https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6780 --------- Co-authored-by: Roman Khavronenko --- app/vmagent/remotewrite/streamaggr.go | 14 ++++++++++++-- docs/changelog/CHANGELOG.md | 1 + docs/vmagent.md | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/app/vmagent/remotewrite/streamaggr.go b/app/vmagent/remotewrite/streamaggr.go index 8bdc8c3a7..6391cfa3f 100644 --- a/app/vmagent/remotewrite/streamaggr.go +++ b/app/vmagent/remotewrite/streamaggr.go @@ -3,6 +3,7 @@ package remotewrite import ( "flag" "fmt" + "strings" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" @@ -56,6 +57,7 @@ var ( "See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start") streamAggrDropInputLabels = flagutil.NewArrayString("remoteWrite.streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+ "before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. "+ + "Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. 
"+ "See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels") ) @@ -155,7 +157,11 @@ func (rwctx *remoteWriteCtx) initStreamAggrConfig() { dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx) if dedupInterval > 0 { alias := fmt.Sprintf("dedup-%d", idx+1) - rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, dedupInterval, *streamAggrDropInputLabels, alias) + var dropLabels []string + if streamAggrDropInputLabels.GetOptionalArg(idx) != "" { + dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^") + } + rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, dedupInterval, dropLabels, alias) } } @@ -224,9 +230,13 @@ func newStreamAggrConfigPerURL(idx int, pushFunc streamaggr.PushFunc) (*streamag if *showRemoteWriteURL { alias = fmt.Sprintf("%d:%s", idx+1, remoteWriteURLs.GetOptionalArg(idx)) } + var dropLabels []string + if streamAggrDropInputLabels.GetOptionalArg(idx) != "" { + dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^") + } opts := &streamaggr.Options{ DedupInterval: streamAggrDedupInterval.GetOptionalArg(idx), - DropInputLabels: *streamAggrDropInputLabels, + DropInputLabels: dropLabels, IgnoreOldSamples: streamAggrIgnoreOldSamples.GetOptionalArg(idx), IgnoreFirstIntervals: streamAggrIgnoreFirstIntervals.GetOptionalArg(idx), KeepInput: streamAggrKeepInput.GetOptionalArg(idx), diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 54dcf1c14..667bbe195 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -35,6 +35,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/) fix service discovery of Azure Virtual Machines for response contains `nextLink` in `Host:Port` format. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6912). 
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): properly consume messages [from kafka](https://docs.victoriametrics.com/vmagent/#kafka-integration). Previously vmagent could skip some messages during start-up. * BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): perform deduplication for all received data when specifying `-streamAggr.dedupInterval` or `-remoteWrite.streamAggr.dedupInterval` command-line flags are set. Previously, if the `-remoteWrite.streamAggr.config` or `-streamAggr.config` is set, only series that matched aggregation config were deduplicated. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6711#issuecomment-2288361213) for details. +* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): fix parsing of the `-remoteWrite.streamAggr.dropInputLabels` command-line flag. The flag now accepts a separate list of labels to drop for each corresponding `-remoteWrite.url`; multiple labels for a single URL must be delimited by `^^`, e.g. `-remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'`. Previously, all the labels listed in `-remoteWrite.streamAggr.dropInputLabels` were incorrectly applied to every configured `-remoteWrite.url`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6780) for details. * BUGFIX: [vmagent dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmagent.json): fix legend captions for stream aggregation related panels. Before they were displaying wrong label names. * BUGFIX: [vmgateway](https://docs.victoriametrics.com/vmgateway/): add missing `datadog`, `newrelic`, `opentelemetry` and `pushgateway` routes to the `JWT` authorization routes. Allows prefixed (`promtheus/graphite`) routes for query requests. 
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly cache empty list of matching time series for the given [labels filter](https://docs.victoriametrics.com/keyconcepts/#filtering). This type of caching was broken since [v1.97.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.97.0), which could result in the increased CPU usage when performing queries, which match zero time series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7009). diff --git a/docs/vmagent.md b/docs/vmagent.md index 9ccbce4f7..4f5d2e900 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -2215,7 +2215,7 @@ See the docs at https://docs.victoriametrics.com/vmagent/ . Supports array of values separated by comma or specified via multiple flags. Empty values are set to false. -remoteWrite.streamAggr.dropInputLabels array - An optional list of labels to drop from samples before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels + An optional list of labels to drop from samples before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -remoteWrite.streamAggr.ignoreFirstIntervals array