stream aggregation: support configuring multiple labels per `remoteWrite… (#7073)

….url` using `-remoteWrite.streamAggr.dropInputLabels`

Before, labels were set to all the `remoteWrite.url`.

address https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6780

---------

Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
This commit is contained in:
Hui Wang 2024-09-27 18:21:09 +08:00 committed by GitHub
parent c896bf340d
commit fbde238cdc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 14 additions and 3 deletions

View file

@ -3,6 +3,7 @@ package remotewrite
import ( import (
"flag" "flag"
"fmt" "fmt"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@ -56,6 +57,7 @@ var (
"See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start") "See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start")
streamAggrDropInputLabels = flagutil.NewArrayString("remoteWrite.streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+ streamAggrDropInputLabels = flagutil.NewArrayString("remoteWrite.streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+
"before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. "+ "before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. "+
"Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. "+
"See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels") "See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels")
) )
@ -155,7 +157,11 @@ func (rwctx *remoteWriteCtx) initStreamAggrConfig() {
dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx) dedupInterval := streamAggrDedupInterval.GetOptionalArg(idx)
if dedupInterval > 0 { if dedupInterval > 0 {
alias := fmt.Sprintf("dedup-%d", idx+1) alias := fmt.Sprintf("dedup-%d", idx+1)
rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, dedupInterval, *streamAggrDropInputLabels, alias) var dropLabels []string
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
}
rwctx.deduplicator = streamaggr.NewDeduplicator(rwctx.pushInternalTrackDropped, dedupInterval, dropLabels, alias)
} }
} }
@ -224,9 +230,13 @@ func newStreamAggrConfigPerURL(idx int, pushFunc streamaggr.PushFunc) (*streamag
if *showRemoteWriteURL { if *showRemoteWriteURL {
alias = fmt.Sprintf("%d:%s", idx+1, remoteWriteURLs.GetOptionalArg(idx)) alias = fmt.Sprintf("%d:%s", idx+1, remoteWriteURLs.GetOptionalArg(idx))
} }
var dropLabels []string
if streamAggrDropInputLabels.GetOptionalArg(idx) != "" {
dropLabels = strings.Split(streamAggrDropInputLabels.GetOptionalArg(idx), "^^")
}
opts := &streamaggr.Options{ opts := &streamaggr.Options{
DedupInterval: streamAggrDedupInterval.GetOptionalArg(idx), DedupInterval: streamAggrDedupInterval.GetOptionalArg(idx),
DropInputLabels: *streamAggrDropInputLabels, DropInputLabels: dropLabels,
IgnoreOldSamples: streamAggrIgnoreOldSamples.GetOptionalArg(idx), IgnoreOldSamples: streamAggrIgnoreOldSamples.GetOptionalArg(idx),
IgnoreFirstIntervals: streamAggrIgnoreFirstIntervals.GetOptionalArg(idx), IgnoreFirstIntervals: streamAggrIgnoreFirstIntervals.GetOptionalArg(idx),
KeepInput: streamAggrKeepInput.GetOptionalArg(idx), KeepInput: streamAggrKeepInput.GetOptionalArg(idx),

View file

@ -35,6 +35,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/) fix service discovery of Azure Virtual Machines for response contains `nextLink` in `Host:Port` format. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6912). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/) fix service discovery of Azure Virtual Machines for response contains `nextLink` in `Host:Port` format. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6912).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): properly consume messages [from kafka](https://docs.victoriametrics.com/vmagent/#kafka-integration). Previously vmagent could skip some messages during start-up. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): properly consume messages [from kafka](https://docs.victoriametrics.com/vmagent/#kafka-integration). Previously vmagent could skip some messages during start-up.
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): perform deduplication for all received data when specifying `-streamAggr.dedupInterval` or `-remoteWrite.streamAggr.dedupInterval` command-line flags are set. Previously, if the `-remoteWrite.streamAggr.config` or `-streamAggr.config` is set, only series that matched aggregation config were deduplicated. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6711#issuecomment-2288361213) for details. * BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): perform deduplication for all received data when specifying `-streamAggr.dedupInterval` or `-remoteWrite.streamAggr.dedupInterval` command-line flags are set. Previously, if the `-remoteWrite.streamAggr.config` or `-streamAggr.config` is set, only series that matched aggregation config were deduplicated. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6711#issuecomment-2288361213) for details.
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): fix `-remoteWrite.streamAggr.dropInputLabels` labels parsing. Now, this flag allows specifying a list of labels to drop (by using '^^' separator, i.e. `dropInputLabels='replica^^az,replica'`) per each corresponding `remoteWrite.url`. Before, `-remoteWrite.streamAggr.dropInputLabels` labels were incorrectly applied to all configured `remoteWrite.url`s. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6780) for the details.
* BUGFIX: [vmagent dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmagent.json): fix legend captions for stream aggregation related panels. Before they were displaying wrong label names. * BUGFIX: [vmagent dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmagent.json): fix legend captions for stream aggregation related panels. Before they were displaying wrong label names.
* BUGFIX: [vmgateway](https://docs.victoriametrics.com/vmgateway/): add missing `datadog`, `newrelic`, `opentelemetry` and `pushgateway` routes to the `JWT` authorization routes. Allows prefixed (`promtheus/graphite`) routes for query requests. * BUGFIX: [vmgateway](https://docs.victoriametrics.com/vmgateway/): add missing `datadog`, `newrelic`, `opentelemetry` and `pushgateway` routes to the `JWT` authorization routes. Allows prefixed (`promtheus/graphite`) routes for query requests.
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly cache empty list of matching time series for the given [labels filter](https://docs.victoriametrics.com/keyconcepts/#filtering). This type of caching was broken since [v1.97.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.97.0), which could result in the increased CPU usage when performing queries, which match zero time series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7009). * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly cache empty list of matching time series for the given [labels filter](https://docs.victoriametrics.com/keyconcepts/#filtering). This type of caching was broken since [v1.97.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.97.0), which could result in the increased CPU usage when performing queries, which match zero time series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7009).

View file

@ -2215,7 +2215,7 @@ See the docs at https://docs.victoriametrics.com/vmagent/ .
Supports array of values separated by comma or specified via multiple flags. Supports array of values separated by comma or specified via multiple flags.
Empty values are set to false. Empty values are set to false.
-remoteWrite.streamAggr.dropInputLabels array -remoteWrite.streamAggr.dropInputLabels array
An optional list of labels to drop from samples before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels An optional list of labels to drop from samples before stream de-duplication and aggregation with -remoteWrite.streamAggr.config and -remoteWrite.streamAggr.dedupInterval at the corresponding -remoteWrite.url. Multiple labels per remoteWrite.url must be delimited by '^^': -remoteWrite.streamAggr.dropInputLabels='replica^^az,replica'. See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels
Supports an array of values separated by comma or specified via multiple flags. Supports an array of values separated by comma or specified via multiple flags.
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
-remoteWrite.streamAggr.ignoreFirstIntervals array -remoteWrite.streamAggr.ignoreFirstIntervals array