From 0e1f0ade314f2c1effe9dccdc72c7a993cef25c8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 5 Jan 2023 02:08:24 -0800 Subject: [PATCH] lib/streamaggr: sort `by` and `without` labels in the aggregate output metric name Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 --- docs/stream-aggregation.md | 16 ++++++----- go.mod | 3 +-- lib/streamaggr/streamaggr.go | 20 ++++++++++++-- lib/streamaggr/streamaggr_test.go | 45 ++++++++++++++++++++++--------- 4 files changed, 62 insertions(+), 22 deletions(-) diff --git a/docs/stream-aggregation.md b/docs/stream-aggregation.md index 50234ae8d..11a0327ab 100644 --- a/docs/stream-aggregation.md +++ b/docs/stream-aggregation.md @@ -132,7 +132,7 @@ In this case the following stream aggregation can be used for reducing the numbe outputs: [total] ``` -This config specifies labels, which must be removed from the aggregate outpit, in the `without` list. +This config specifies labels, which must be removed from the aggregate output, in the `without` list. See [these docs](#aggregating-by-labels) for more details. The aggregated output metric has the following name according to [output metric naming](#output-metric-names): @@ -296,9 +296,9 @@ Output metric names for stream aggregation are constructed according to the foll - `` is the original metric name. - `` is the interval specified in the [stream aggregation config](#stream-aggregation-config). -- `` is `_`-delimited list of `by` labels specified in the [stream aggregation config](#stream-aggregation-config). +- `` is `_`-delimited sorted list of `by` labels specified in the [stream aggregation config](#stream-aggregation-config). If the `by` list is missing in the config, then the `_by_` part isn't included in the output metric name. -- `` is an optional `_`-delimited list of `without` labels specified in the [stream aggregation config](#stream-aggregation-config). +- `` is an optional `_`-delimited sorted list of `without` labels specified in the [stream aggregation config](#stream-aggregation-config). If the `without` list is missing in the config, then the `_without_` part isn't included in the output metric name. - `` is the aggregate used for constucting the output metric. The aggregate name is taken from the `outputs` list at the corresponding [stream aggregation config](#stream-aggregation-config). @@ -324,7 +324,7 @@ For example, the following config removes the `:1m_sum_samples` suffix added [to ## Aggregation outputs -The following aggregation outputs are supported in the `outputs` list of the [stream aggregation config](#stream-aggregation-config): +The following aggregation outputs can be put in the `outputs` list at [stream aggregation config](#stream-aggregation-config): * `total` generates output [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) by summing the input counters. The `total` handler properly handles input counter resets. @@ -375,7 +375,7 @@ removes the `instance` label from output metrics by summing input samples across ``` In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_without_instance_sum_samples{app="bar"}` -output metric. +output metric according to [output metric naming](#output-metric-names). It is possible specifying the exact list of labels in the output metrics via `by` list. For example, the following config sums input samples by the `app` label: @@ -387,7 +387,11 @@ For example, the following config sums input samples by the `app` label: ``` In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_by_app_sum_samples{app="bar"}` -output metric. +output metric according to [output metric naming](#output-metric-names). + +The labels used in `by` and `without` lists can be modified via `input_relabel_configs` section - see [these docs](#relabeling). + +See also [aggregation outputs](#aggregation-outputs). ## Stream aggregation config diff --git a/go.mod b/go.mod index eee96d5a5..30af61090 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/valyala/fastrand v1.1.0 github.com/valyala/fasttemplate v1.2.2 github.com/valyala/gozstd v1.17.0 + github.com/valyala/histogram v1.2.0 github.com/valyala/quicktemplate v1.7.0 golang.org/x/net v0.4.0 golang.org/x/oauth2 v0.3.0 @@ -38,8 +39,6 @@ require ( gopkg.in/yaml.v2 v2.4.0 ) -require github.com/valyala/histogram v1.2.0 - require ( cloud.google.com/go v0.107.0 // indirect cloud.google.com/go/compute v1.14.0 // indirect diff --git a/lib/streamaggr/streamaggr.go b/lib/streamaggr/streamaggr.go index ff9bab6a6..d963b86ff 100644 --- a/lib/streamaggr/streamaggr.go +++ b/lib/streamaggr/streamaggr.go @@ -3,6 +3,7 @@ package streamaggr import ( "fmt" "math" + "sort" "strconv" "strings" "sync" @@ -225,8 +226,8 @@ func newAggregator(cfg *Config, pushFunc PushFunc) (*aggregator, error) { } // check by and without lists - by := cfg.By - without := cfg.Without + by := sortAndRemoveDuplicates(cfg.By) + without := sortAndRemoveDuplicates(cfg.Without) if len(by) > 0 && len(without) > 0 { return nil, fmt.Errorf("`by: %s` and `without: %s` lists cannot be set simultaneously", by, without) } @@ -639,3 +640,18 @@ func removeUnderscoreName(labels []string) []string { } return result } + +func sortAndRemoveDuplicates(a []string) []string { + if len(a) == 0 { + return nil + } + a = append([]string{}, a...) + sort.Strings(a) + dst := a[:1] + for _, v := range a[1:] { + if v != dst[len(dst)-1] { + dst = append(dst, v) + } + } + return dst +} diff --git a/lib/streamaggr/streamaggr_test.go b/lib/streamaggr/streamaggr_test.go index bc2a11388..672032e45 100644 --- a/lib/streamaggr/streamaggr_test.go +++ b/lib/streamaggr/streamaggr_test.go @@ -191,7 +191,7 @@ foo:1m_sum_samples{abc="123"} 12.5 foo:1m_sum_samples{abc="456",de="fg"} 8 `) - // Special case: __name__ in by list + // Special case: __name__ in `by` list - this is the same as empty `by` list f(` - interval: 1m by: [__name__] @@ -209,7 +209,7 @@ foo:1m_count_series 2 foo:1m_sum_samples 20.5 `) - // Non-empty by list with non-existing labels + // Non-empty `by` list with non-existing labels f(` - interval: 1m by: [foo, bar] @@ -219,15 +219,15 @@ foo{abc="123"} 4 bar 5 foo{abc="123"} 8.5 foo{abc="456",de="fg"} 8 -`, `bar:1m_by_foo_bar_count_samples 1 -bar:1m_by_foo_bar_count_series 1 -bar:1m_by_foo_bar_sum_samples 5 -foo:1m_by_foo_bar_count_samples 3 -foo:1m_by_foo_bar_count_series 2 -foo:1m_by_foo_bar_sum_samples 20.5 +`, `bar:1m_by_bar_foo_count_samples 1 +bar:1m_by_bar_foo_count_series 1 +bar:1m_by_bar_foo_sum_samples 5 +foo:1m_by_bar_foo_count_samples 3 +foo:1m_by_bar_foo_count_series 2 +foo:1m_by_bar_foo_sum_samples 20.5 `) - // Non-empty by list with existing label + // Non-empty `by` list with existing label f(` - interval: 1m by: [abc] @@ -248,7 +248,28 @@ foo:1m_by_abc_sum_samples{abc="123"} 12.5 foo:1m_by_abc_sum_samples{abc="456"} 8 `) - // Non-empty without list with non-existing labels + // Non-empty `by` list with duplicate existing label + f(` +- interval: 1m + by: [abc, abc] + outputs: [count_samples, sum_samples, count_series] +`, ` +foo{abc="123"} 4 +bar 5 +foo{abc="123"} 8.5 +foo{abc="456",de="fg"} 8 +`, `bar:1m_by_abc_count_samples 1 +bar:1m_by_abc_count_series 1 +bar:1m_by_abc_sum_samples 5 +foo:1m_by_abc_count_samples{abc="123"} 2 +foo:1m_by_abc_count_samples{abc="456"} 1 +foo:1m_by_abc_count_series{abc="123"} 1 +foo:1m_by_abc_count_series{abc="456"} 1 +foo:1m_by_abc_sum_samples{abc="123"} 12.5 +foo:1m_by_abc_sum_samples{abc="456"} 8 +`) + + // Non-empty `without` list with non-existing labels f(` - interval: 1m without: [foo] @@ -269,7 +290,7 @@ foo:1m_without_foo_sum_samples{abc="123"} 12.5 foo:1m_without_foo_sum_samples{abc="456",de="fg"} 8 `) - // Non-empty without list with existing labels + // Non-empty `without` list with existing labels f(` - interval: 1m without: [abc] @@ -290,7 +311,7 @@ foo:1m_without_abc_sum_samples 12.5 foo:1m_without_abc_sum_samples{de="fg"} 8 `) - // Special case: __name__ in without list + // Special case: __name__ in `without` list f(` - interval: 1m without: [__name__]