lib/streamaggr: sort by and without labels in the aggregate output metric name

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460
This commit is contained in:
Aliaksandr Valialkin 2023-01-05 02:08:24 -08:00
parent 04dff34de4
commit 0e1f0ade31
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 62 additions and 22 deletions

View file

@ -132,7 +132,7 @@ In this case the following stream aggregation can be used for reducing the numbe
outputs: [total]
```
This config specifies labels, which must be removed from the aggregate outpit, in the `without` list.
This config specifies labels, which must be removed from the aggregate output, in the `without` list.
See [these docs](#aggregating-by-labels) for more details.
The aggregated output metric has the following name according to [output metric naming](#output-metric-names):
@ -296,9 +296,9 @@ Output metric names for stream aggregation are constructed according to the foll
- `<metric_name>` is the original metric name.
- `<interval>` is the interval specified in the [stream aggregation config](#stream-aggregation-config).
- `<by_labels>` is `_`-delimited list of `by` labels specified in the [stream aggregation config](#stream-aggregation-config).
- `<by_labels>` is `_`-delimited sorted list of `by` labels specified in the [stream aggregation config](#stream-aggregation-config).
If the `by` list is missing in the config, then the `_by_<by_labels>` part isn't included in the output metric name.
- `<without_labels>` is an optional `_`-delimited list of `without` labels specified in the [stream aggregation config](#stream-aggregation-config).
- `<without_labels>` is an optional `_`-delimited sorted list of `without` labels specified in the [stream aggregation config](#stream-aggregation-config).
If the `without` list is missing in the config, then the `_without_<without_labels>` part isn't included in the output metric name.
- `<output>` is the aggregate used for constructing the output metric. The aggregate name is taken from the `outputs` list
at the corresponding [stream aggregation config](#stream-aggregation-config).
@ -324,7 +324,7 @@ For example, the following config removes the `:1m_sum_samples` suffix added [to
## Aggregation outputs
The following aggregation outputs are supported in the `outputs` list of the [stream aggregation config](#stream-aggregation-config):
The following aggregation outputs can be put in the `outputs` list at [stream aggregation config](#stream-aggregation-config):
* `total` generates output [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) by summing the input counters.
The `total` handler properly handles input counter resets.
@ -375,7 +375,7 @@ removes the `instance` label from output metrics by summing input samples across
```
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_without_instance_sum_samples{app="bar"}`
output metric.
output metric according to [output metric naming](#output-metric-names).
It is possible to specify the exact list of labels in the output metrics via the `by` list.
For example, the following config sums input samples by the `app` label:
@ -387,7 +387,11 @@ For example, the following config sums input samples by the `app` label:
```
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_by_app_sum_samples{app="bar"}`
output metric.
output metric according to [output metric naming](#output-metric-names).
The labels used in `by` and `without` lists can be modified via `input_relabel_configs` section - see [these docs](#relabeling).
See also [aggregation outputs](#aggregation-outputs).
## Stream aggregation config

3
go.mod
View file

@ -30,6 +30,7 @@ require (
github.com/valyala/fastrand v1.1.0
github.com/valyala/fasttemplate v1.2.2
github.com/valyala/gozstd v1.17.0
github.com/valyala/histogram v1.2.0
github.com/valyala/quicktemplate v1.7.0
golang.org/x/net v0.4.0
golang.org/x/oauth2 v0.3.0
@ -38,8 +39,6 @@ require (
gopkg.in/yaml.v2 v2.4.0
)
require github.com/valyala/histogram v1.2.0
require (
cloud.google.com/go v0.107.0 // indirect
cloud.google.com/go/compute v1.14.0 // indirect

View file

@ -3,6 +3,7 @@ package streamaggr
import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"sync"
@ -225,8 +226,8 @@ func newAggregator(cfg *Config, pushFunc PushFunc) (*aggregator, error) {
}
// check by and without lists
by := cfg.By
without := cfg.Without
by := sortAndRemoveDuplicates(cfg.By)
without := sortAndRemoveDuplicates(cfg.Without)
if len(by) > 0 && len(without) > 0 {
return nil, fmt.Errorf("`by: %s` and `without: %s` lists cannot be set simultaneously", by, without)
}
@ -639,3 +640,18 @@ func removeUnderscoreName(labels []string) []string {
}
return result
}
// sortAndRemoveDuplicates returns a sorted copy of a with repeated entries dropped.
//
// The passed slice is never modified: all the work is done on a private copy.
// nil is returned when a is empty.
func sortAndRemoveDuplicates(a []string) []string {
	if len(a) == 0 {
		return nil
	}

	// Work on a clone, so the caller's slice keeps its original order.
	sorted := make([]string, len(a))
	copy(sorted, a)
	sort.Strings(sorted)

	// Compact the sorted items in place. Equal items are adjacent after sorting,
	// so it is enough to compare every item with the last kept one.
	kept := 1
	for i := 1; i < len(sorted); i++ {
		if sorted[i] == sorted[kept-1] {
			continue
		}
		sorted[kept] = sorted[i]
		kept++
	}
	return sorted[:kept]
}

View file

@ -191,7 +191,7 @@ foo:1m_sum_samples{abc="123"} 12.5
foo:1m_sum_samples{abc="456",de="fg"} 8
`)
// Special case: __name__ in by list
// Special case: __name__ in `by` list - this is the same as empty `by` list
f(`
- interval: 1m
by: [__name__]
@ -209,7 +209,7 @@ foo:1m_count_series 2
foo:1m_sum_samples 20.5
`)
// Non-empty by list with non-existing labels
// Non-empty `by` list with non-existing labels
f(`
- interval: 1m
by: [foo, bar]
@ -219,15 +219,15 @@ foo{abc="123"} 4
bar 5
foo{abc="123"} 8.5
foo{abc="456",de="fg"} 8
`, `bar:1m_by_foo_bar_count_samples 1
bar:1m_by_foo_bar_count_series 1
bar:1m_by_foo_bar_sum_samples 5
foo:1m_by_foo_bar_count_samples 3
foo:1m_by_foo_bar_count_series 2
foo:1m_by_foo_bar_sum_samples 20.5
`, `bar:1m_by_bar_foo_count_samples 1
bar:1m_by_bar_foo_count_series 1
bar:1m_by_bar_foo_sum_samples 5
foo:1m_by_bar_foo_count_samples 3
foo:1m_by_bar_foo_count_series 2
foo:1m_by_bar_foo_sum_samples 20.5
`)
// Non-empty by list with existing label
// Non-empty `by` list with existing label
f(`
- interval: 1m
by: [abc]
@ -248,7 +248,28 @@ foo:1m_by_abc_sum_samples{abc="123"} 12.5
foo:1m_by_abc_sum_samples{abc="456"} 8
`)
// Non-empty without list with non-existing labels
// Non-empty `by` list with duplicate existing label
f(`
- interval: 1m
by: [abc, abc]
outputs: [count_samples, sum_samples, count_series]
`, `
foo{abc="123"} 4
bar 5
foo{abc="123"} 8.5
foo{abc="456",de="fg"} 8
`, `bar:1m_by_abc_count_samples 1
bar:1m_by_abc_count_series 1
bar:1m_by_abc_sum_samples 5
foo:1m_by_abc_count_samples{abc="123"} 2
foo:1m_by_abc_count_samples{abc="456"} 1
foo:1m_by_abc_count_series{abc="123"} 1
foo:1m_by_abc_count_series{abc="456"} 1
foo:1m_by_abc_sum_samples{abc="123"} 12.5
foo:1m_by_abc_sum_samples{abc="456"} 8
`)
// Non-empty `without` list with non-existing labels
f(`
- interval: 1m
without: [foo]
@ -269,7 +290,7 @@ foo:1m_without_foo_sum_samples{abc="123"} 12.5
foo:1m_without_foo_sum_samples{abc="456",de="fg"} 8
`)
// Non-empty without list with existing labels
// Non-empty `without` list with existing labels
f(`
- interval: 1m
without: [abc]
@ -290,7 +311,7 @@ foo:1m_without_abc_sum_samples 12.5
foo:1m_without_abc_sum_samples{de="fg"} 8
`)
// Special case: __name__ in without list
// Special case: __name__ in `without` list
f(`
- interval: 1m
without: [__name__]