metricsql: add support of using keep_metric_names for binary operations (#4109)

* metricsql: add support of using keep_metric_names for binary operations

This should help to avoid confusion with queries like one in the issue #3710.

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>

* wip

---------

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Zakhar Bessarab 2023-07-16 14:00:39 +04:00 committed by GitHub
parent 4cb024d8a3
commit e2367b6d1c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 202 additions and 19 deletions

View file

@ -157,6 +157,10 @@ func adjustBinaryOpTags(be *metricsql.BinaryOpExpr, left, right []*timeseries) (
groupOp = "ignoring"
}
groupTags := be.GroupModifier.Args
if be.KeepMetricNames && groupOp == "on" {
// Add __name__ to groupTags if metric name must be preserved.
groupTags = append(groupTags[:len(groupTags):len(groupTags)], "__name__")
}
for k, tssLeft := range mLeft {
tssRight := mRight[k]
if len(tssRight) == 0 {
@ -315,6 +319,12 @@ func resetMetricGroupIfRequired(be *metricsql.BinaryOpExpr, ts *timeseries) {
// Do not reset MetricGroup for non-boolean `compare` binary ops like Prometheus does.
return
}
if be.KeepMetricNames {
// Do not reset MetricGroup if it is explicitly requested via `a op b keep_metric_names`
// See https://docs.victoriametrics.com/MetricsQL.html#keep_metric_names
return
}
ts.MetricName.ResetMetricGroup()
}

View file

@ -3003,6 +3003,34 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`vector / scalar keep_metric_names`, func(t *testing.T) {
t.Parallel()
q := `sort_desc((label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) / 2 keep_metric_names)`
r1 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q1"),
},
Values: []float64{500, 600, 700, 800, 900, 1000},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r2 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q2"),
},
Values: []float64{5, 5, 5, 5, 5, 5},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("qwert"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`vector * scalar`, func(t *testing.T) {
t.Parallel()
q := `sum(time()) * 2`
@ -3038,6 +3066,34 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`scalar * vector keep_metric_names`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(2 * (label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) keep_metric_names)`
r1 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q1"),
},
Values: []float64{2000, 2400, 2800, 3200, 3600, 4000},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r2 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q2"),
},
Values: []float64{20, 20, 20, 20, 20, 20},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("qwert"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`scalar * on() group_right vector`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(2 * on() group_right() (label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")))`
@ -3062,6 +3118,34 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`scalar * on() group_right vector keep_metric_names`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(2 * on() group_right() (label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) keep_metric_names)`
r1 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q1"),
},
Values: []float64{2000, 2400, 2800, 3200, 3600, 4000},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
r2 := netstorage.Result{
MetricName: storage.MetricName{
MetricGroup: []byte("q2"),
},
Values: []float64{20, 20, 20, 20, 20, 20},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("qwert"),
}}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`scalar * ignoring(foo) group_right vector`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(label_set(2, "a", "2") * ignoring(foo,a) group_right(a) (label_set(time(), "foo", "bar", "a", "1"), label_set(10, "foo", "qwert")))`
@ -3147,6 +3231,28 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`vector * on(foo) scalar keep_metric_names`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(
(
label_set(time(), "foo", "bar", "xx", "yy", "__name__", "q1"),
label_set(10, "foo", "qwert", "__name__", "q2")
) * on(foo) (label_set(2, "foo","bar","aa","bb", "__name__", "q2")) keep_metric_names)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2000, 2400, 2800, 3200, 3600, 4000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("q1")
r.MetricName.Tags = []storage.Tag{
{
Key: []byte("foo"),
Value: []byte("bar"),
},
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`vector * on(foo) group_left(additional_tag) duplicate_timeseries_differ_by_additional_tag`, func(t *testing.T) {
t.Parallel()
q := `sort(label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left(op) (
@ -3375,6 +3481,26 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`vector + vector partial matching keep_metric_names`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(
(label_set(time(), "t1", "v1", "__name__", "q1") or label_set(10, "t2", "v2", "__name__", "q2"))
+
(label_set(100, "t1", "v1", "__name__", "q3") or label_set(time(), "t2", "v3")) keep_metric_names
)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1100, 1300, 1500, 1700, 1900, 2100},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("q1")
r.MetricName.Tags = []storage.Tag{{
Key: []byte("t1"),
Value: []byte("v1"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`vector + vector no matching`, func(t *testing.T) {
t.Parallel()
q := `sort_desc(

View file

@ -27,8 +27,9 @@ The following tip changes can be tested by building VictoriaMetrics components f
* SECURITY: upgrade base docker image (alpine) from 3.18.0 to 3.18.2. See [alpine 3.18.2 release notes](https://alpinelinux.org/posts/Alpine-3.15.9-3.16.6-3.17.4-3.18.2-released.html).
* SECURITY: upgrade Go builder from Go1.20.5 to Go1.20.6. See [the list of issues addressed in Go1.20.6](https://github.com/golang/go/issues?q=milestone%3AGo1.20.6+label%3ACherryPickApproved).
* FETURE: reduce memory usage by up to 5x for setups with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) and long [retention](https://docs.victoriametrics.com/#retention). See [description for this change](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/7094fa38bc207c7bd7330ea8a834310a310ce5e3) for details.
* FEATURE: reduce memory usage by up to 5x for setups with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) and long [retention](https://docs.victoriametrics.com/#retention). See [description for this change](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/7094fa38bc207c7bd7330ea8a834310a310ce5e3) for details.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow selecting time series matching at least one of multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}` selects series with either `{env="prod",job="a"}` or `{env="dev",job="b"}` labels. This functionality allows passing the selected series to [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) without the need to use [subqueries](https://docs.victoriametrics.com/MetricsQL.html#subqueries). See [these docs](https://docs.victoriametrics.com/keyConcepts.html#filtering-by-multiple-or-filters).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add ability to preserve metric names for binary operation results via `keep_metric_names` modifier. For example, `{__name__=~"foo|bar"} / 10 keep_metric_names` leaves `foo` and `bar` metric names in division results. See [these docs](https://docs.victoriametrics.com/MetricsQL.html#keep_metric_names). This helps to address issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3710).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add verbose output for docker installations or when TTY isn't available. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4081).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): interrupt backoff retries when import process is cancelled. The change makes vmctl more responsive in case of errors during the import. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4442).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): update backoff policy on retries to reduce probability of overloading for `source` or `destination` databases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4402).
@ -58,6 +59,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): correctly calculate evaluation time for rules. Before, there was a low probability for discrepancy between actual time and rules evaluation time if evaluation interval was lower than the execution time for rules within the group.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): reset evaluation timestamp after modifying group interval. Before, there could have latency on rule evaluation time.
* BUGFIX: vmselect: fix timestamp alignment for Prometheus querying API if time argument is less than 10m from the beginning of Unix epoch.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly parse binary operations with reserved words on the right side such as `foo + (on{bar="baz"})`. Previously such queries could lead to panic. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4422).
## [v1.91.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.91.3)

View file

@ -108,16 +108,19 @@ The list of MetricsQL features:
Go to [WITH templates playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) and try it.
* String literals may be concatenated. This is useful with `WITH` templates:
`WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`.
* `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions) and [transform functions](#transform-functions).
* `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions), [transform functions](#transform-functions) and [binary operators](https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operators).
This modifier prevents from dropping metric names in function results. See [these docs](#keep_metric_names).
## keep_metric_names
By default, metric names are dropped after applying functions, which change the meaning of the original time series.
By default, metric names are dropped after applying functions or [binary operators](https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operators),
since they may change the meaning of the original time series.
This may result in `duplicate time series` error when the function is applied to multiple time series with different names.
This error can be fixed by applying `keep_metric_names` modifier to the function.
This error can be fixed by applying `keep_metric_names` modifier to the function or binary operator.
For example, `rate({__name__=~"foo|bar"}) keep_metric_names` leaves `foo` and `bar` metric names in the returned time series.
For example:
- `rate({__name__=~"foo|bar"}) keep_metric_names` leaves `foo` and `bar` metric names in the returned time series.
- `{__name__=~"foo|bar"} / 10 keep_metric_names` leaves `foo` and `bar` metric names in the returned time series.
## MetricsQL functions

2
go.mod
View file

@ -12,7 +12,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.2.0
github.com/VictoriaMetrics/metrics v1.24.0
github.com/VictoriaMetrics/metricsql v0.57.1
github.com/VictoriaMetrics/metricsql v0.58.1
github.com/aws/aws-sdk-go-v2 v1.18.1
github.com/aws/aws-sdk-go-v2/config v1.18.27
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.71

4
go.sum
View file

@ -69,8 +69,8 @@ github.com/VictoriaMetrics/fasthttp v1.2.0 h1:nd9Wng4DlNtaI27WlYh5mGXCJOmee/2c2b
github.com/VictoriaMetrics/fasthttp v1.2.0/go.mod h1:zv5YSmasAoSyv8sBVexfArzFDIGGTN4TfCKAtAw7IfE=
github.com/VictoriaMetrics/metrics v1.24.0 h1:ILavebReOjYctAGY5QU2F9X0MYvkcrG3aEn2RKa1Zkw=
github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys=
github.com/VictoriaMetrics/metricsql v0.57.1 h1:ryMe7w95s80BilS8RlV3G/25BLlmMBVRtTq2GnLB/4o=
github.com/VictoriaMetrics/metricsql v0.57.1/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
github.com/VictoriaMetrics/metricsql v0.58.1 h1:zg7xN1HptC5oloWvdt4PtgDaBEyMdsgERikUg0hFZB4=
github.com/VictoriaMetrics/metricsql v0.58.1/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

View file

@ -382,6 +382,12 @@ func (p *parser) parseExpr() (Expr, error) {
return nil, err
}
be.Right = e2
if isKeepMetricNames(p.lex.Token) {
be.KeepMetricNames = true
if err := p.lex.Next(); err != nil {
return nil, err
}
}
e = balanceBinaryOp(&be)
}
}
@ -1614,6 +1620,9 @@ type BinaryOpExpr struct {
// JoinModifier contains modifier such as "group_left" or "group_right".
JoinModifier ModifierExpr
// If KeepMetricNames is set to true, then the operation should keep metric names.
KeepMetricNames bool
// Left contains left arg for the `left op right` expression.
Left Expr
@ -1624,9 +1633,7 @@ type BinaryOpExpr struct {
// AppendString appends string representation of be to dst and returns the result.
func (be *BinaryOpExpr) AppendString(dst []byte) []byte {
if _, ok := be.Left.(*BinaryOpExpr); ok {
dst = append(dst, '(')
dst = be.Left.AppendString(dst)
dst = append(dst, ')')
dst = appendArgInParens(dst, be.Left)
} else {
dst = be.Left.AppendString(dst)
}
@ -1644,13 +1651,48 @@ func (be *BinaryOpExpr) AppendString(dst []byte) []byte {
dst = be.JoinModifier.AppendString(dst)
}
dst = append(dst, ' ')
if _, ok := be.Right.(*BinaryOpExpr); ok {
dst = append(dst, '(')
dst = be.Right.AppendString(dst)
dst = append(dst, ')')
if be.needRightParens() {
dst = appendArgInParens(dst, be.Right)
} else {
dst = be.Right.AppendString(dst)
}
if be.KeepMetricNames {
dst = append(dst, " keep_metric_names"...)
}
return dst
}
func (be *BinaryOpExpr) needRightParens() bool {
r := be.Right
if _, ok := r.(*BinaryOpExpr); ok {
return true
}
if me, ok := r.(*MetricExpr); ok {
metricName := me.getMetricName()
return isReservedBinaryOpIdent(metricName)
}
if fe, ok := r.(*FuncExpr); ok && isReservedBinaryOpIdent(fe.Name) {
return true
}
if !be.KeepMetricNames {
return false
}
switch r.(type) {
case *FuncExpr:
return true
default:
return false
}
}
func isReservedBinaryOpIdent(s string) bool {
return isBinaryOpGroupModifier(s) || isBinaryOpJoinModifier(s) || isBinaryOpBoolModifier(s)
}
func appendArgInParens(dst []byte, arg Expr) []byte {
dst = append(dst, '(')
dst = arg.AppendString(dst)
dst = append(dst, ')')
return dst
}
@ -1670,7 +1712,7 @@ func (me *ModifierExpr) AppendString(dst []byte) []byte {
for i, arg := range me.Args {
dst = appendEscapedIdent(dst, arg)
if i+1 < len(me.Args) {
dst = append(dst, ", "...)
dst = append(dst, ',')
}
}
dst = append(dst, ')')

2
vendor/modules.txt vendored
View file

@ -99,7 +99,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.24.0
## explicit; go 1.20
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.57.1
# github.com/VictoriaMetrics/metricsql v0.58.1
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop