From e2367b6d1c5dcb941ad5dc24788ffbcbb81694de Mon Sep 17 00:00:00 2001 From: Zakhar Bessarab Date: Sun, 16 Jul 2023 14:00:39 +0400 Subject: [PATCH] metricsql: add support of using keep_metric_names for binary operations (#4109) * metricsql: add support of using keep_metric_names for binary operations This should help to avoid confusion with queries like one in the issue #3710. Signed-off-by: Zakhar Bessarab * wip --------- Signed-off-by: Zakhar Bessarab Co-authored-by: Aliaksandr Valialkin --- app/vmselect/promql/binary_op.go | 10 ++ app/vmselect/promql/exec_test.go | 126 ++++++++++++++++++ docs/CHANGELOG.md | 4 +- docs/MetricsQL.md | 11 +- go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/parser.go | 62 +++++++-- vendor/modules.txt | 2 +- 8 files changed, 202 insertions(+), 19 deletions(-) diff --git a/app/vmselect/promql/binary_op.go b/app/vmselect/promql/binary_op.go index 4c304db25..699f5c801 100644 --- a/app/vmselect/promql/binary_op.go +++ b/app/vmselect/promql/binary_op.go @@ -157,6 +157,10 @@ func adjustBinaryOpTags(be *metricsql.BinaryOpExpr, left, right []*timeseries) ( groupOp = "ignoring" } groupTags := be.GroupModifier.Args + if be.KeepMetricNames && groupOp == "on" { + // Add __name__ to groupTags if metric name must be preserved. + groupTags = append(groupTags[:len(groupTags):len(groupTags)], "__name__") + } for k, tssLeft := range mLeft { tssRight := mRight[k] if len(tssRight) == 0 { @@ -315,6 +319,12 @@ func resetMetricGroupIfRequired(be *metricsql.BinaryOpExpr, ts *timeseries) { // Do not reset MetricGroup for non-boolean `compare` binary ops like Prometheus does. return } + if be.KeepMetricNames { + // Do not reset MetricGroup if it is explicitly requested via `a op b keep_metric_names` + // See https://docs.victoriametrics.com/MetricsQL.html#keep_metric_names + return + } + ts.MetricName.ResetMetricGroup() } diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index b2a53acc0..90b85345f 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -3003,6 +3003,34 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1, r2} f(q, resultExpected) }) + t.Run(`vector / scalar keep_metric_names`, func(t *testing.T) { + t.Parallel() + q := `sort_desc((label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) / 2 keep_metric_names)` + r1 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q1"), + }, + Values: []float64{500, 600, 700, 800, 900, 1000}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("bar"), + }} + r2 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q2"), + }, + Values: []float64{5, 5, 5, 5, 5, 5}, + Timestamps: timestampsExpected, + } + r2.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("qwert"), + }} + resultExpected := []netstorage.Result{r1, r2} + f(q, resultExpected) + }) t.Run(`vector * scalar`, func(t *testing.T) { t.Parallel() q := `sum(time()) * 2` @@ -3038,6 +3066,34 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1, r2} f(q, resultExpected) }) + t.Run(`scalar * vector keep_metric_names`, func(t *testing.T) { + t.Parallel() + q := `sort_desc(2 * (label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) keep_metric_names)` + r1 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q1"), + }, + Values: []float64{2000, 2400, 2800, 3200, 3600, 4000}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("bar"), + }} + r2 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q2"), + }, + Values: []float64{20, 20, 20, 20, 20, 20}, + Timestamps: timestampsExpected, + } + r2.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("qwert"), + }} + resultExpected := []netstorage.Result{r1, r2} + f(q, resultExpected) + }) t.Run(`scalar * on() group_right vector`, func(t *testing.T) { t.Parallel() q := `sort_desc(2 * on() group_right() (label_set(time(), "foo", "bar") or label_set(10, "foo", "qwert")))` @@ -3062,6 +3118,34 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1, r2} f(q, resultExpected) }) + t.Run(`scalar * on() group_right vector keep_metric_names`, func(t *testing.T) { + t.Parallel() + q := `sort_desc(2 * on() group_right() (label_set(time(), "foo", "bar", "__name__", "q1") or label_set(10, "foo", "qwert", "__name__", "q2")) keep_metric_names)` + r1 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q1"), + }, + Values: []float64{2000, 2400, 2800, 3200, 3600, 4000}, + Timestamps: timestampsExpected, + } + r1.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("bar"), + }} + r2 := netstorage.Result{ + MetricName: storage.MetricName{ + MetricGroup: []byte("q2"), + }, + Values: []float64{20, 20, 20, 20, 20, 20}, + Timestamps: timestampsExpected, + } + r2.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("qwert"), + }} + resultExpected := []netstorage.Result{r1, r2} + f(q, resultExpected) + }) t.Run(`scalar * ignoring(foo) group_right vector`, func(t *testing.T) { t.Parallel() q := `sort_desc(label_set(2, "a", "2") * ignoring(foo,a) group_right(a) (label_set(time(), "foo", "bar", "a", "1"), label_set(10, "foo", "qwert")))` @@ -3147,6 +3231,28 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`vector * on(foo) scalar keep_metric_names`, func(t *testing.T) { + t.Parallel() + q := `sort_desc( + ( + label_set(time(), "foo", "bar", "xx", "yy", "__name__", "q1"), + label_set(10, "foo", "qwert", "__name__", "q2") + ) * on(foo) (label_set(2, "foo","bar","aa","bb", "__name__", "q2")) keep_metric_names)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{2000, 2400, 2800, 3200, 3600, 4000}, + Timestamps: timestampsExpected, + } + r.MetricName.MetricGroup = []byte("q1") + r.MetricName.Tags = []storage.Tag{ + { + Key: []byte("foo"), + Value: []byte("bar"), + }, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`vector * on(foo) group_left(additional_tag) duplicate_timeseries_differ_by_additional_tag`, func(t *testing.T) { t.Parallel() q := `sort(label_set(time()/10, "foo", "bar", "xx", "yy", "__name__", "qwert") + on(foo) group_left(op) ( @@ -3375,6 +3481,26 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`vector + vector partial matching keep_metric_names`, func(t *testing.T) { + t.Parallel() + q := `sort_desc( + (label_set(time(), "t1", "v1", "__name__", "q1") or label_set(10, "t2", "v2", "__name__", "q2")) + + + (label_set(100, "t1", "v1", "__name__", "q3") or label_set(time(), "t2", "v3")) keep_metric_names + )` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{1100, 1300, 1500, 1700, 1900, 2100}, + Timestamps: timestampsExpected, + } + r.MetricName.MetricGroup = []byte("q1") + r.MetricName.Tags = []storage.Tag{{ + Key: []byte("t1"), + Value: []byte("v1"), + }} + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`vector + vector no matching`, func(t *testing.T) { t.Parallel() q := `sort_desc( diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e6e8ec2ac..7f273581a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -27,8 +27,9 @@ The following tip changes can be tested by building VictoriaMetrics components f * SECURITY: upgrade base docker image (alpine) from 3.18.0 to 3.18.2. See [alpine 3.18.2 release notes](https://alpinelinux.org/posts/Alpine-3.15.9-3.16.6-3.17.4-3.18.2-released.html). * SECURITY: upgrade Go builder from Go1.20.5 to Go1.20.6. See [the list of issues addressed in Go1.20.6](https://github.com/golang/go/issues?q=milestone%3AGo1.20.6+label%3ACherryPickApproved). -* FETURE: reduce memory usage by up to 5x for setups with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) and long [retention](https://docs.victoriametrics.com/#retention). See [description for this change](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/7094fa38bc207c7bd7330ea8a834310a310ce5e3) for details. +* FEATURE: reduce memory usage by up to 5x for setups with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) and long [retention](https://docs.victoriametrics.com/#retention). See [description for this change](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/7094fa38bc207c7bd7330ea8a834310a310ce5e3) for details. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow selecting time series matching at least one of multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}` selects series with either `{env="prod",job="a"}` or `{env="dev",job="b"}` labels. This functionality allows passing the selected series to [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) without the need to use [subqueries](https://docs.victoriametrics.com/MetricsQL.html#subqueries). See [these docs](https://docs.victoriametrics.com/keyConcepts.html#filtering-by-multiple-or-filters). +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add ability to preserve metric names for binary operation results via `keep_metric_names` modifier. For example, `{__name__=~"foo|bar"} / 10 keep_metric_names` leaves `foo` and `bar` metric names in division results. See [these docs](https://docs.victoriametrics.com/MetricsQL.html#keep_metric_names). This helps to address issues like [this one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3710). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add verbose output for docker installations or when TTY isn't available. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4081). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): interrupt backoff retries when import process is cancelled. The change makes vmctl more responsive in case of errors during the import. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4442). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): update backoff policy on retries to reduce probability of overloading for `source` or `destination` databases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4402). @@ -58,6 +59,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): correctly calculate evaluation time for rules. Before, there was a low probability for discrepancy between actual time and rules evaluation time if evaluation interval was lower than the execution time for rules within the group. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): reset evaluation timestamp after modifying group interval. Before, there could have latency on rule evaluation time. * BUGFIX: vmselect: fix timestamp alignment for Prometheus querying API if time argument is less than 10m from the beginning of Unix epoch. +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly parse binary operations with reserved words on the right side such as `foo + (on{bar="baz"})`. Previously such queries could lead to panic. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4422). ## [v1.91.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.91.3) diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 66c4af59f..26476bff9 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -108,16 +108,19 @@ The list of MetricsQL features: Go to [WITH templates playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) and try it. * String literals may be concatenated. This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`. -* `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions) and [transform functions](#transform-functions). +* `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions), [transform functions](#transform-functions) and [binary operators](https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operators). This modifier prevents from dropping metric names in function results. See [these docs](#keep_metric_names). ## keep_metric_names -By default, metric names are dropped after applying functions, which change the meaning of the original time series. +By default, metric names are dropped after applying functions or [binary operators](https://prometheus.io/docs/prometheus/latest/querying/operators/#binary-operators), +since they may change the meaning of the original time series. This may result in `duplicate time series` error when the function is applied to multiple time series with different names. -This error can be fixed by applying `keep_metric_names` modifier to the function. +This error can be fixed by applying `keep_metric_names` modifier to the function or binary operator. -For example, `rate({__name__=~"foo|bar"}) keep_metric_names` leaves `foo` and `bar` metric names in the returned time series. +For example: +- `rate({__name__=~"foo|bar"}) keep_metric_names` leaves `foo` and `bar` metric names in the returned time series. +- `{__name__=~"foo|bar"} / 10 keep_metric_names` leaves `foo` and `bar` metric names in the returned time series. ## MetricsQL functions diff --git a/go.mod b/go.mod index d39516ece..d6a707e0d 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.2.0 github.com/VictoriaMetrics/metrics v1.24.0 - github.com/VictoriaMetrics/metricsql v0.57.1 + github.com/VictoriaMetrics/metricsql v0.58.1 github.com/aws/aws-sdk-go-v2 v1.18.1 github.com/aws/aws-sdk-go-v2/config v1.18.27 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.71 diff --git a/go.sum b/go.sum index b8e0ae53c..72a5bd0f0 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,8 @@ github.com/VictoriaMetrics/fasthttp v1.2.0 h1:nd9Wng4DlNtaI27WlYh5mGXCJOmee/2c2b github.com/VictoriaMetrics/fasthttp v1.2.0/go.mod h1:zv5YSmasAoSyv8sBVexfArzFDIGGTN4TfCKAtAw7IfE= github.com/VictoriaMetrics/metrics v1.24.0 h1:ILavebReOjYctAGY5QU2F9X0MYvkcrG3aEn2RKa1Zkw= github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys= -github.com/VictoriaMetrics/metricsql v0.57.1 h1:ryMe7w95s80BilS8RlV3G/25BLlmMBVRtTq2GnLB/4o= -github.com/VictoriaMetrics/metricsql v0.57.1/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I= +github.com/VictoriaMetrics/metricsql v0.58.1 h1:zg7xN1HptC5oloWvdt4PtgDaBEyMdsgERikUg0hFZB4= +github.com/VictoriaMetrics/metricsql v0.58.1/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/parser.go b/vendor/github.com/VictoriaMetrics/metricsql/parser.go index 5e6f7be97..2fb8d73af 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/parser.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/parser.go @@ -382,6 +382,12 @@ func (p *parser) parseExpr() (Expr, error) { return nil, err } be.Right = e2 + if isKeepMetricNames(p.lex.Token) { + be.KeepMetricNames = true + if err := p.lex.Next(); err != nil { + return nil, err + } + } e = balanceBinaryOp(&be) } } @@ -1614,6 +1620,9 @@ type BinaryOpExpr struct { // JoinModifier contains modifier such as "group_left" or "group_right". JoinModifier ModifierExpr + // If KeepMetricNames is set to true, then the operation should keep metric names. + KeepMetricNames bool + // Left contains left arg for the `left op right` expression. Left Expr @@ -1624,16 +1633,14 @@ type BinaryOpExpr struct { // AppendString appends string representation of be to dst and returns the result. func (be *BinaryOpExpr) AppendString(dst []byte) []byte { if _, ok := be.Left.(*BinaryOpExpr); ok { - dst = append(dst, '(') - dst = be.Left.AppendString(dst) - dst = append(dst, ')') + dst = appendArgInParens(dst, be.Left) } else { dst = be.Left.AppendString(dst) } dst = append(dst, ' ') dst = append(dst, be.Op...) if be.Bool { - dst = append(dst, " bool"...) + dst = append(dst, "bool"...) } if be.GroupModifier.Op != "" { dst = append(dst, ' ') @@ -1644,13 +1651,48 @@ func (be *BinaryOpExpr) AppendString(dst []byte) []byte { dst = be.JoinModifier.AppendString(dst) } dst = append(dst, ' ') - if _, ok := be.Right.(*BinaryOpExpr); ok { - dst = append(dst, '(') - dst = be.Right.AppendString(dst) - dst = append(dst, ')') + if be.needRightParens() { + dst = appendArgInParens(dst, be.Right) } else { dst = be.Right.AppendString(dst) } + if be.KeepMetricNames { + dst = append(dst, " keep_metric_names"...) + } + return dst +} + +func (be *BinaryOpExpr) needRightParens() bool { + r := be.Right + if _, ok := r.(*BinaryOpExpr); ok { + return true + } + if me, ok := r.(*MetricExpr); ok { + metricName := me.getMetricName() + return isReservedBinaryOpIdent(metricName) + } + if fe, ok := r.(*FuncExpr); ok && isReservedBinaryOpIdent(fe.Name) { + return true + } + if !be.KeepMetricNames { + return false + } + switch r.(type) { + case *FuncExpr: + return true + default: + return false + } +} + +func isReservedBinaryOpIdent(s string) bool { + return isBinaryOpGroupModifier(s) || isBinaryOpJoinModifier(s) || isBinaryOpBoolModifier(s) +} + +func appendArgInParens(dst []byte, arg Expr) []byte { + dst = append(dst, '(') + dst = arg.AppendString(dst) + dst = append(dst, ')') return dst } @@ -1666,11 +1708,11 @@ type ModifierExpr struct { // AppendString appends string representation of me to dst and returns the result. func (me *ModifierExpr) AppendString(dst []byte) []byte { dst = append(dst, me.Op...) - dst = append(dst, " ("...) + dst = append(dst, "("...) for i, arg := range me.Args { dst = appendEscapedIdent(dst, arg) if i+1 < len(me.Args) { - dst = append(dst, ", "...) + dst = append(dst, ',') } } dst = append(dst, ')') diff --git a/vendor/modules.txt b/vendor/modules.txt index 29fa2291a..7ad3700a5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -99,7 +99,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.24.0 ## explicit; go 1.20 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.57.1 +# github.com/VictoriaMetrics/metricsql v0.58.1 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop