From 7fdea4b31c20028f18bef58b17d61db51dd84cf8 Mon Sep 17 00:00:00 2001 From: Hui Wang <haley@victoriametrics.com> Date: Thu, 25 Apr 2024 18:54:42 +0800 Subject: [PATCH] app/vmselect: implement cmd-line flags `-search.disableImplicitConversions` and `-search.logImplicitConversions` (#6180) address https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4338 support disable or log [implicit conversions](https://docs.victoriametrics.com/metricsql/#implicit-query-conversions) for subquery with cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion` Signed-off-by: hagen1778 <roman@victoriametrics.com> Co-authored-by: hagen1778 <roman@victoriametrics.com> (cherry picked from commit dd0d2c77c8a3c0c22b1e5e14960f9f3048e85513) --- app/vmselect/promql/exec.go | 69 ++++++++++++ app/vmselect/promql/exec_test.go | 101 ++++++++++++++++++ .../packages/vmui/src/assets/MetricsQL.md | 8 +- docs/CHANGELOG.md | 1 + docs/MetricsQL.md | 4 +- 5 files changed, 179 insertions(+), 4 deletions(-) diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go index 9a5149fbda..b2b05d5d9e 100644 --- a/app/vmselect/promql/exec.go +++ b/app/vmselect/promql/exec.go @@ -13,6 +13,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/querystats" "github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/metrics" @@ -26,6 +27,12 @@ var ( `For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped `+ `in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. `+ `This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter`) + disableImplicitConversion = flag.Bool("search.disableImplicitConversion", false, "Whether to return an error for queries that rely on implicit subquery conversions, "+ + "see https://docs.victoriametrics.com/metricsql/#subqueries for details. "+ + "See also -search.logImplicitConversion.") + logImplicitConversion = flag.Bool("search.logImplicitConversion", false, "Whether to log queries with implicit subquery conversions, "+ + "see https://docs.victoriametrics.com/metricsql/#subqueries for details. "+ + "Such conversion can be disabled using -search.disableImplicitConversion.") ) // UserReadableError is a type of error which supposed to be returned to the user without additional context. @@ -64,6 +71,16 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo return nil, err } + if *disableImplicitConversion || *logImplicitConversion { + complete := isSubQueryComplete(e, false) + if !complete && *disableImplicitConversion { + return nil, fmt.Errorf("query contains subquery that requires implicit conversion and is rejected according to `-search.disableImplicitConversion=true` setting. See https://docs.victoriametrics.com/metricsql/#subqueries for details") + } + if !complete && *logImplicitConversion { + logger.Warnf("query=%q contains subquery that requires implicit conversion, see https://docs.victoriametrics.com/metricsql/#subqueries for details", e.AppendString(nil)) + } + } + qid := activeQueriesV.Add(ec, q) rv, err := evalExpr(qt, ec, e) activeQueriesV.Remove(qid) @@ -405,3 +422,55 @@ func (pc *parseCache) Put(q string, pcv *parseCacheValue) { pc.m[q] = pcv pc.mu.Unlock() } + +// isSubQueryComplete checks if expr contains incomplete subquery +func isSubQueryComplete(e metricsql.Expr, isSubExpr bool) bool { + switch exp := e.(type) { + case *metricsql.FuncExpr: + if isSubExpr { + return false + } + fe := e.(*metricsql.FuncExpr) + for _, arg := range exp.Args { + if getRollupFunc(fe.Name) != nil { + isSubExpr = true + } + if !isSubQueryComplete(arg, isSubExpr) { + return false + } + } + case *metricsql.RollupExpr: + if _, ok := exp.Expr.(*metricsql.MetricExpr); ok { + return true + } + // exp.Step is optional in subqueries + if exp.Window == nil { + return false + } + return isSubQueryComplete(exp.Expr, false) + case *metricsql.AggrFuncExpr: + if isSubExpr { + return false + } + for _, arg := range exp.Args { + if !isSubQueryComplete(arg, false) { + return false + } + } + case *metricsql.BinaryOpExpr: + if isSubExpr { + return false + } + if !isSubQueryComplete(exp.Left, false) { + return false + } + if !isSubQueryComplete(exp.Right, false) { + return false + } + case *metricsql.MetricExpr: + return true + default: + return true + } + return true +} diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 940aca306d..08dadc0130 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -9452,3 +9452,104 @@ func testAddLabels(t *testing.T, mn *storage.MetricName, labels ...string) { }) } } + +func TestIsSubQueryCompleteTrue(t *testing.T) { + f := func(q string) { + t.Helper() + e, err := metricsql.Parse(q) + if err != nil { + t.Fatal(err) + } + if !isSubQueryComplete(e, false) { + t.Fatalf("query should be complete: %s", e.AppendString(nil)) + } + } + + f("rate(http_total)") + f("sum(http_total)") + f("absent(http_total)") + f("rate(http_total[1m])") + f("avg_over_time(up[1m])") + f("sum(http_total[1m])") + f("sum(rate(http_total))") + f("sum(sum(http_total))") + f(`sum(sum_over_time(http_total[1m] )) by (instance)`) + f("sum(up{cluster='a'}[1m] or up{cluster='b'}[1m])") + f("(avg_over_time(alarm_test1[1m]) - avg_over_time(alarm_test1[1m] offset 5m)) > 0.1") + f("http_total[1m] offset 1m") + + // subquery + f("rate(http_total)[5m:1m]") + f("rate(sum(http_total)[5m:1m])") + f("rate(rate(http_total)[5m:1m])") + f("sum(rate(http_total[1m]))") + f("sum(rate(sum(http_total)[5m:1m]))") + f("rate(sum(rate(http_total))[5m:1m])") + f("rate(sum(sum(http_total))[5m:1m])") + f("rate(sum(rate(http_total))[5m:1m])") + f("rate(sum(sum(http_total))[5m:1m])") + f("avg_over_time(rate(http_total[5m])[5m:1m])") + f("delta(avg_over_time(up[1m])[5m:1m]) > 0.1") + f("avg_over_time(avg by (site) (metric)[2m:1m])") + + f("sum(http_total)[5m:1m] offset 1m") + f("round(sum(sum_over_time(http_total[1m])) by (instance)) [5m:1m] offset 1m") + + f("rate(sum(http_total)[5m:1m]) - rate(sum(http_total)[5m:1m])") + f("avg_over_time((rate(http_total)-rate(http_total))[5m:1m])") + + f("sum_over_time((up{cluster='a'} or up{cluster='b'})[5m:1m])") + f("sum_over_time((up{cluster='a'} or up{cluster='b'})[5m:1m])") + f("sum(sum_over_time((up{cluster='a'} or up{cluster='b'})[5m:1m])) by (instance)") + + // step (or resolution) is optional in subqueries + f("max_over_time(rate(my_counter_total[5m])[1h:])") + f("max_over_time(rate(my_counter_total[5m])[1h:1m])[5m:1m]") + f("max_over_time(rate(my_counter_total[5m])[1h:])[5m:]") + + f(` +WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuIdle = rate(cpuSeconds{mode='idle'}[5m]) +) +max_over_time(cpuIdle[1h:])`) +} + +func TestIsSubQueryCompleteFalse(t *testing.T) { + f := func(q string) { + t.Helper() + e, err := metricsql.Parse(q) + if err != nil { + t.Fatal(err) + } + if isSubQueryComplete(e, false) { + t.Fatalf("expect to detect incomplete subquery: %s", e.AppendString(nil)) + } + } + + f("rate(sum(http_total))") + f("rate(rate(http_total))") + f("sum(rate(sum(http_total)))") + f("rate(sum(rate(http_total)))") + f("rate(sum(sum(http_total)))") + f("avg_over_time(rate(http_total[5m]))") + + // https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3974 + f("sum(http_total) offset 1m") + f(`round(sum(sum_over_time(http_total[1m])) by (instance)) offset 1m`) + + f("rate(sum(http_total)) - rate(sum(http_total))") + f("avg_over_time(rate(http_total)-rate(http_total))") + + // https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3996 + f("sum_over_time(up{cluster='a'} or up{cluster='b'})") + f("sum_over_time(up{cluster='a'}[1m] or up{cluster='b'}[1m])") + f("sum(sum_over_time(up{cluster='a'}[1m] or up{cluster='b'}[1m])) by (instance)") + + f(` +WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuIdle = rate(cpuSeconds{mode='idle'}[5m]) +) +max_over_time(cpuIdle)`) +} diff --git a/app/vmui/packages/vmui/src/assets/MetricsQL.md b/app/vmui/packages/vmui/src/assets/MetricsQL.md index 7f3ccc498b..5c517e4ed7 100644 --- a/app/vmui/packages/vmui/src/assets/MetricsQL.md +++ b/app/vmui/packages/vmui/src/assets/MetricsQL.md @@ -2211,7 +2211,8 @@ MetricsQL supports and extends PromQL subqueries. See [this article](https://val Any [rollup function](#rollup-functions) for something other than [series selector](https://docs.victoriametrics.com/keyconcepts/#filtering) form a subquery. Nested rollup functions can be implicit thanks to the [implicit query conversions](#implicit-query-conversions). For example, `delta(sum(m))` is implicitly converted to `delta(sum(default_rollup(m))[1i:1i])`, so it becomes a subquery, -since it contains [default_rollup](#default_rollup) nested into [delta](#delta). +since it contains [default_rollup](#default_rollup) nested into [delta](#delta). +This behavior can be disabled or logged via cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion` since v1.101.0. VictoriaMetrics performs subqueries in the following way: @@ -2241,7 +2242,8 @@ VictoriaMetrics performs the following implicit conversions for incoming queries * `abs(temperature)` is transformed to `abs(default_rollup(temperature))`, because [abs](#abs) isn't a [rollup function](#rollup-functions) - it is [transform function](#transform-functions) * If `step` in square brackets is missing inside [subquery](#subqueries), then `1i` step is automatically added there. - For example, `avg_over_time(rate(http_requests_total[5m])[1h])` is automatically converted to `avg_over_time(rate(http_requests_total[5m])[1h:1i])`. + For example, `avg_over_time(rate(http_requests_total[5m])[1h])` is automatically converted to `avg_over_time(rate(http_requests_total[5m])[1h:1i])`. * If something other than [series selector](https://docs.victoriametrics.com/keyconcepts/#filtering) is passed to [rollup function](#rollup-functions), then a [subquery](#subqueries) with `1i` lookbehind window and `1i` step is automatically formed. - For example, `rate(sum(up))` is automatically converted to `rate((sum(default_rollup(up)))[1i:1i])`. + For example, `rate(sum(up))` is automatically converted to `rate((sum(default_rollup(up)))[1i:1i])`. + This behavior can be disabled or logged via cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion` since v1.101.0. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 03d0410864..51d5a671c0 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,6 +33,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): support filtering by multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (200, 201, 300)` returns `status_code` metrics with any of `200`, `201` or `300` values, while `status_code != (400, 404, 500)` returns `status_code` metrics with all the values except of `400`, `404` and `500`. * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect). * FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949). +* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/): support disable or log [implicit conversions](https://docs.victoriametrics.com/metricsql/#implicit-query-conversions) for subquery with cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion`. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4338). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication additionally to sharding among remote storage systems if `-remoteWrite.shardByURLReplicas=N` command-line flag is set additionally to `-remoteWrite.shardByURL` command-line flag, where `N` is desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): reduce CPU usage when [sharding among remote storage systems](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) is enabled. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) addresses in `-remoteWrite.url` command-line option and in scrape target urls. For example, `-remoteWrite.url=http://srv+victoria-metrics/api/v1/write` automatically resolves the `victoria-metrics` DNS SRV to a list of hostnames with TCP ports and then sends the collected metrics to these TCP addresses. See [these docs](https://docs.victoriametrics.com/vmagent/#srv-urls) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index cdfa3039fc..fec10e5dc0 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -2229,6 +2229,7 @@ Any [rollup function](#rollup-functions) for something other than [series select Nested rollup functions can be implicit thanks to the [implicit query conversions](#implicit-query-conversions). For example, `delta(sum(m))` is implicitly converted to `delta(sum(default_rollup(m))[1i:1i])`, so it becomes a subquery, since it contains [default_rollup](#default_rollup) nested into [delta](#delta). +This behavior can be disabled or logged via cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion` since v1.101.0. VictoriaMetrics performs subqueries in the following way: @@ -2261,4 +2262,5 @@ VictoriaMetrics performs the following implicit conversions for incoming queries For example, `avg_over_time(rate(http_requests_total[5m])[1h])` is automatically converted to `avg_over_time(rate(http_requests_total[5m])[1h:1i])`. * If something other than [series selector](https://docs.victoriametrics.com/keyconcepts/#filtering) is passed to [rollup function](#rollup-functions), then a [subquery](#subqueries) with `1i` lookbehind window and `1i` step is automatically formed. - For example, `rate(sum(up))` is automatically converted to `rate((sum(default_rollup(up)))[1i:1i])`. + For example, `rate(sum(up))` is automatically converted to `rate((sum(default_rollup(up)))[1i:1i])`. + This behavior can be disabled or logged via cmd-line flags `-search.disableImplicitConversion` and `-search.logImplicitConversion` since v1.101.0.