app/vmselect/promql: add support for matching against multiple numeric constants via q == (c1,...,cN) and q != (c1,...,cN) syntax

2024-11-21 14:44:00 +00:00 · 2024-04-19 17:56:29 +02:00 · 2024-04-19 17:56:29 +02:00 · 34e253f9d6
commit 34e253f9d6
parent 249a467ea4
5 changed files with 163 additions and 3 deletions
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@ -23,8 +23,8 @@ var binaryOpFuncs = map[string]binaryOpFunc{
 	"atan2": newBinaryOpArithFunc(binaryop.Atan2),
 	// cmp ops
-	"==": newBinaryOpCmpFunc(binaryop.Eq),
+	"==": binaryOpEqFunc,
-	"!=": newBinaryOpCmpFunc(binaryop.Neq),
+	"!=": binaryOpNeqFunc,
 	">":  newBinaryOpCmpFunc(binaryop.Gt),
 	"<":  newBinaryOpCmpFunc(binaryop.Lt),
 	">=": newBinaryOpCmpFunc(binaryop.Gte),
@ -54,6 +54,84 @@ type binaryOpFuncArg struct {
 type binaryOpFunc func(bfa *binaryOpFuncArg) ([]*timeseries, error)
 // binaryOpEqFunc implements the `==` binary operator.
 //
 // When neither operand is a union expression (see isUnionFunc), the call is delegated
 // to binaryOpEqStdFunc. Otherwise the list form `q == (c1,...,cN)` is handled here:
 // every sample of the filtered series is kept only if containsValueAt finds an equal value
 // at the same index in at least one series from the list side; all other samples become NaN.
 //
 // If the left operand is the union expression, the operands are swapped,
 // so the filtered series are taken from the right operand.
 // The filtered series are modified in place and returned.
 func binaryOpEqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
 	leftIsUnion := isUnionFunc(bfa.be.Left)
 	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
 		return binaryOpEqStdFunc(bfa)
 	}
 	// List form, e.g. `q == (1,2,3)` or `(1,2,3) == q`.
 	series, allowed := bfa.left, bfa.right
 	if leftIsUnion {
 		series, allowed = allowed, series
 	}
 	// Nothing can match when either side has no series.
 	if len(series) == 0 || len(allowed) == 0 {
 		return nil, nil
 	}
 	for _, ts := range series {
 		for i := range ts.Values {
 			if containsValueAt(allowed, ts.Values[i], i) {
 				continue
 			}
 			ts.Values[i] = nan
 		}
 	}
 	// Series consisting solely of NaNs are intentionally kept in the result.
 	// Dropping them would break `(foo op bar) default N`
 	// when `(foo op bar)` evaluates to NaN-only series.
 	return series, nil
 }
 // binaryOpNeqFunc implements the `!=` binary operator.
 //
 // When neither operand is a union expression (see isUnionFunc), the call is delegated
 // to binaryOpNeqStdFunc. Otherwise the list form `q != (c1,...,cN)` is handled here:
 // every sample of the filtered series is replaced with NaN if containsValueAt finds an equal value
 // at the same index in at least one series from the list side; all other samples are kept.
 //
 // If the left operand is the union expression, the operands are swapped,
 // so the filtered series are taken from the right operand.
 // The filtered series are modified in place and returned.
 func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
 	leftIsUnion := isUnionFunc(bfa.be.Left)
 	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
 		return binaryOpNeqStdFunc(bfa)
 	}
 	// List form, e.g. `q != (1,2,3)` or `(1,2,3) != q`.
 	series, excluded := bfa.left, bfa.right
 	if leftIsUnion {
 		series, excluded = excluded, series
 	}
 	switch {
 	case len(series) == 0:
 		// There is nothing to filter.
 		return nil, nil
 	case len(excluded) == 0:
 		// There is nothing to exclude, so every sample passes the filter.
 		return series, nil
 	}
 	for _, ts := range series {
 		for i := range ts.Values {
 			if containsValueAt(excluded, ts.Values[i], i) {
 				ts.Values[i] = nan
 			}
 		}
 	}
 	// Series consisting solely of NaNs are intentionally kept in the result.
 	// Dropping them would break `(foo op bar) default N`
 	// when `(foo op bar)` evaluates to NaN-only series.
 	return series, nil
 }
 // isUnionFunc reports whether e is a function expression whose name is either empty
 // or equal to `union` after lowercasing.
 //
 // NOTE(review): the empty name presumably corresponds to the parenthesized list form `(a, b, c)`;
 // confirm against the metricsql parser.
 func isUnionFunc(e metricsql.Expr) bool {
 	fe, ok := e.(*metricsql.FuncExpr)
 	if !ok {
 		return false
 	}
 	return fe.Name == "" || strings.ToLower(fe.Name) == "union"
 }
 // containsValueAt reports whether at least one series in tss has a value equal to v at index idx.
 //
 // The comparison uses `==` semantics, so a NaN v never matches anything.
 // Every series in tss must have more than idx values.
 func containsValueAt(tss []*timeseries, v float64, idx int) bool {
 	for i := range tss {
 		if tss[i].Values[idx] != v {
 			continue
 		}
 		return true
 	}
 	return false
 }
 // binaryOpEqStdFunc and binaryOpNeqStdFunc are the comparison functions built
 // by newBinaryOpCmpFunc for binaryop.Eq and binaryop.Neq.
 //
 // binaryOpEqFunc and binaryOpNeqFunc delegate to them when neither operand
 // of the binary expression is a union expression.
 var (
 	binaryOpEqStdFunc  = newBinaryOpCmpFunc(binaryop.Eq)
 	binaryOpNeqStdFunc = newBinaryOpCmpFunc(binaryop.Neq)
 )
 func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc {
 	cfe := func(left, right float64, isBool bool) float64 {
 		if !isBool {
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@ -5203,9 +5203,24 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
-	t.Run(`sum(union-args)`, func(t *testing.T) {
+	t.Run(`sum(union-scalars)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum((1, 2, 3))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{6, 6, 6, 6, 6, 6},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`sum(union-vectors)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum((
 			alias(1, "foo"),
 			alias(2, "foo"),
 			alias(3, "foo"),
 		))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{1, 1, 1, 1, 1, 1},
@ -5763,6 +5778,51 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`equal-list`, func(t *testing.T) {
 		t.Parallel()
 		q := `time() == (100, 1000, 1400, 600)`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{1000, nan, 1400, nan, nan, nan},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`equal-list-reverse`, func(t *testing.T) {
 		t.Parallel()
 		q := `(100, 1000, 1400, 600) == time()`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{1000, nan, 1400, nan, nan, nan},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`not-equal-list`, func(t *testing.T) {
 		t.Parallel()
 		q := `alias(time(), "foobar") != UNIon(100, 1000, 1400, 600)`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{nan, 1200, nan, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
 		r.MetricName.MetricGroup = []byte("foobar")
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`not-equal-list-reverse`, func(t *testing.T) {
 		t.Parallel()
 		q := `(100, 1000, 1400, 600) != time()`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{nan, 1200, nan, 1600, 1800, 2000},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
 	t.Run(`quantiles_over_time(single_sample)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_by_label(
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@ -1658,6 +1658,16 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
 		return evalNumber(tfa.ec, nan), nil
 	}
 	if areAllArgsScalar(args) {
 		// Special case for (v1,...,vN) where vX are scalars - return all the scalars as time series.
 		// This is needed for "q == (v1,...,vN)" and "q != (v1,...,vN)" cases, where vX are numeric constants.
 		rvs := make([]*timeseries, len(args))
 		for i, arg := range args {
 			rvs[i] = arg[0]
 		}
 		return rvs, nil
 	}
 	rvs := make([]*timeseries, 0, len(args[0]))
 	m := make(map[string]bool, len(args[0]))
 	bb := bbPool.Get()
@ -1676,6 +1686,15 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }
 // areAllArgsScalar reports whether every entry of args satisfies isScalar.
 //
 // It returns true for empty args.
 func areAllArgsScalar(args [][]*timeseries) bool {
 	for i := range args {
 		if isScalar(args[i]) {
 			continue
 		}
 		return false
 	}
 	return true
 }
 func transformLabelKeep(tfa *transformFuncArg) ([]*timeseries, error) {
 	args := tfa.args
 	if len(args) < 1 {
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -30,6 +30,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 ## tip
 * FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): support filtering by multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (200, 201, 300)` returns `status_code` metrics with any of `200`, `201` or `300` values, while `status_code != (400, 404, 500)` returns `status_code` metrics with all the values except `400`, `404` and `500`.
 * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect).
 * FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949).
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication in addition to sharding among remote storage systems if the `-remoteWrite.shardByURLReplicas=N` command-line flag is set in addition to the `-remoteWrite.shardByURL` command-line flag, where `N` is the desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054).
--- a/docs/MetricsQL.md
+++ b/docs/MetricsQL.md
@ -79,6 +79,8 @@ The list of MetricsQL features on top of PromQL:
 * [Series selectors](https://docs.victoriametrics.com/keyconcepts/#filtering) accept multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}`
  selects series with `{env="prod",job="a"}` or `{env="dev",job="b"}` labels.
  See [these docs](https://docs.victoriametrics.com/keyconcepts/#filtering-by-multiple-or-filters) for details.
 * Support for matching against multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (300, 301, 304)`
  returns `status_code` metrics with one of `300`, `301` or `304` values.
 * Support for `group_left(*)` and `group_right(*)` for copying all the labels from time series on the `one` side
  of [many-to-one operations](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches).
  The copied label names may clash with the existing label names, so MetricsQL provides an ability to add prefix to the copied metric names