app/vmselect/promql: add support for matching against multiple numeric constants via q == (c1,...,cN) and q != (c1,...,cN) syntax

This commit is contained in:
Aliaksandr Valialkin 2024-04-19 17:56:29 +02:00
parent 249a467ea4
commit 34e253f9d6
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 163 additions and 3 deletions

View file

@ -23,8 +23,8 @@ var binaryOpFuncs = map[string]binaryOpFunc{
"atan2": newBinaryOpArithFunc(binaryop.Atan2), "atan2": newBinaryOpArithFunc(binaryop.Atan2),
// cmp ops // cmp ops
"==": newBinaryOpCmpFunc(binaryop.Eq), "==": binaryOpEqFunc,
"!=": newBinaryOpCmpFunc(binaryop.Neq), "!=": binaryOpNeqFunc,
">": newBinaryOpCmpFunc(binaryop.Gt), ">": newBinaryOpCmpFunc(binaryop.Gt),
"<": newBinaryOpCmpFunc(binaryop.Lt), "<": newBinaryOpCmpFunc(binaryop.Lt),
">=": newBinaryOpCmpFunc(binaryop.Gte), ">=": newBinaryOpCmpFunc(binaryop.Gte),
@ -54,6 +54,84 @@ type binaryOpFuncArg struct {
type binaryOpFunc func(bfa *binaryOpFuncArg) ([]*timeseries, error) type binaryOpFunc func(bfa *binaryOpFuncArg) ([]*timeseries, error)
// binaryOpEqFunc implements the `==` binary operator.
//
// If neither operand expression is a union (see isUnionFunc), the call is delegated
// to binaryOpEqStdFunc. Otherwise the union operand is treated as a list of allowed series:
// every value of the opposite operand, which doesn't equal the value of any list series
// at the same index, is replaced with NaN in place.
//
// An empty result is returned if either operand has no series.
func binaryOpEqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
	leftIsUnion := isUnionFunc(bfa.be.Left)
	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
		return binaryOpEqStdFunc(bfa)
	}

	// Special case for `q == (1,2,3)`.
	// The union may be on either side, so normalize the operands:
	// filtered holds the series to filter, allowed holds the series produced by the union.
	filtered, allowed := bfa.left, bfa.right
	if leftIsUnion {
		filtered, allowed = bfa.right, bfa.left
	}
	if len(filtered) == 0 || len(allowed) == 0 {
		return nil, nil
	}

	for _, ts := range filtered {
		for i, v := range ts.Values {
			if containsValueAt(allowed, v, i) {
				continue
			}
			ts.Values[i] = nan
		}
	}

	// Series consisting solely of NaNs are intentionally kept in the result.
	// Dropping them would break `(foo op bar) default N` for the case
	// when `(foo op bar)` evaluates to NaN-only series.
	return filtered, nil
}
// binaryOpNeqFunc implements the `!=` binary operator.
//
// If neither operand expression is a union (see isUnionFunc), the call is delegated
// to binaryOpNeqStdFunc. Otherwise the union operand is treated as a list of excluded series:
// every value of the opposite operand, which equals the value of at least one list series
// at the same index, is replaced with NaN in place.
//
// An empty result is returned if the filtered operand has no series.
// The filtered operand is returned unchanged if the list has no series.
func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
	leftIsUnion := isUnionFunc(bfa.be.Left)
	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
		return binaryOpNeqStdFunc(bfa)
	}

	// Special case for `q != (1,2,3)`.
	// The union may be on either side, so normalize the operands:
	// filtered holds the series to filter, excluded holds the series produced by the union.
	filtered, excluded := bfa.left, bfa.right
	if leftIsUnion {
		filtered, excluded = bfa.right, bfa.left
	}
	switch {
	case len(filtered) == 0:
		return nil, nil
	case len(excluded) == 0:
		// There is nothing to exclude.
		return filtered, nil
	}

	for _, ts := range filtered {
		for i, v := range ts.Values {
			if !containsValueAt(excluded, v, i) {
				continue
			}
			ts.Values[i] = nan
		}
	}

	// Series consisting solely of NaNs are intentionally kept in the result.
	// Dropping them would break `(foo op bar) default N` for the case
	// when `(foo op bar)` evaluates to NaN-only series.
	return filtered, nil
}
// isUnionFunc reports whether e is a function expression, which has either an empty name
// or a name equal to `union` after lower-casing.
//
// NOTE(review): the empty name presumably corresponds to the parenthesized list
// syntax `(a, b, ...)` - confirm against the metricsql parser.
func isUnionFunc(e metricsql.Expr) bool {
	fe, ok := e.(*metricsql.FuncExpr)
	if !ok {
		return false
	}
	if fe.Name == "" {
		return true
	}
	return strings.ToLower(fe.Name) == "union"
}
// containsValueAt reports whether at least one series in tss holds the value v at index idx.
//
// Values are compared with `==`, so a NaN v never matches.
// Every series in tss must have more than idx values.
func containsValueAt(tss []*timeseries, v float64, idx int) bool {
	for i := range tss {
		if tss[i].Values[idx] == v {
			return true
		}
	}
	return false
}
// binaryOpEqStdFunc is the comparison function built from binaryop.Eq.
// binaryOpEqFunc delegates to it when neither operand is a union.
var binaryOpEqStdFunc = newBinaryOpCmpFunc(binaryop.Eq)

// binaryOpNeqStdFunc is the comparison function built from binaryop.Neq.
// binaryOpNeqFunc delegates to it when neither operand is a union.
var binaryOpNeqStdFunc = newBinaryOpCmpFunc(binaryop.Neq)
func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc { func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc {
cfe := func(left, right float64, isBool bool) float64 { cfe := func(left, right float64, isBool bool) float64 {
if !isBool { if !isBool {

View file

@ -5203,9 +5203,24 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r} resultExpected := []netstorage.Result{r}
f(q, resultExpected) f(q, resultExpected)
}) })
t.Run(`sum(union-args)`, func(t *testing.T) { t.Run(`sum(union-scalars)`, func(t *testing.T) {
t.Parallel() t.Parallel()
q := `sum((1, 2, 3))` q := `sum((1, 2, 3))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6, 6, 6, 6, 6, 6},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`sum(union-vectors)`, func(t *testing.T) {
t.Parallel()
q := `sum((
alias(1, "foo"),
alias(2, "foo"),
alias(3, "foo"),
))`
r := netstorage.Result{ r := netstorage.Result{
MetricName: metricNameExpected, MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1}, Values: []float64{1, 1, 1, 1, 1, 1},
@ -5763,6 +5778,51 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r} resultExpected := []netstorage.Result{r}
f(q, resultExpected) f(q, resultExpected)
}) })
t.Run(`equal-list`, func(t *testing.T) {
t.Parallel()
q := `time() == (100, 1000, 1400, 600)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, nan, 1400, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`equal-list-reverse`, func(t *testing.T) {
t.Parallel()
q := `(100, 1000, 1400, 600) == time()`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, nan, 1400, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`not-equal-list`, func(t *testing.T) {
t.Parallel()
q := `alias(time(), "foobar") != UNIon(100, 1000, 1400, 600)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1200, nan, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("foobar")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`not-equal-list-reverse`, func(t *testing.T) {
t.Parallel()
q := `(100, 1000, 1400, 600) != time()`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1200, nan, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`quantiles_over_time(single_sample)`, func(t *testing.T) { t.Run(`quantiles_over_time(single_sample)`, func(t *testing.T) {
t.Parallel() t.Parallel()
q := `sort_by_label( q := `sort_by_label(

View file

@ -1658,6 +1658,16 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
return evalNumber(tfa.ec, nan), nil return evalNumber(tfa.ec, nan), nil
} }
if areAllArgsScalar(args) {
// Special case for (v1,...,vN) where vX are scalars - return all the scalars as time series.
// This is needed for "q == (v1,...,vN)" and "q != (v1,...,vN)" cases, where vX are numeric constants.
rvs := make([]*timeseries, len(args))
for i, arg := range args {
rvs[i] = arg[0]
}
return rvs, nil
}
rvs := make([]*timeseries, 0, len(args[0])) rvs := make([]*timeseries, 0, len(args[0]))
m := make(map[string]bool, len(args[0])) m := make(map[string]bool, len(args[0]))
bb := bbPool.Get() bb := bbPool.Get()
@ -1676,6 +1686,15 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil return rvs, nil
} }
// areAllArgsScalar reports whether isScalar holds for every entry of args.
//
// It returns true for empty args, since there is no entry failing the check.
func areAllArgsScalar(args [][]*timeseries) bool {
	for i := range args {
		if isScalar(args[i]) {
			continue
		}
		return false
	}
	return true
}
func transformLabelKeep(tfa *transformFuncArg) ([]*timeseries, error) { func transformLabelKeep(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args args := tfa.args
if len(args) < 1 { if len(args) < 1 {

View file

@ -30,6 +30,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
## tip ## tip
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): support filtering by multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (200, 201, 300)` returns `status_code` metrics with any of `200`, `201` or `300` values, while `status_code != (400, 404, 500)` returns `status_code` metrics with all the values except `400`, `404` and `500`.
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect). * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect).
* FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949). * FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication additionally to sharding among remote storage systems if `-remoteWrite.shardByURLReplicas=N` command-line flag is set additionally to `-remoteWrite.shardByURL` command-line flag, where `N` is desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication additionally to sharding among remote storage systems if `-remoteWrite.shardByURLReplicas=N` command-line flag is set additionally to `-remoteWrite.shardByURL` command-line flag, where `N` is desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054).

View file

@ -79,6 +79,8 @@ The list of MetricsQL features on top of PromQL:
* [Series selectors](https://docs.victoriametrics.com/keyconcepts/#filtering) accept multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}` * [Series selectors](https://docs.victoriametrics.com/keyconcepts/#filtering) accept multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}`
selects series with `{env="prod",job="a"}` or `{env="dev",job="b"}` labels. selects series with `{env="prod",job="a"}` or `{env="dev",job="b"}` labels.
See [these docs](https://docs.victoriametrics.com/keyconcepts/#filtering-by-multiple-or-filters) for details. See [these docs](https://docs.victoriametrics.com/keyconcepts/#filtering-by-multiple-or-filters) for details.
* Support for matching against multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (300, 301, 304)`
returns `status_code` metrics with one of `300`, `301` or `304` values.
* Support for `group_left(*)` and `group_right(*)` for copying all the labels from time series on the `one` side * Support for `group_left(*)` and `group_right(*)` for copying all the labels from time series on the `one` side
of [many-to-one operations](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches). of [many-to-one operations](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches).
The copied label names may clash with the existing label names, so MetricsQL provides an ability to add prefix to the copied metric names The copied label names may clash with the existing label names, so MetricsQL provides an ability to add prefix to the copied metric names