app/vmselect/promql: add support for matching against multiple numeric constants via q == (c1,...,cN) and q != (c1,...,cN) syntax

This commit is contained in:
Aliaksandr Valialkin 2024-04-19 17:56:29 +02:00
parent 249a467ea4
commit 34e253f9d6
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 163 additions and 3 deletions

View file

@ -23,8 +23,8 @@ var binaryOpFuncs = map[string]binaryOpFunc{
"atan2": newBinaryOpArithFunc(binaryop.Atan2),
// cmp ops
"==": newBinaryOpCmpFunc(binaryop.Eq),
"!=": newBinaryOpCmpFunc(binaryop.Neq),
"==": binaryOpEqFunc,
"!=": binaryOpNeqFunc,
">": newBinaryOpCmpFunc(binaryop.Gt),
"<": newBinaryOpCmpFunc(binaryop.Lt),
">=": newBinaryOpCmpFunc(binaryop.Gte),
@ -54,6 +54,84 @@ type binaryOpFuncArg struct {
// binaryOpFunc is the signature shared by binary operator implementations:
// it receives the operator arguments in bfa and returns the resulting time series or an error.
type binaryOpFunc func(bfa *binaryOpFuncArg) ([]*timeseries, error)
// binaryOpEqFunc implements the `==` binary operator.
//
// If neither operand is a union expression (see isUnionFunc), the call is delegated
// to binaryOpEqStdFunc. Otherwise the `q == (c1,...,cN)` form is handled: every sample
// on the non-union side, which isn't equal to any sample on the union side
// at the same position, is replaced with NaN.
//
// The series on the non-union side are modified in place and returned.
// Nil is returned if either side has no series.
//
// NOTE(review): the union path reads only Left and Right from bfa.be, so modifiers
// such as `bool` do not appear to be consulted here - confirm this is intended.
func binaryOpEqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
	leftIsUnion := isUnionFunc(bfa.be.Left)
	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
		return binaryOpEqStdFunc(bfa)
	}

	// Special case for `q == (1,2,3)`.
	// Normalize the operands, so the union always ends up in allowed.
	// This allows handling `(1,2,3) == q` in the same way.
	tss, allowed := bfa.left, bfa.right
	if leftIsUnion {
		tss, allowed = allowed, tss
	}
	if len(tss) == 0 || len(allowed) == 0 {
		return nil, nil
	}

	for _, ts := range tss {
		for i, v := range ts.Values {
			if containsValueAt(allowed, v, i) {
				continue
			}
			ts.Values[i] = nan
		}
	}

	// Series consisting solely of NaNs are intentionally kept in the result.
	// Otherwise `(foo op bar) default N` wouldn't work as expected
	// when `(foo op bar)` results in NaN-only series.
	return tss, nil
}
// binaryOpNeqFunc implements the `!=` binary operator.
//
// If neither operand is a union expression (see isUnionFunc), the call is delegated
// to binaryOpNeqStdFunc. Otherwise the `q != (c1,...,cN)` form is handled: every sample
// on the non-union side, which is equal to some sample on the union side
// at the same position, is replaced with NaN.
//
// The series on the non-union side are modified in place and returned.
// Nil is returned if the non-union side has no series, while the non-union side
// is returned untouched if the union side has no series.
//
// NOTE(review): the union path reads only Left and Right from bfa.be, so modifiers
// such as `bool` do not appear to be consulted here - confirm this is intended.
func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
	leftIsUnion := isUnionFunc(bfa.be.Left)
	if !leftIsUnion && !isUnionFunc(bfa.be.Right) {
		return binaryOpNeqStdFunc(bfa)
	}

	// Special case for `q != (1,2,3)`.
	// Normalize the operands, so the union always ends up in excluded.
	// This allows handling `(1,2,3) != q` in the same way.
	tss, excluded := bfa.left, bfa.right
	if leftIsUnion {
		tss, excluded = excluded, tss
	}
	switch {
	case len(tss) == 0:
		return nil, nil
	case len(excluded) == 0:
		// There is nothing to exclude.
		return tss, nil
	}

	for _, ts := range tss {
		for i, v := range ts.Values {
			if containsValueAt(excluded, v, i) {
				ts.Values[i] = nan
			}
		}
	}

	// Series consisting solely of NaNs are intentionally kept in the result.
	// Otherwise `(foo op bar) default N` wouldn't work as expected
	// when `(foo op bar)` results in NaN-only series.
	return tss, nil
}
// isUnionFunc reports whether e is a function call expression with either an empty name
// (the form used for parenthesized lists such as `(1,2,3)` in the special cases
// handled by the callers) or the `union` name in any letter case.
func isUnionFunc(e metricsql.Expr) bool {
	fe, ok := e.(*metricsql.FuncExpr)
	if !ok {
		return false
	}
	if fe.Name == "" {
		return true
	}
	return strings.ToLower(fe.Name) == "union"
}
// containsValueAt reports whether at least one series in tss has the value v at position idx.
//
// The values are compared with `==`, so NaN v never matches.
// Every series in tss is indexed at idx without a bounds check.
func containsValueAt(tss []*timeseries, v float64, idx int) bool {
	for i := range tss {
		if v == tss[i].Values[idx] {
			return true
		}
	}
	return false
}
var (
// binaryOpEqStdFunc is the `==` implementation built by newBinaryOpCmpFunc from binaryop.Eq.
// binaryOpEqFunc delegates to it when neither operand is a union expression.
binaryOpEqStdFunc = newBinaryOpCmpFunc(binaryop.Eq)
// binaryOpNeqStdFunc is the `!=` implementation built by newBinaryOpCmpFunc from binaryop.Neq.
// binaryOpNeqFunc delegates to it when neither operand is a union expression.
binaryOpNeqStdFunc = newBinaryOpCmpFunc(binaryop.Neq)
)
func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc {
cfe := func(left, right float64, isBool bool) float64 {
if !isBool {

View file

@ -5203,9 +5203,24 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`sum(union-args)`, func(t *testing.T) {
t.Run(`sum(union-scalars)`, func(t *testing.T) {
t.Parallel()
q := `sum((1, 2, 3))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{6, 6, 6, 6, 6, 6},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`sum(union-vectors)`, func(t *testing.T) {
t.Parallel()
q := `sum((
alias(1, "foo"),
alias(2, "foo"),
alias(3, "foo"),
))`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
@ -5763,6 +5778,51 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`equal-list`, func(t *testing.T) {
t.Parallel()
q := `time() == (100, 1000, 1400, 600)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, nan, 1400, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`equal-list-reverse`, func(t *testing.T) {
t.Parallel()
q := `(100, 1000, 1400, 600) == time()`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1000, nan, 1400, nan, nan, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`not-equal-list`, func(t *testing.T) {
t.Parallel()
q := `alias(time(), "foobar") != UNIon(100, 1000, 1400, 600)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1200, nan, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
r.MetricName.MetricGroup = []byte("foobar")
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`not-equal-list-reverse`, func(t *testing.T) {
t.Parallel()
q := `(100, 1000, 1400, 600) != time()`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1200, nan, 1600, 1800, 2000},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`quantiles_over_time(single_sample)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label(

View file

@ -1658,6 +1658,16 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
return evalNumber(tfa.ec, nan), nil
}
if areAllArgsScalar(args) {
// Special case for (v1,...,vN) where vX are scalars - return all the scalars as time series.
// This is needed for "q == (v1,...,vN)" and "q != (v1,...,vN)" cases, where vX are numeric constants.
rvs := make([]*timeseries, len(args))
for i, arg := range args {
rvs[i] = arg[0]
}
return rvs, nil
}
rvs := make([]*timeseries, 0, len(args[0]))
m := make(map[string]bool, len(args[0]))
bb := bbPool.Get()
@ -1676,6 +1686,15 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
// areAllArgsScalar reports whether isScalar holds for every entry in args.
//
// It returns true for empty args.
func areAllArgsScalar(args [][]*timeseries) bool {
	for i := range args {
		if isScalar(args[i]) {
			continue
		}
		return false
	}
	return true
}
func transformLabelKeep(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 1 {

View file

@ -30,6 +30,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
## tip
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): support filtering by multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (200, 201, 300)` returns `status_code` metrics with any of `200`, `201` or `300` values, while `status_code != (400, 404, 500)` returns `status_code` metrics with all the values except `400`, `404` and `500`.
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect).
* FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication in addition to sharding among remote storage systems if the `-remoteWrite.shardByURLReplicas=N` command-line flag is set together with the `-remoteWrite.shardByURL` command-line flag, where `N` is the desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054).

View file

@ -79,6 +79,8 @@ The list of MetricsQL features on top of PromQL:
* [Series selectors](https://docs.victoriametrics.com/keyconcepts/#filtering) accept multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}`
selects series with `{env="prod",job="a"}` or `{env="dev",job="b"}` labels.
See [these docs](https://docs.victoriametrics.com/keyconcepts/#filtering-by-multiple-or-filters) for details.
* Support for matching against multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (300, 301, 304)`
returns `status_code` metrics with one of `300`, `301` or `304` values.
* Support for `group_left(*)` and `group_right(*)` for copying all the labels from time series on the `one` side
of [many-to-one operations](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches).
The copied label names may clash with the existing label names, so MetricsQL provides an ability to add prefix to the copied metric names