From 9eaa2ab87166b7449bedce3cc52aef590647b942 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 21 Jan 2020 15:00:13 +0200
Subject: [PATCH] app/vmselect/promql: add `label_match(q, label, regexp)` and
 `label_mismatch(q, label, regexp)` functions for filtering time series
 by labels matching (or not matching) the given regexp

---
 app/vmselect/promql/exec_test.go | 36 ++++++++++++++++++++
 app/vmselect/promql/transform.go | 58 ++++++++++++++++++++++++++++++++
 docs/ExtendedPromQL.md           |  1 +
 lib/metricsql/transform.go       |  2 ++
 4 files changed, 97 insertions(+)

diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go
index ea99f0c75a..d55eaf5122 100644
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -1476,6 +1476,38 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`label_match()`, func(t *testing.T) {
+		t.Parallel()
+		q := `
+		label_match((
+			alias(time(), "foo"),
+			alias(2*time(), "bar"),
+		), "__name__", "f.+")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foo")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`label_mismatch()`, func(t *testing.T) {
+		t.Parallel()
+		q := `
+		label_mismatch((
+			alias(time(), "foo"),
+			alias(2*time(), "bar"),
+		), "__name__", "f.+")`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{2000, 2400, 2800, 3200, 3600, 4000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("bar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`two_timeseries`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_desc(time() or label_set(2, "xx", "foo"))`
@@ -5247,6 +5279,8 @@ func TestExecError(t *testing.T) {
 	f(`label_set(1, "foo")`)
 	f(`label_del()`)
 	f(`label_keep()`)
+	f(`label_match()`)
+	f(`label_mismatch()`)
 	f(`round()`)
 	f(`round(1,2,3)`)
 	f(`scalar()`)
@@ -5347,6 +5381,8 @@ func TestExecError(t *testing.T) {
 	f(`label_transform(1, "foo", 3, 4)`)
 	f(`label_transform(1, "foo", "bar", 4)`)
 	f(`label_transform(1, "foo", "invalid(regexp", "baz`)
+	f(`label_match(1, 2, 3)`)
+	f(`label_mismatch(1, 2, 3)`)
 	f(`alias(1, 2)`)
 	f(`aggr_over_time(1, 2)`)
 	f(`aggr_over_time(("foo", "bar"), 3)`)
diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go
index 35f0490b4f..9801e757c0 100644
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@@ -65,6 +65,8 @@ var transformFuncs = map[string]transformFunc{
 	"label_move":         transformLabelMove,
 	"label_transform":    transformLabelTransform,
 	"label_value":        transformLabelValue,
+	"label_match":        transformLabelMatch,
+	"label_mismatch":     transformLabelMismatch,
 	"union":              transformUnion,
 	"":                   transformUnion, // empty func is a synonim to union
 	"keep_last_value":    transformKeepLastValue,
@@ -1203,6 +1205,62 @@ func transformLabelValue(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }
 
+func transformLabelMatch(tfa *transformFuncArg) ([]*timeseries, error) { // label_match(q, label, regexp): returns the series from q whose label value matches regexp
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 3); err != nil { // the three args are: q, label name, regexp
+		return nil, err
+	}
+	labelName, err := getString(args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	labelRe, err := getString(args[2], 2)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get regexp: %s", err)
+	}
+	r, err := metricsql.CompileRegexpAnchored(labelRe) // NOTE(review): presumably anchors the regexp to the whole label value - confirm in lib/metricsql
+	if err != nil {
+		return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
+	}
+	tss := args[0]
+	rvs := tss[:0] // filter in place: rvs shares (and overwrites) the backing array of tss
+	for _, ts := range tss {
+		labelValue := ts.MetricName.GetTagValue(labelName)
+		if r.Match(labelValue) { // keep the series only when its label value matches
+			rvs = append(rvs, ts)
+		}
+	}
+	return rvs, nil
+}
+
+func transformLabelMismatch(tfa *transformFuncArg) ([]*timeseries, error) { // label_mismatch(q, label, regexp): returns the series from q whose label value does NOT match regexp
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 3); err != nil { // the three args are: q, label name, regexp
+		return nil, err
+	}
+	labelName, err := getString(args[1], 1)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get label name: %s", err)
+	}
+	labelRe, err := getString(args[2], 2)
+	if err != nil {
+		return nil, fmt.Errorf("cannot get regexp: %s", err)
+	}
+	r, err := metricsql.CompileRegexpAnchored(labelRe) // NOTE(review): presumably anchors the regexp to the whole label value - confirm in lib/metricsql
+	if err != nil {
+		return nil, fmt.Errorf(`cannot compile regexp %q: %s`, labelRe, err)
+	}
+	tss := args[0]
+	rvs := tss[:0] // filter in place: rvs shares (and overwrites) the backing array of tss
+	for _, ts := range tss {
+		labelValue := ts.MetricName.GetTagValue(labelName)
+		if !r.Match(labelValue) { // inverse of label_match: keep the series only when its label value does not match
+			rvs = append(rvs, ts)
+		}
+	}
+	return rvs, nil
+}
+
 func transformLn(v float64) float64 {
 	return math.Log(v)
 }
diff --git a/docs/ExtendedPromQL.md b/docs/ExtendedPromQL.md
index 7e661cb273..b199e0a8eb 100644
--- a/docs/ExtendedPromQL.md
+++ b/docs/ExtendedPromQL.md
@@ -52,6 +52,7 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
   - `label_move(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for moving label values from `src_*` to `dst_*`.
   - `label_transform(q, label, regexp, replacement)` for replacing all the `regexp` occurences with `replacement` in the `label` values from `q`.
   - `label_value(q, label)` - returns numeric values for the given `label` from `q`.
+  - `label_match(q, label, regexp)` and `label_mismatch(q, label, regexp)` for keeping only the time series from `q` whose `label` value matches (or, for `label_mismatch`, does not match) the given `regexp`.
 - `step()` function for returning the step in seconds used in the query.
 - `start()` and `end()` functions for returning the start and end timestamps of the `[start ... end]` range used in the query.
 - `integrate(m[d])` for returning integral over the given duration `d` for the given metric `m`.
diff --git a/lib/metricsql/transform.go b/lib/metricsql/transform.go
index 16094dfdd8..cb57017056 100644
--- a/lib/metricsql/transform.go
+++ b/lib/metricsql/transform.go
@@ -44,6 +44,8 @@ var transformFuncs = map[string]bool{
 	"label_move":         true,
 	"label_transform":    true,
 	"label_value":        true,
+	"label_match":        true,
+	"label_mismatch":     true,
 	"union":              true,
 	"":                   true, // empty func is a synonim to union
 	"keep_last_value":    true,