mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-21 15:45:01 +00:00
vmalert: add function "query", "first" and "value" to alert templates functions (#960)
The commit adds support for the template functions `query`, `first` and `value`. The `query` function executes a MetricsQL query for active alerts. vmalert updates templates for active alerts on every evaluation to keep them up to date, so using `query` may become a performance issue: it fires a query on every evaluation. We should keep this in mind for now. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/539
This commit is contained in:
parent
0b2726c3be
commit
6247884057
11 changed files with 131 additions and 36 deletions
|
@ -21,7 +21,6 @@ may fail;
|
||||||
* by default, rules execution is sequential within one group, but persisting of execution results to remote
|
* by default, rules execution is sequential within one group, but persisting of execution results to remote
|
||||||
storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
|
storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
|
||||||
recording rule is reused in next one;
|
recording rule is reused in next one;
|
||||||
* there is no `query` function support in templates yet;
|
|
||||||
* `vmalert` has no UI, just an API for getting groups and rules statuses.
|
* `vmalert` has no UI, just an API for getting groups and rules statuses.
|
||||||
|
|
||||||
### QuickStart
|
### QuickStart
|
||||||
|
|
|
@ -137,6 +137,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||||
updated := make(map[uint64]struct{})
|
updated := make(map[uint64]struct{})
|
||||||
// update list of active alerts
|
// update list of active alerts
|
||||||
for _, m := range qMetrics {
|
for _, m := range qMetrics {
|
||||||
|
@ -158,14 +159,14 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
||||||
a.Value = m.Value
|
a.Value = m.Value
|
||||||
// and re-exec template since Value can be used
|
// and re-exec template since Value can be used
|
||||||
// in templates
|
// in templates
|
||||||
err = ar.template(a)
|
err = ar.template(a, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
a, err := ar.newAlert(m, ar.lastExecTime)
|
a, err := ar.newAlert(m, ar.lastExecTime, qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ar.lastExecError = err
|
ar.lastExecError = err
|
||||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||||
|
@ -245,7 +246,7 @@ func hash(m datasource.Metric) uint64 {
|
||||||
return hash.Sum64()
|
return hash.Sum64()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifier.Alert, error) {
|
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time, qFn notifier.QueryFn) (*notifier.Alert, error) {
|
||||||
a := &notifier.Alert{
|
a := &notifier.Alert{
|
||||||
GroupID: ar.GroupID,
|
GroupID: ar.GroupID,
|
||||||
Name: ar.Name,
|
Name: ar.Name,
|
||||||
|
@ -264,16 +265,16 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifie
|
||||||
}
|
}
|
||||||
a.Labels[l.Name] = l.Value
|
a.Labels[l.Name] = l.Value
|
||||||
}
|
}
|
||||||
return a, ar.template(a)
|
return a, ar.template(a, qFn)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) template(a *notifier.Alert) error {
|
func (ar *AlertingRule) template(a *notifier.Alert, qFn notifier.QueryFn) error {
|
||||||
var err error
|
var err error
|
||||||
a.Labels, err = a.ExecTemplate(a.Labels)
|
a.Labels, err = a.ExecTemplate(qFn, a.Labels)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
a.Annotations, err = a.ExecTemplate(ar.Annotations)
|
a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,6 +394,8 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||||
return fmt.Errorf("querier is nil")
|
return fmt.Errorf("querier is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||||
|
|
||||||
// account for external labels in filter
|
// account for external labels in filter
|
||||||
var labelsFilter string
|
var labelsFilter string
|
||||||
for k, v := range labels {
|
for k, v := range labels {
|
||||||
|
@ -421,7 +424,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
||||||
m.Labels = append(m.Labels, l)
|
m.Labels = append(m.Labels, l)
|
||||||
}
|
}
|
||||||
|
|
||||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0))
|
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0), qFn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create alert: %w", err)
|
return fmt.Errorf("failed to create alert: %w", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,8 +7,9 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
|
@ -42,7 +43,7 @@ func TestParseBad(t *testing.T) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
[]string{"testdata/dir/rules2-bad.rules"},
|
[]string{"testdata/dir/rules2-bad.rules"},
|
||||||
"function \"value\" not defined",
|
"function \"unknown\" not defined",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
[]string{"testdata/dir/rules3-bad.rules"},
|
[]string{"testdata/dir/rules3-bad.rules"},
|
||||||
|
@ -137,12 +138,14 @@ func TestGroup_Validate(t *testing.T) {
|
||||||
Alert: "alert",
|
Alert: "alert",
|
||||||
Expr: "up == 1",
|
Expr: "up == 1",
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"summary": "{{ value|query }}",
|
"summary": `
|
||||||
|
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
|
||||||
|
{{ . | first | value | humanize1024 }}B
|
||||||
|
{{ end }}`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expErr: "error parsing annotation",
|
|
||||||
validateAnnotations: true,
|
validateAnnotations: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -6,6 +6,6 @@ groups:
|
||||||
expr: vm_rows > 0
|
expr: vm_rows > 0
|
||||||
labels:
|
labels:
|
||||||
label: bar
|
label: bar
|
||||||
summary: "{{ value|query }}"
|
summary: "{{ unknown|query }}"
|
||||||
annotations:
|
annotations:
|
||||||
description: "{{$labels}}"
|
description: "{{$labels}}"
|
||||||
|
|
|
@ -7,11 +7,18 @@ groups:
|
||||||
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
||||||
for: 3m
|
for: 3m
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Too high connection number for {{$labels.instance}}"
|
summary: Too high connection number for {{$labels.instance}}
|
||||||
|
{{ with printf "sum(vm_tcplistener_conns{instance=%q})" .Labels.instance | query }}
|
||||||
|
{{ . | first | value }}
|
||||||
|
{{ end }}
|
||||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||||
- alert: ExampleAlertAlwaysFiring
|
- alert: ExampleAlertAlwaysFiring
|
||||||
expr: sum by(job)
|
expr: sum by(job)
|
||||||
(up == 1)
|
(up == 1)
|
||||||
|
annotations:
|
||||||
|
summary: Instances up {{ range query "up" }}
|
||||||
|
{{ . | label "instance" }}
|
||||||
|
{{ end }}
|
||||||
- record: handler:requests:rate5m
|
- record: handler:requests:rate5m
|
||||||
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
||||||
labels:
|
labels:
|
||||||
|
|
|
@ -34,6 +34,17 @@ func (m *Metric) AddLabel(key, value string) {
|
||||||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Label returns the given label value.
|
||||||
|
// If the label is missing, an empty string is returned.
|
||||||
|
func (m *Metric) Label(key string) string {
|
||||||
|
for _, l := range m.Labels {
|
||||||
|
if l.Name == key {
|
||||||
|
return l.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// Label represents metric's label
|
// Label represents metric's label
|
||||||
type Label struct {
|
type Label struct {
|
||||||
Name string
|
Name string
|
||||||
|
|
|
@ -167,7 +167,7 @@ func TestGroupStart(t *testing.T) {
|
||||||
m2 := metricWithLabels(t, "instance", inst2, "job", job)
|
m2 := metricWithLabels(t, "instance", inst2, "job", job)
|
||||||
|
|
||||||
r := g.Rules[0].(*AlertingRule)
|
r := g.Rules[0].(*AlertingRule)
|
||||||
alert1, err := r.newAlert(m1, time.Now())
|
alert1, err := r.newAlert(m1, time.Now(), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("faield to create alert: %s", err)
|
t.Fatalf("faield to create alert: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -179,7 +179,7 @@ func TestGroupStart(t *testing.T) {
|
||||||
alert1.Labels["host"] = inst1
|
alert1.Labels["host"] = inst1
|
||||||
alert1.ID = hash(m1)
|
alert1.ID = hash(m1)
|
||||||
|
|
||||||
alert2, err := r.newAlert(m2, time.Now())
|
alert2, err := r.newAlert(m2, time.Now(), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("faield to create alert: %s", err)
|
t.Fatalf("faield to create alert: %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -206,7 +206,7 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
||||||
"tpl": externalAlertSource,
|
"tpl": externalAlertSource,
|
||||||
}
|
}
|
||||||
return func(alert notifier.Alert) string {
|
return func(alert notifier.Alert) string {
|
||||||
templated, err := alert.ExecTemplate(m)
|
templated, err := alert.ExecTemplate(nil, m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("can not exec source template %s", err)
|
logger.Errorf("can not exec source template %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,21 +62,23 @@ const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Ex
|
||||||
|
|
||||||
// ExecTemplate executes the Alert template for the given
|
// ExecTemplate executes the Alert template for the given
|
||||||
// map of annotations.
|
// map of annotations.
|
||||||
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
|
// Every alert could have a different datasource, so function
|
||||||
|
// requires a queryFunction as an argument.
|
||||||
|
func (a *Alert) ExecTemplate(q QueryFn, annotations map[string]string) (map[string]string, error) {
|
||||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
||||||
return templateAnnotations(annotations, tplHeader, tplData)
|
return templateAnnotations(annotations, tplData, funcsWithQuery(q))
|
||||||
}
|
}
|
||||||
|
|
||||||
// ValidateTemplates validates annotations for possible template errors, using empty data for template population
|
// ValidateTemplates validates annotations for possible template errors, using empty data for template population
|
||||||
func ValidateTemplates(annotations map[string]string) error {
|
func ValidateTemplates(annotations map[string]string) error {
|
||||||
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
|
_, err := templateAnnotations(annotations, alertTplData{
|
||||||
Labels: map[string]string{},
|
Labels: map[string]string{},
|
||||||
Value: 0,
|
Value: 0,
|
||||||
})
|
}, tmplFunc)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
func templateAnnotations(annotations map[string]string, data alertTplData, funcs template.FuncMap) (map[string]string, error) {
|
||||||
var builder strings.Builder
|
var builder strings.Builder
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
eg := new(utils.ErrGroup)
|
eg := new(utils.ErrGroup)
|
||||||
|
@ -85,10 +87,10 @@ func templateAnnotations(annotations map[string]string, header string, data aler
|
||||||
r[key] = text
|
r[key] = text
|
||||||
buf.Reset()
|
buf.Reset()
|
||||||
builder.Reset()
|
builder.Reset()
|
||||||
builder.Grow(len(header) + len(text))
|
builder.Grow(len(tplHeader) + len(text))
|
||||||
builder.WriteString(header)
|
builder.WriteString(tplHeader)
|
||||||
builder.WriteString(text)
|
builder.WriteString(text)
|
||||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
if err := templateAnnotation(&buf, builder.String(), data, funcs); err != nil {
|
||||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -97,8 +99,9 @@ func templateAnnotations(annotations map[string]string, header string, data aler
|
||||||
return r, eg.Err()
|
return r, eg.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
func templateAnnotation(dst io.Writer, text string, data alertTplData, funcs template.FuncMap) error {
|
||||||
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
|
t := template.New("").Funcs(funcs).Option("missingkey=zero")
|
||||||
|
tpl, err := t.Parse(text)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("error parsing annotation: %w", err)
|
return fmt.Errorf("error parsing annotation: %w", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,8 @@ package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestAlert_ExecTemplate(t *testing.T) {
|
func TestAlert_ExecTemplate(t *testing.T) {
|
||||||
|
@ -60,11 +62,41 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
||||||
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "query",
|
||||||
|
alert: &Alert{Expr: `vm_rows{"label"="bar"}>0`},
|
||||||
|
annotations: map[string]string{
|
||||||
|
"summary": `{{ query "foo" | first | value }}`,
|
||||||
|
"desc": `{{ range query "bar" }}{{ . | label "foo" }} {{ . | value }};{{ end }}`,
|
||||||
|
},
|
||||||
|
expTpl: map[string]string{
|
||||||
|
"summary": "1",
|
||||||
|
"desc": "bar 1;garply 2;",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qFn := func(q string) ([]datasource.Metric, error) {
|
||||||
|
return []datasource.Metric{
|
||||||
|
{
|
||||||
|
Labels: []datasource.Label{
|
||||||
|
{Name: "foo", Value: "bar"},
|
||||||
|
{Name: "baz", Value: "qux"},
|
||||||
|
},
|
||||||
|
Value: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Labels: []datasource.Label{
|
||||||
|
{Name: "foo", Value: "garply"},
|
||||||
|
{Name: "baz", Value: "fred"},
|
||||||
|
},
|
||||||
|
Value: 2,
|
||||||
|
},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
tpl, err := tc.alert.ExecTemplate(tc.annotations)
|
tpl, err := tc.alert.ExecTemplate(qFn, tc.annotations)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,21 +14,40 @@
|
||||||
package notifier
|
package notifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
html_template "html/template"
|
|
||||||
"math"
|
"math"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
text_template "text/template"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
htmlTpl "html/template"
|
||||||
|
textTpl "text/template"
|
||||||
|
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
)
|
)
|
||||||
|
|
||||||
var tmplFunc text_template.FuncMap
|
// QueryFn is used to wrap a call to datasource into simple-to-use function
|
||||||
|
// for templating functions.
|
||||||
|
type QueryFn func(query string) ([]datasource.Metric, error)
|
||||||
|
|
||||||
// InitTemplateFunc returns template helper functions
|
func funcsWithQuery(query QueryFn) textTpl.FuncMap {
|
||||||
|
fm := make(textTpl.FuncMap)
|
||||||
|
for k, fn := range tmplFunc {
|
||||||
|
fm[k] = fn
|
||||||
|
}
|
||||||
|
fm["query"] = func(q string) ([]datasource.Metric, error) {
|
||||||
|
return query(q)
|
||||||
|
}
|
||||||
|
return fm
|
||||||
|
}
|
||||||
|
|
||||||
|
var tmplFunc textTpl.FuncMap
|
||||||
|
|
||||||
|
// InitTemplateFunc initiates template helper functions
|
||||||
func InitTemplateFunc(externalURL *url.URL) {
|
func InitTemplateFunc(externalURL *url.URL) {
|
||||||
tmplFunc = text_template.FuncMap{
|
tmplFunc = textTpl.FuncMap{
|
||||||
"args": func(args ...interface{}) map[string]interface{} {
|
"args": func(args ...interface{}) map[string]interface{} {
|
||||||
result := make(map[string]interface{})
|
result := make(map[string]interface{})
|
||||||
for i, a := range args {
|
for i, a := range args {
|
||||||
|
@ -40,8 +59,8 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||||
re := regexp.MustCompile(pattern)
|
re := regexp.MustCompile(pattern)
|
||||||
return re.ReplaceAllString(text, repl)
|
return re.ReplaceAllString(text, repl)
|
||||||
},
|
},
|
||||||
"safeHtml": func(text string) html_template.HTML {
|
"safeHtml": func(text string) htmlTpl.HTML {
|
||||||
return html_template.HTML(text)
|
return htmlTpl.HTML(text)
|
||||||
},
|
},
|
||||||
"match": regexp.MatchString,
|
"match": regexp.MatchString,
|
||||||
"title": strings.Title,
|
"title": strings.Title,
|
||||||
|
@ -151,6 +170,24 @@ func InitTemplateFunc(externalURL *url.URL) {
|
||||||
"quotesEscape": func(q string) string {
|
"quotesEscape": func(q string) string {
|
||||||
return strings.Replace(q, `"`, `\"`, -1)
|
return strings.Replace(q, `"`, `\"`, -1)
|
||||||
},
|
},
|
||||||
|
// query function is supposed to be substituted in funcsWithQuery().
|
||||||
|
// it is present here only for validation purposes, when there is no
|
||||||
|
// provided datasource.
|
||||||
|
"query": func(q string) ([]datasource.Metric, error) {
|
||||||
|
return nil, nil
|
||||||
|
},
|
||||||
|
"first": func(metrics []datasource.Metric) (datasource.Metric, error) {
|
||||||
|
if len(metrics) > 0 {
|
||||||
|
return metrics[0], nil
|
||||||
|
}
|
||||||
|
return datasource.Metric{}, errors.New("first() called on vector with no elements")
|
||||||
|
},
|
||||||
|
"label": func(label string, m datasource.Metric) string {
|
||||||
|
return m.Label(label)
|
||||||
|
},
|
||||||
|
"value": func(m datasource.Metric) float64 {
|
||||||
|
return m.Value
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue