mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
vmalert: add function "query", "first" and "value" to alert templates functions (#960)
The commit adds a support for template function `query`, `first` and `value`. The function `query` executes a MetricsQL query for active alerts. In vmalert we update templates on every evaluation for active alerts to keep them up to date. With `query` func it may become a perf issue since it will fire a query on every execution. We should keep it in mind for now. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/539
This commit is contained in:
parent
0b2726c3be
commit
6247884057
11 changed files with 131 additions and 36 deletions
|
@ -21,7 +21,6 @@ may fail;
|
|||
* by default, rules execution is sequential within one group, but persisting of execution results to remote
|
||||
storage is asynchronous. Hence, user shouldn't rely on recording rules chaining when result of previous
|
||||
recording rule is reused in next one;
|
||||
* there is no `query` function support in templates yet;
|
||||
* `vmalert` has no UI, just an API for getting groups and rules statuses.
|
||||
|
||||
### QuickStart
|
||||
|
|
|
@ -137,6 +137,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
|||
}
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||
updated := make(map[uint64]struct{})
|
||||
// update list of active alerts
|
||||
for _, m := range qMetrics {
|
||||
|
@ -158,14 +159,14 @@ func (ar *AlertingRule) Exec(ctx context.Context, q datasource.Querier, series b
|
|||
a.Value = m.Value
|
||||
// and re-exec template since Value can be used
|
||||
// in templates
|
||||
err = ar.template(a)
|
||||
err = ar.template(a, qFn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
a, err := ar.newAlert(m, ar.lastExecTime)
|
||||
a, err := ar.newAlert(m, ar.lastExecTime, qFn)
|
||||
if err != nil {
|
||||
ar.lastExecError = err
|
||||
return nil, fmt.Errorf("failed to create alert: %w", err)
|
||||
|
@ -245,7 +246,7 @@ func hash(m datasource.Metric) uint64 {
|
|||
return hash.Sum64()
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifier.Alert, error) {
|
||||
func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time, qFn notifier.QueryFn) (*notifier.Alert, error) {
|
||||
a := ¬ifier.Alert{
|
||||
GroupID: ar.GroupID,
|
||||
Name: ar.Name,
|
||||
|
@ -264,16 +265,16 @@ func (ar *AlertingRule) newAlert(m datasource.Metric, start time.Time) (*notifie
|
|||
}
|
||||
a.Labels[l.Name] = l.Value
|
||||
}
|
||||
return a, ar.template(a)
|
||||
return a, ar.template(a, qFn)
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) template(a *notifier.Alert) error {
|
||||
func (ar *AlertingRule) template(a *notifier.Alert, qFn notifier.QueryFn) error {
|
||||
var err error
|
||||
a.Labels, err = a.ExecTemplate(a.Labels)
|
||||
a.Labels, err = a.ExecTemplate(qFn, a.Labels)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
a.Annotations, err = a.ExecTemplate(ar.Annotations)
|
||||
a.Annotations, err = a.ExecTemplate(qFn, ar.Annotations)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -393,6 +394,8 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
|||
return fmt.Errorf("querier is nil")
|
||||
}
|
||||
|
||||
qFn := func(query string) ([]datasource.Metric, error) { return q.Query(ctx, query) }
|
||||
|
||||
// account for external labels in filter
|
||||
var labelsFilter string
|
||||
for k, v := range labels {
|
||||
|
@ -421,7 +424,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
|||
m.Labels = append(m.Labels, l)
|
||||
}
|
||||
|
||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0))
|
||||
a, err := ar.newAlert(m, time.Unix(int64(m.Value), 0), qFn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create alert: %w", err)
|
||||
}
|
||||
|
|
|
@ -7,8 +7,9 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
|
@ -42,7 +43,7 @@ func TestParseBad(t *testing.T) {
|
|||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules2-bad.rules"},
|
||||
"function \"value\" not defined",
|
||||
"function \"unknown\" not defined",
|
||||
},
|
||||
{
|
||||
[]string{"testdata/dir/rules3-bad.rules"},
|
||||
|
@ -137,12 +138,14 @@ func TestGroup_Validate(t *testing.T) {
|
|||
Alert: "alert",
|
||||
Expr: "up == 1",
|
||||
Labels: map[string]string{
|
||||
"summary": "{{ value|query }}",
|
||||
"summary": `
|
||||
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" "localhost" | query }}
|
||||
{{ . | first | value | humanize1024 }}B
|
||||
{{ end }}`,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expErr: "error parsing annotation",
|
||||
validateAnnotations: true,
|
||||
},
|
||||
{
|
||||
|
|
|
@ -6,6 +6,6 @@ groups:
|
|||
expr: vm_rows > 0
|
||||
labels:
|
||||
label: bar
|
||||
summary: "{{ value|query }}"
|
||||
summary: "{{ unknown|query }}"
|
||||
annotations:
|
||||
description: "{{$labels}}"
|
||||
|
|
|
@ -7,11 +7,18 @@ groups:
|
|||
expr: sum(vm_tcplistener_conns) by(instance) > 1
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: "Too high connection number for {{$labels.instance}}"
|
||||
summary: Too high connection number for {{$labels.instance}}
|
||||
{{ with printf "sum(vm_tcplistener_conns{instance=%q})" .Labels.instance | query }}
|
||||
{{ . | first | value }}
|
||||
{{ end }}
|
||||
description: "It is {{ $value }} connections for {{$labels.instance}}"
|
||||
- alert: ExampleAlertAlwaysFiring
|
||||
expr: sum by(job)
|
||||
(up == 1)
|
||||
annotations:
|
||||
summary: Instances up {{ range query "up" }}
|
||||
{{ . | label "instance" }}
|
||||
{{ end }}
|
||||
- record: handler:requests:rate5m
|
||||
expr: sum(rate(prometheus_http_requests_total[5m])) by (handler)
|
||||
labels:
|
||||
|
|
|
@ -34,6 +34,17 @@ func (m *Metric) AddLabel(key, value string) {
|
|||
m.Labels = append(m.Labels, Label{Name: key, Value: value})
|
||||
}
|
||||
|
||||
// Label returns the given label value.
|
||||
// If label is missing empty string will be returned
|
||||
func (m *Metric) Label(key string) string {
|
||||
for _, l := range m.Labels {
|
||||
if l.Name == key {
|
||||
return l.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Label represents metric's label
|
||||
type Label struct {
|
||||
Name string
|
||||
|
|
|
@ -167,7 +167,7 @@ func TestGroupStart(t *testing.T) {
|
|||
m2 := metricWithLabels(t, "instance", inst2, "job", job)
|
||||
|
||||
r := g.Rules[0].(*AlertingRule)
|
||||
alert1, err := r.newAlert(m1, time.Now())
|
||||
alert1, err := r.newAlert(m1, time.Now(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
|
@ -179,7 +179,7 @@ func TestGroupStart(t *testing.T) {
|
|||
alert1.Labels["host"] = inst1
|
||||
alert1.ID = hash(m1)
|
||||
|
||||
alert2, err := r.newAlert(m2, time.Now())
|
||||
alert2, err := r.newAlert(m2, time.Now(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("faield to create alert: %s", err)
|
||||
}
|
||||
|
|
|
@ -206,7 +206,7 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
|
|||
"tpl": externalAlertSource,
|
||||
}
|
||||
return func(alert notifier.Alert) string {
|
||||
templated, err := alert.ExecTemplate(m)
|
||||
templated, err := alert.ExecTemplate(nil, m)
|
||||
if err != nil {
|
||||
logger.Errorf("can not exec source template %s", err)
|
||||
}
|
||||
|
|
|
@ -62,21 +62,23 @@ const tplHeader = `{{ $value := .Value }}{{ $labels := .Labels }}{{ $expr := .Ex
|
|||
|
||||
// ExecTemplate executes the Alert template for give
|
||||
// map of annotations.
|
||||
func (a *Alert) ExecTemplate(annotations map[string]string) (map[string]string, error) {
|
||||
// Every alert could have a different datasource, so function
|
||||
// requires a queryFunction as an argument.
|
||||
func (a *Alert) ExecTemplate(q QueryFn, annotations map[string]string) (map[string]string, error) {
|
||||
tplData := alertTplData{Value: a.Value, Labels: a.Labels, Expr: a.Expr}
|
||||
return templateAnnotations(annotations, tplHeader, tplData)
|
||||
return templateAnnotations(annotations, tplData, funcsWithQuery(q))
|
||||
}
|
||||
|
||||
// ValidateTemplates validate annotations for possible template error, uses empty data for template population
|
||||
func ValidateTemplates(annotations map[string]string) error {
|
||||
_, err := templateAnnotations(annotations, tplHeader, alertTplData{
|
||||
_, err := templateAnnotations(annotations, alertTplData{
|
||||
Labels: map[string]string{},
|
||||
Value: 0,
|
||||
})
|
||||
}, tmplFunc)
|
||||
return err
|
||||
}
|
||||
|
||||
func templateAnnotations(annotations map[string]string, header string, data alertTplData) (map[string]string, error) {
|
||||
func templateAnnotations(annotations map[string]string, data alertTplData, funcs template.FuncMap) (map[string]string, error) {
|
||||
var builder strings.Builder
|
||||
var buf bytes.Buffer
|
||||
eg := new(utils.ErrGroup)
|
||||
|
@ -85,10 +87,10 @@ func templateAnnotations(annotations map[string]string, header string, data aler
|
|||
r[key] = text
|
||||
buf.Reset()
|
||||
builder.Reset()
|
||||
builder.Grow(len(header) + len(text))
|
||||
builder.WriteString(header)
|
||||
builder.Grow(len(tplHeader) + len(text))
|
||||
builder.WriteString(tplHeader)
|
||||
builder.WriteString(text)
|
||||
if err := templateAnnotation(&buf, builder.String(), data); err != nil {
|
||||
if err := templateAnnotation(&buf, builder.String(), data, funcs); err != nil {
|
||||
eg.Add(fmt.Errorf("key %q, template %q: %w", key, text, err))
|
||||
continue
|
||||
}
|
||||
|
@ -97,8 +99,9 @@ func templateAnnotations(annotations map[string]string, header string, data aler
|
|||
return r, eg.Err()
|
||||
}
|
||||
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData) error {
|
||||
tpl, err := template.New("").Funcs(tmplFunc).Option("missingkey=zero").Parse(text)
|
||||
func templateAnnotation(dst io.Writer, text string, data alertTplData, funcs template.FuncMap) error {
|
||||
t := template.New("").Funcs(funcs).Option("missingkey=zero")
|
||||
tpl, err := t.Parse(text)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing annotation: %w", err)
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@ package notifier
|
|||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
func TestAlert_ExecTemplate(t *testing.T) {
|
||||
|
@ -60,11 +62,41 @@ func TestAlert_ExecTemplate(t *testing.T) {
|
|||
"exprEscapedPath": "vm_rows%7B%5C%22label%5C%22=%5C%22bar%5C%22%7D%3E0",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "query",
|
||||
alert: &Alert{Expr: `vm_rows{"label"="bar"}>0`},
|
||||
annotations: map[string]string{
|
||||
"summary": `{{ query "foo" | first | value }}`,
|
||||
"desc": `{{ range query "bar" }}{{ . | label "foo" }} {{ . | value }};{{ end }}`,
|
||||
},
|
||||
expTpl: map[string]string{
|
||||
"summary": "1",
|
||||
"desc": "bar 1;garply 2;",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
qFn := func(q string) ([]datasource.Metric, error) {
|
||||
return []datasource.Metric{
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "baz", Value: "qux"},
|
||||
},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Labels: []datasource.Label{
|
||||
{Name: "foo", Value: "garply"},
|
||||
{Name: "baz", Value: "fred"},
|
||||
},
|
||||
Value: 2,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
tpl, err := tc.alert.ExecTemplate(tc.annotations)
|
||||
tpl, err := tc.alert.ExecTemplate(qFn, tc.annotations)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -14,21 +14,40 @@
|
|||
package notifier
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
html_template "html/template"
|
||||
"math"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
text_template "text/template"
|
||||
"time"
|
||||
|
||||
htmlTpl "html/template"
|
||||
textTpl "text/template"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
)
|
||||
|
||||
var tmplFunc text_template.FuncMap
|
||||
// QueryFn is used to wrap a call to datasource into simple-to-use function
|
||||
// for templating functions.
|
||||
type QueryFn func(query string) ([]datasource.Metric, error)
|
||||
|
||||
// InitTemplateFunc returns template helper functions
|
||||
func funcsWithQuery(query QueryFn) textTpl.FuncMap {
|
||||
fm := make(textTpl.FuncMap)
|
||||
for k, fn := range tmplFunc {
|
||||
fm[k] = fn
|
||||
}
|
||||
fm["query"] = func(q string) ([]datasource.Metric, error) {
|
||||
return query(q)
|
||||
}
|
||||
return fm
|
||||
}
|
||||
|
||||
var tmplFunc textTpl.FuncMap
|
||||
|
||||
// InitTemplateFunc initiates template helper functions
|
||||
func InitTemplateFunc(externalURL *url.URL) {
|
||||
tmplFunc = text_template.FuncMap{
|
||||
tmplFunc = textTpl.FuncMap{
|
||||
"args": func(args ...interface{}) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
for i, a := range args {
|
||||
|
@ -40,8 +59,8 @@ func InitTemplateFunc(externalURL *url.URL) {
|
|||
re := regexp.MustCompile(pattern)
|
||||
return re.ReplaceAllString(text, repl)
|
||||
},
|
||||
"safeHtml": func(text string) html_template.HTML {
|
||||
return html_template.HTML(text)
|
||||
"safeHtml": func(text string) htmlTpl.HTML {
|
||||
return htmlTpl.HTML(text)
|
||||
},
|
||||
"match": regexp.MatchString,
|
||||
"title": strings.Title,
|
||||
|
@ -151,6 +170,24 @@ func InitTemplateFunc(externalURL *url.URL) {
|
|||
"quotesEscape": func(q string) string {
|
||||
return strings.Replace(q, `"`, `\"`, -1)
|
||||
},
|
||||
// query function supposed to be substituted at funcsWithQuery().
|
||||
// it is present here only for validation purposes, when there is no
|
||||
// provided datasource.
|
||||
"query": func(q string) ([]datasource.Metric, error) {
|
||||
return nil, nil
|
||||
},
|
||||
"first": func(metrics []datasource.Metric) (datasource.Metric, error) {
|
||||
if len(metrics) > 0 {
|
||||
return metrics[0], nil
|
||||
}
|
||||
return datasource.Metric{}, errors.New("first() called on vector with no elements")
|
||||
},
|
||||
"label": func(label string, m datasource.Metric) string {
|
||||
return m.Label(label)
|
||||
},
|
||||
"value": func(m datasource.Metric) float64 {
|
||||
return m.Value
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue