2020-05-10 16:58:17 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"math/rand"
|
2020-06-01 10:46:37 +00:00
|
|
|
"os"
|
2021-11-29 23:18:48 +00:00
|
|
|
"strings"
|
2020-05-10 16:58:17 +00:00
|
|
|
"sync"
|
|
|
|
"testing"
|
|
|
|
"time"
|
2020-06-01 10:46:37 +00:00
|
|
|
|
2021-05-25 13:27:22 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
2023-10-13 11:54:33 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
2020-06-01 10:46:37 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
2021-11-29 23:18:48 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
2023-10-13 11:54:33 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
2022-05-14 09:38:44 +00:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
|
2020-05-10 16:58:17 +00:00
|
|
|
)
|
|
|
|
|
2020-06-01 10:46:37 +00:00
|
|
|
func TestMain(m *testing.M) {
|
2022-05-14 09:38:44 +00:00
|
|
|
if err := templates.Load([]string{"testdata/templates/*good.tmpl"}, true); err != nil {
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
2020-06-01 10:46:37 +00:00
|
|
|
os.Exit(m.Run())
|
|
|
|
}
|
|
|
|
|
2020-09-03 08:04:42 +00:00
|
|
|
// TestManagerEmptyRulesDir tests
|
|
|
|
// successful cases of
|
|
|
|
// starting with empty rules folder
|
|
|
|
func TestManagerEmptyRulesDir(t *testing.T) {
|
2023-10-13 11:54:33 +00:00
|
|
|
m := &manager{groups: make(map[uint64]*rule.Group)}
|
2021-05-25 13:27:22 +00:00
|
|
|
cfg := loadCfg(t, []string{"foo/bar"}, true, true)
|
|
|
|
if err := m.update(context.Background(), cfg, false); err != nil {
|
2022-07-18 09:02:51 +00:00
|
|
|
t.Fatalf("expected to load successfully with empty rules dir; got err instead: %v", err)
|
2020-05-10 16:58:17 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestManagerUpdateConcurrent supposed to test concurrent
|
|
|
|
// execution of configuration update.
|
|
|
|
// Should be executed with -race flag
|
|
|
|
func TestManagerUpdateConcurrent(t *testing.T) {
|
2020-05-17 14:12:09 +00:00
|
|
|
m := &manager{
|
2023-10-13 11:54:33 +00:00
|
|
|
groups: make(map[uint64]*rule.Group),
|
|
|
|
querierBuilder: &datasource.FakeQuerier{},
|
|
|
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
2020-05-17 14:12:09 +00:00
|
|
|
}
|
2020-05-10 16:58:17 +00:00
|
|
|
paths := []string{
|
2020-06-01 10:46:37 +00:00
|
|
|
"config/testdata/dir/rules0-good.rules",
|
|
|
|
"config/testdata/dir/rules0-bad.rules",
|
|
|
|
"config/testdata/dir/rules1-good.rules",
|
|
|
|
"config/testdata/dir/rules1-bad.rules",
|
2022-05-14 09:38:44 +00:00
|
|
|
"config/testdata/rules/rules0-good.rules",
|
|
|
|
"config/testdata/rules/rules1-good.rules",
|
|
|
|
"config/testdata/rules/rules2-good.rules",
|
2020-05-10 16:58:17 +00:00
|
|
|
}
|
2021-06-09 09:20:38 +00:00
|
|
|
evalInterval := *evaluationInterval
|
|
|
|
defer func() { *evaluationInterval = evalInterval }()
|
2020-05-17 14:12:09 +00:00
|
|
|
*evaluationInterval = time.Millisecond
|
2021-05-25 13:27:22 +00:00
|
|
|
cfg := loadCfg(t, []string{paths[0]}, true, true)
|
|
|
|
if err := m.start(context.Background(), cfg); err != nil {
|
2020-05-17 14:12:09 +00:00
|
|
|
t.Fatalf("failed to start: %s", err)
|
|
|
|
}
|
2020-05-10 16:58:17 +00:00
|
|
|
|
2020-05-17 14:12:09 +00:00
|
|
|
const workers = 500
|
|
|
|
const iterations = 10
|
2020-05-10 16:58:17 +00:00
|
|
|
wg := sync.WaitGroup{}
|
2020-05-17 14:12:09 +00:00
|
|
|
wg.Add(workers)
|
|
|
|
for i := 0; i < workers; i++ {
|
2023-01-24 03:25:08 +00:00
|
|
|
go func(n int) {
|
2020-05-10 16:58:17 +00:00
|
|
|
defer wg.Done()
|
2023-01-24 03:25:08 +00:00
|
|
|
r := rand.New(rand.NewSource(int64(n)))
|
2020-05-17 14:12:09 +00:00
|
|
|
for i := 0; i < iterations; i++ {
|
2023-01-24 03:25:08 +00:00
|
|
|
rnd := r.Intn(len(paths))
|
2022-07-22 11:50:41 +00:00
|
|
|
cfg, err := config.Parse([]string{paths[rnd]}, notifier.ValidateTemplates, true)
|
2021-05-25 13:27:22 +00:00
|
|
|
if err != nil { // update can fail and this is expected
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
_ = m.update(context.Background(), cfg, false)
|
2020-05-10 16:58:17 +00:00
|
|
|
}
|
2023-01-24 03:25:08 +00:00
|
|
|
}(i)
|
2020-05-10 16:58:17 +00:00
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
}
|
|
|
|
|
2024-07-12 19:57:56 +00:00
|
|
|
// TestManagerUpdate tests sequential configuration updates.
|
|
|
|
func TestManagerUpdate_Success(t *testing.T) {
|
2020-06-01 10:46:37 +00:00
|
|
|
const defaultEvalInterval = time.Second * 30
|
|
|
|
currentEvalInterval := *evaluationInterval
|
|
|
|
*evaluationInterval = defaultEvalInterval
|
|
|
|
defer func() {
|
|
|
|
*evaluationInterval = currentEvalInterval
|
|
|
|
}()
|
|
|
|
|
|
|
|
var (
|
2023-10-13 11:54:33 +00:00
|
|
|
VMRows = &rule.AlertingRule{
|
2020-06-01 10:46:37 +00:00
|
|
|
Name: "VMRows",
|
|
|
|
Expr: "vm_rows > 0",
|
|
|
|
For: 10 * time.Second,
|
|
|
|
Labels: map[string]string{
|
|
|
|
"label": "bar",
|
|
|
|
"host": "{{ $labels.instance }}",
|
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": "{{ $value|humanize }}",
|
|
|
|
"description": "{{$labels}}",
|
|
|
|
},
|
|
|
|
}
|
2023-10-13 11:54:33 +00:00
|
|
|
Conns = &rule.AlertingRule{
|
2020-06-01 10:46:37 +00:00
|
|
|
Name: "Conns",
|
|
|
|
Expr: "sum(vm_tcplistener_conns) by(instance) > 1",
|
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": "Too high connection number for {{$labels.instance}}",
|
|
|
|
"description": "It is {{ $value }} connections for {{$labels.instance}}",
|
|
|
|
},
|
|
|
|
}
|
2023-10-13 11:54:33 +00:00
|
|
|
ExampleAlertAlwaysFiring = &rule.AlertingRule{
|
2020-06-01 10:46:37 +00:00
|
|
|
Name: "ExampleAlertAlwaysFiring",
|
|
|
|
Expr: "sum by(job) (up == 1)",
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2024-07-12 19:57:56 +00:00
|
|
|
f := func(initPath, updatePath string, groupsExpected []*rule.Group) {
|
|
|
|
t.Helper()
|
|
|
|
|
|
|
|
ctx, cancel := context.WithCancel(context.TODO())
|
|
|
|
m := &manager{
|
|
|
|
groups: make(map[uint64]*rule.Group),
|
|
|
|
querierBuilder: &datasource.FakeQuerier{},
|
|
|
|
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
|
|
|
}
|
|
|
|
|
|
|
|
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
|
|
|
if err := m.update(ctx, cfgInit, false); err != nil {
|
|
|
|
t.Fatalf("failed to complete initial rules update: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
cfgUpdate, err := config.Parse([]string{updatePath}, notifier.ValidateTemplates, true)
|
|
|
|
if err == nil { // update can fail and that's expected
|
|
|
|
_ = m.update(ctx, cfgUpdate, false)
|
|
|
|
}
|
|
|
|
if len(groupsExpected) != len(m.groups) {
|
|
|
|
t.Fatalf("unexpected number of groups; got %d; want %d", len(m.groups), len(groupsExpected))
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, wantG := range groupsExpected {
|
|
|
|
gotG, ok := m.groups[wantG.ID()]
|
|
|
|
if !ok {
|
|
|
|
t.Fatalf("expected to have group %q", wantG.Name)
|
|
|
|
}
|
|
|
|
compareGroups(t, wantG, gotG)
|
|
|
|
}
|
|
|
|
|
|
|
|
cancel()
|
|
|
|
m.close()
|
|
|
|
}
|
|
|
|
|
|
|
|
// update good rules
|
|
|
|
f("config/testdata/rules/rules0-good.rules", "config/testdata/dir/rules1-good.rules", []*rule.Group{
|
2020-05-10 16:58:17 +00:00
|
|
|
{
|
2024-07-12 19:57:56 +00:00
|
|
|
File: "config/testdata/dir/rules1-good.rules",
|
|
|
|
Name: "duplicatedGroupDiffFiles",
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Rules: []rule.Rule{
|
|
|
|
&rule.AlertingRule{
|
|
|
|
Name: "VMRows",
|
|
|
|
Expr: "vm_rows > 0",
|
|
|
|
For: 5 * time.Minute,
|
|
|
|
Labels: map[string]string{"dc": "gcp", "label": "bar"},
|
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": "{{ $value }}",
|
|
|
|
"description": "{{$labels}}",
|
2020-06-01 10:46:37 +00:00
|
|
|
},
|
2020-05-10 16:58:17 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// update good rules from 1 to 2 groups
|
|
|
|
f("config/testdata/dir/rules/rules1-good.rules", "config/testdata/rules/rules0-good.rules", []*rule.Group{
|
2020-05-10 16:58:17 +00:00
|
|
|
{
|
2024-07-12 19:57:56 +00:00
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Name: "groupGorSingleAlert",
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Rules: []rule.Rule{VMRows},
|
2020-05-10 16:58:17 +00:00
|
|
|
},
|
|
|
|
{
|
2024-07-12 19:57:56 +00:00
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Name: "TestGroup",
|
|
|
|
Rules: []rule.Rule{
|
|
|
|
Conns,
|
|
|
|
ExampleAlertAlwaysFiring,
|
2020-05-10 16:58:17 +00:00
|
|
|
},
|
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
// update with one bad rule file
|
|
|
|
f("config/testdata/rules/rules0-good.rules", "config/testdata/dir/rules2-bad.rules", []*rule.Group{
|
2020-09-03 08:04:42 +00:00
|
|
|
{
|
2024-07-12 19:57:56 +00:00
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Name: "groupGorSingleAlert",
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Rules: []rule.Rule{VMRows},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Name: "TestGroup",
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Rules: []rule.Rule{
|
|
|
|
Conns,
|
|
|
|
ExampleAlertAlwaysFiring,
|
2020-09-03 08:04:42 +00:00
|
|
|
},
|
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
})
|
2020-05-10 16:58:17 +00:00
|
|
|
|
2024-07-12 19:57:56 +00:00
|
|
|
// update empty dir rules from 0 to 2 groups
|
|
|
|
f("config/testdata/empty/*", "config/testdata/rules/rules0-good.rules", []*rule.Group{
|
|
|
|
{
|
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Name: "groupGorSingleAlert",
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Rules: []rule.Rule{VMRows},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
File: "config/testdata/rules/rules0-good.rules",
|
|
|
|
Interval: defaultEvalInterval,
|
|
|
|
Type: config.NewPrometheusType(),
|
|
|
|
Name: "TestGroup",
|
|
|
|
Rules: []rule.Rule{
|
|
|
|
Conns,
|
|
|
|
ExampleAlertAlwaysFiring,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
})
|
2020-05-10 16:58:17 +00:00
|
|
|
}
|
2024-07-12 19:57:56 +00:00
|
|
|
|
2023-10-13 11:54:33 +00:00
|
|
|
func compareGroups(t *testing.T, a, b *rule.Group) {
|
|
|
|
t.Helper()
|
|
|
|
if a.Name != b.Name {
|
|
|
|
t.Fatalf("expected group name %q; got %q", a.Name, b.Name)
|
|
|
|
}
|
|
|
|
if a.File != b.File {
|
|
|
|
t.Fatalf("expected group %q file name %q; got %q", a.Name, a.File, b.File)
|
|
|
|
}
|
|
|
|
if a.Interval != b.Interval {
|
|
|
|
t.Fatalf("expected group %q interval %v; got %v", a.Name, a.Interval, b.Interval)
|
|
|
|
}
|
|
|
|
if len(a.Rules) != len(b.Rules) {
|
|
|
|
t.Fatalf("expected group %s to have %d rules; got: %d",
|
|
|
|
a.Name, len(a.Rules), len(b.Rules))
|
|
|
|
}
|
|
|
|
for i, r := range a.Rules {
|
|
|
|
got, want := r, b.Rules[i]
|
|
|
|
if a.ID() != b.ID() {
|
|
|
|
t.Fatalf("expected to have rule %q; got %q", want.ID(), got.ID())
|
|
|
|
}
|
|
|
|
if err := rule.CompareRules(t, want, got); err != nil {
|
|
|
|
t.Fatalf("comparison error: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-05-25 13:27:22 +00:00
|
|
|
|
2024-07-12 19:57:56 +00:00
|
|
|
func TestManagerUpdate_Failure(t *testing.T) {
|
|
|
|
f := func(notifiers []notifier.Notifier, rw remotewrite.RWClient, cfg config.Group, errStrExpected string) {
|
|
|
|
t.Helper()
|
|
|
|
|
|
|
|
m := &manager{
|
|
|
|
groups: make(map[uint64]*rule.Group),
|
|
|
|
querierBuilder: &datasource.FakeQuerier{},
|
|
|
|
rw: rw,
|
|
|
|
}
|
|
|
|
if notifiers != nil {
|
|
|
|
m.notifiers = func() []notifier.Notifier { return notifiers }
|
|
|
|
}
|
|
|
|
err := m.update(context.Background(), []config.Group{cfg}, false)
|
|
|
|
if err == nil {
|
|
|
|
t.Fatalf("expected to get error; got nil")
|
|
|
|
}
|
|
|
|
errStr := err.Error()
|
|
|
|
if !strings.Contains(errStr, errStrExpected) {
|
|
|
|
t.Fatalf("missing %q in the error %q", errStrExpected, errStr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
f(nil, nil, config.Group{
|
|
|
|
Name: "Recording rule only",
|
|
|
|
Rules: []config.Rule{
|
|
|
|
{Record: "record", Expr: "max(up)"},
|
2021-11-29 23:18:48 +00:00
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
}, "contains recording rules")
|
|
|
|
|
|
|
|
f(nil, nil, config.Group{
|
|
|
|
Name: "Alerting rule only",
|
|
|
|
Rules: []config.Rule{
|
|
|
|
{Alert: "alert", Expr: "up > 0"},
|
2021-11-29 23:18:48 +00:00
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
}, "contains alerting rules")
|
|
|
|
|
|
|
|
f([]notifier.Notifier{¬ifier.FakeNotifier{}}, nil, config.Group{
|
|
|
|
Name: "Recording and alerting rules",
|
|
|
|
Rules: []config.Rule{
|
|
|
|
{Alert: "alert1", Expr: "up > 0"},
|
|
|
|
{Alert: "alert2", Expr: "up > 0"},
|
|
|
|
{Record: "record", Expr: "max(up)"},
|
2021-11-29 23:18:48 +00:00
|
|
|
},
|
2024-07-12 19:57:56 +00:00
|
|
|
}, "contains recording rules")
|
2021-11-29 23:18:48 +00:00
|
|
|
|
2024-07-12 19:57:56 +00:00
|
|
|
f(nil, &remotewrite.Client{}, config.Group{
|
|
|
|
Name: "Recording and alerting rules",
|
|
|
|
Rules: []config.Rule{
|
|
|
|
{Record: "record1", Expr: "max(up)"},
|
|
|
|
{Record: "record2", Expr: "max(up)"},
|
|
|
|
{Alert: "alert", Expr: "up > 0"},
|
|
|
|
},
|
|
|
|
}, "contains alerting rules")
|
2021-11-29 23:18:48 +00:00
|
|
|
}
|
|
|
|
|
2021-05-25 13:27:22 +00:00
|
|
|
func loadCfg(t *testing.T, path []string, validateAnnotations, validateExpressions bool) []config.Group {
|
|
|
|
t.Helper()
|
2022-07-22 11:50:41 +00:00
|
|
|
var validateTplFn config.ValidateTplFn
|
|
|
|
if validateAnnotations {
|
|
|
|
validateTplFn = notifier.ValidateTemplates
|
|
|
|
}
|
|
|
|
cfg, err := config.Parse(path, validateTplFn, validateExpressions)
|
2021-05-25 13:27:22 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
return cfg
|
|
|
|
}
|