mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
8c8ff5d0cb
The change introduces new entity `manager` which replaces `watchdog`, decouples requestHandler and groups. Manager supposed to control life cycle of groups, rules and config reloads. Groups export an ID method which returns a hash from filename and group name. ID supposed to be unique identifier across all loaded groups. Some tests were added to improve coverage. Bug with wrong annotation value if $value is used in templates after metrics being restored fixed. Notifier interface was extended to accept context. New set of metrics was introduced for config reload.
545 lines
15 KiB
Go
545 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
)
|
|
|
|
func TestRule_Validate(t *testing.T) {
|
|
if err := (&Rule{}).Validate(); err == nil {
|
|
t.Errorf("exptected empty name error")
|
|
}
|
|
if err := (&Rule{Name: "alert"}).Validate(); err == nil {
|
|
t.Errorf("exptected empty expr error")
|
|
}
|
|
if err := (&Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
|
|
t.Errorf("exptected invalid expr error")
|
|
}
|
|
if err := (&Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
|
|
t.Errorf("exptected valid rule got %s", err)
|
|
}
|
|
}
|
|
|
|
func TestRule_AlertToTimeSeries(t *testing.T) {
|
|
timestamp := time.Now()
|
|
testCases := []struct {
|
|
rule *Rule
|
|
alert *notifier.Alert
|
|
expTS []prompbmarshal.TimeSeries
|
|
}{
|
|
{
|
|
newTestRule("instant", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("instant extra labels", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
"job": "foo",
|
|
"instance": "bar",
|
|
}},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant extra labels",
|
|
"job": "foo",
|
|
"instance": "bar",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("instant labels override", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
alertStateLabel: "foo",
|
|
"__name__": "bar",
|
|
}},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant labels override",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for", time.Second),
|
|
¬ifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "for",
|
|
}, timestamp),
|
|
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
|
"__name__": alertForStateMetricName,
|
|
alertNameLabel: "for",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for pending", 10*time.Second),
|
|
¬ifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StatePending.String(),
|
|
alertNameLabel: "for pending",
|
|
}, timestamp),
|
|
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
|
"__name__": alertForStateMetricName,
|
|
alertNameLabel: "for pending",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
tss := tc.rule.AlertToTimeSeries(tc.alert, timestamp)
|
|
if len(tc.expTS) != len(tss) {
|
|
t.Fatalf("expected number of timeseries %d; got %d", len(tc.expTS), len(tss))
|
|
}
|
|
for i := range tc.expTS {
|
|
expTS, gotTS := tc.expTS[i], tss[i]
|
|
if len(expTS.Samples) != len(gotTS.Samples) {
|
|
t.Fatalf("expected number of samples %d; got %d", len(expTS.Samples), len(gotTS.Samples))
|
|
}
|
|
for i, exp := range expTS.Samples {
|
|
got := gotTS.Samples[i]
|
|
if got.Value != exp.Value {
|
|
t.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
|
|
}
|
|
if got.Timestamp != exp.Timestamp {
|
|
t.Errorf("expected timestamp %d; got %d", exp.Timestamp, got.Timestamp)
|
|
}
|
|
}
|
|
if len(expTS.Labels) != len(gotTS.Labels) {
|
|
t.Fatalf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
|
|
}
|
|
for i, exp := range expTS.Labels {
|
|
got := gotTS.Labels[i]
|
|
if got.Name != exp.Name {
|
|
t.Errorf("expected label name %q; got %q", exp.Name, got.Name)
|
|
}
|
|
if got.Value != exp.Value {
|
|
t.Errorf("expected label value %q; got %q", exp.Value, got.Value)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func newTestRule(name string, waitFor time.Duration) *Rule {
|
|
return &Rule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor}
|
|
}
|
|
|
|
func TestRule_Exec(t *testing.T) {
|
|
testCases := []struct {
|
|
rule *Rule
|
|
steps [][]datasource.Metric
|
|
expAlerts map[uint64]*notifier.Alert
|
|
}{
|
|
{
|
|
newTestRule("empty", 0),
|
|
[][]datasource.Metric{},
|
|
map[uint64]*notifier.Alert{},
|
|
},
|
|
{
|
|
newTestRule("empty labels", 0),
|
|
[][]datasource.Metric{
|
|
{datasource.Metric{}},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(datasource.Metric{}): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("multiple-firing", 0),
|
|
[][]datasource.Metric{
|
|
{
|
|
metricWithLabels(t, "name", "foo"),
|
|
metricWithLabels(t, "name", "foo1"),
|
|
metricWithLabels(t, "name", "foo2"),
|
|
},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateFiring},
|
|
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("multiple-steps-firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo1")},
|
|
{metricWithLabels(t, "name", "foo2")},
|
|
},
|
|
// 1: fire first alert
|
|
// 2: fire second alert, set first inactive
|
|
// 3: fire third alert, set second inactive, delete first one
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateInactive},
|
|
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("duplicate", 0),
|
|
[][]datasource.Metric{
|
|
{
|
|
// metrics with the same labelset should result in one alert
|
|
metricWithLabels(t, "name", "foo", "type", "bar"),
|
|
metricWithLabels(t, "type", "bar", "name", "foo"),
|
|
},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo", "type", "bar")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending", time.Minute),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-fired", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>inactive", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive=>pending", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive=>pending=>firing", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
}
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
fq := &fakeQuerier{}
|
|
tc.rule.group = fakeGroup
|
|
for _, step := range tc.steps {
|
|
fq.reset()
|
|
fq.add(step...)
|
|
if err := tc.rule.Exec(context.TODO(), fq); err != nil {
|
|
t.Fatalf("unexpected err: %s", err)
|
|
}
|
|
// artificial delay between applying steps
|
|
time.Sleep(time.Millisecond)
|
|
}
|
|
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
|
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
|
}
|
|
for key, exp := range tc.expAlerts {
|
|
got, ok := tc.rule.alerts[key]
|
|
if !ok {
|
|
t.Fatalf("expected to have key %d", key)
|
|
}
|
|
if got.State != exp.State {
|
|
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
|
t.Helper()
|
|
if len(labels) == 0 || len(labels)%2 != 0 {
|
|
t.Fatalf("expected to get even number of labels")
|
|
}
|
|
m := datasource.Metric{}
|
|
for i := 0; i < len(labels); i += 2 {
|
|
m.Labels = append(m.Labels, datasource.Label{
|
|
Name: labels[i],
|
|
Value: labels[i+1],
|
|
})
|
|
}
|
|
return m
|
|
}
|
|
|
|
type fakeQuerier struct {
|
|
sync.Mutex
|
|
metrics []datasource.Metric
|
|
}
|
|
|
|
func (fq *fakeQuerier) reset() {
|
|
fq.Lock()
|
|
fq.metrics = fq.metrics[:0]
|
|
fq.Unlock()
|
|
}
|
|
|
|
func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
|
fq.Lock()
|
|
fq.metrics = append(fq.metrics, metrics...)
|
|
fq.Unlock()
|
|
}
|
|
|
|
func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
|
|
fq.Lock()
|
|
cpy := make([]datasource.Metric, len(fq.metrics))
|
|
copy(cpy, fq.metrics)
|
|
fq.Unlock()
|
|
return cpy, nil
|
|
}
|
|
|
|
func TestRule_Restore(t *testing.T) {
|
|
testCases := []struct {
|
|
rule *Rule
|
|
metrics []datasource.Metric
|
|
expAlerts map[uint64]*notifier.Alert
|
|
}{
|
|
{
|
|
newTestRuleWithLabels("no extra labels"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(datasource.Metric{}): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("metric labels"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t,
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
)): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("rule labels", "source", "vm"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
// following pair supposed to be dropped
|
|
"source", "vm",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t,
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
)): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("multiple alerts"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-1",
|
|
),
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(2*time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-2",
|
|
),
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(3*time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-3",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "host", "localhost-1")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
hash(metricWithLabels(t, "host", "localhost-2")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(2 * time.Hour)},
|
|
hash(metricWithLabels(t, "host", "localhost-3")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(3 * time.Hour)},
|
|
},
|
|
},
|
|
}
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
fq := &fakeQuerier{}
|
|
tc.rule.group = fakeGroup
|
|
fq.add(tc.metrics...)
|
|
if err := tc.rule.Restore(context.TODO(), fq, time.Hour); err != nil {
|
|
t.Fatalf("unexpected err: %s", err)
|
|
}
|
|
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
|
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
|
}
|
|
for key, exp := range tc.expAlerts {
|
|
got, ok := tc.rule.alerts[key]
|
|
if !ok {
|
|
t.Fatalf("expected to have key %d", key)
|
|
}
|
|
if got.State != exp.State {
|
|
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
|
}
|
|
if got.Start != exp.Start {
|
|
t.Fatalf("expected Start %v; got %v", exp.Start, got.Start)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func newTestRuleWithLabels(name string, labels ...string) *Rule {
|
|
r := newTestRule(name, 0)
|
|
r.Labels = make(map[string]string)
|
|
for i := 0; i < len(labels); i += 2 {
|
|
r.Labels[labels[i]] = labels[i+1]
|
|
}
|
|
return r
|
|
}
|
|
|
|
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
|
|
t.Helper()
|
|
m := metricWithLabels(t, labels...)
|
|
m.Value = value
|
|
return m
|
|
}
|