vmalert: add keep_firing_for field for alerting rule (#4669)

vmalert: support `keep_firing_for` field for alerting rule

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4529

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
Haleygo 2023-07-27 21:13:13 +08:00 committed by GitHub
parent 7fc34aa1e6
commit ae0e4a8c90
13 changed files with 817 additions and 620 deletions


@ -203,6 +203,10 @@ expr: <string>
# as firing once they return.
[ for: <duration> | default = 0s ]
# Alert will continue firing for this long even when the alerting expression no longer has results.
# This allows you to delay alert resolution.
[ keep_firing_for: <duration> | default = 0s ]
# Whether to print debug information into logs.
# Information includes alerts state changes and requests sent to the datasource.
# Please note, that if rule's query params contain sensitive
@ -736,6 +740,7 @@ See full description for these flags in `./vmalert -help`.
* Graphite engine isn't supported yet;
* `query` template function is disabled for performance reasons (might be changed in future);
* `limit` group's param has no effect during replay (might be changed in future);
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
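For illustration, here is a minimal sketch of an alerting rule that uses the new `keep_firing_for` field documented above. The group name, alert name, expression and durations are hypothetical and not part of this commit:

```yaml
groups:
  - name: example                 # hypothetical group name
    rules:
      - alert: HighErrorRate      # hypothetical alert name
        expr: rate(http_requests_errors_total[5m]) > 0.05
        for: 5m                   # expression must hold for 5m before the alert fires
        # keep the alert firing for 10m after the expression stops returning results
        keep_firing_for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High error rate; resolution is delayed by keep_firing_for"
```

With such a rule, a firing alert is not resolved immediately once the expression stops matching: it keeps firing for another 10 minutes (and resets that timer if the expression matches again) before transitioning to inactive, which helps avoid flapping notifications.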
## Unit Testing for Rules


@ -21,17 +21,18 @@ import (
// AlertingRule is basic alert entity
type AlertingRule struct {
Type config.Type
RuleID uint64
Name string
Expr string
For time.Duration
Labels map[string]string
Annotations map[string]string
GroupID uint64
GroupName string
EvalInterval time.Duration
Debug bool
Type config.Type
RuleID uint64
Name string
Expr string
For time.Duration
KeepFiringFor time.Duration
Labels map[string]string
Annotations map[string]string
GroupID uint64
GroupName string
EvalInterval time.Duration
Debug bool
q datasource.Querier
@ -56,17 +57,18 @@ type alertingRuleMetrics struct {
func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {
ar := &AlertingRule{
Type: group.Type,
RuleID: cfg.ID,
Name: cfg.Alert,
Expr: cfg.Expr,
For: cfg.For.Duration(),
Labels: cfg.Labels,
Annotations: cfg.Annotations,
GroupID: group.ID(),
GroupName: group.Name,
EvalInterval: group.Interval,
Debug: cfg.Debug,
Type: group.Type,
RuleID: cfg.ID,
Name: cfg.Alert,
Expr: cfg.Expr,
For: cfg.For.Duration(),
KeepFiringFor: cfg.KeepFiringFor.Duration(),
Labels: cfg.Labels,
Annotations: cfg.Annotations,
GroupID: group.ID(),
GroupName: group.Name,
EvalInterval: group.Interval,
Debug: cfg.Debug,
q: qb.BuildWithParams(datasource.QuerierParams{
DataSourceType: group.Type.String(),
EvaluationInterval: group.Interval,
@ -366,6 +368,7 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
if err != nil {
return nil, err
}
a.KeepFiringSince = time.Time{}
continue
}
a, err := ar.newAlert(m, ls, start, qFn)
@ -391,12 +394,24 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
continue
}
// check if alert should keep StateFiring if rule has
// `keep_firing_for` field
if a.State == notifier.StateFiring {
a.State = notifier.StateInactive
a.ResolvedAt = ts
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
if ar.KeepFiringFor > 0 {
if a.KeepFiringSince.IsZero() {
a.KeepFiringSince = ts
}
}
// alerts with ar.KeepFiringFor>0 may remain FIRING
// even if their expression isn't true anymore
if ts.Sub(a.KeepFiringSince) > ar.KeepFiringFor {
a.State = notifier.StateInactive
a.ResolvedAt = ts
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
continue
}
ar.logDebugf(ts, a, "KEEP_FIRING: will keep firing for %fs since %v", ar.KeepFiringFor.Seconds(), a.KeepFiringSince)
}
continue
}
numActivePending++
if a.State == notifier.StatePending && ts.Sub(a.ActiveAt) >= ar.For {
@ -436,6 +451,7 @@ func (ar *AlertingRule) UpdateWith(r Rule) error {
}
ar.Expr = nr.Expr
ar.For = nr.For
ar.KeepFiringFor = nr.KeepFiringFor
ar.Labels = nr.Labels
ar.Annotations = nr.Annotations
ar.EvalInterval = nr.EvalInterval
@ -508,6 +524,7 @@ func (ar *AlertingRule) ToAPI() APIRule {
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
KeepFiringFor: ar.KeepFiringFor.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.time,
@ -576,6 +593,9 @@ func (ar *AlertingRule) newAlertAPI(a notifier.Alert) *APIAlert {
if alertURLGeneratorFn != nil {
aa.SourceLink = alertURLGeneratorFn(a)
}
if a.State == notifier.StateFiring && !a.KeepFiringSince.IsZero() {
aa.Stabilizing = true
}
return aa
}


@ -113,7 +113,7 @@ func TestAlertingRule_Exec(t *testing.T) {
testCases := []struct {
rule *AlertingRule
steps [][]datasource.Metric
expAlerts []testAlert
expAlerts map[int][]testAlert
}{
{
newTestAlertingRule("empty", 0),
@ -125,50 +125,8 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
},
[]testAlert{
{alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
{metricWithLabels(t, "name", "foo")},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
map[int][]testAlert{
0: {{alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
{
@ -180,12 +138,16 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
},
},
{
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive=>firing", 0),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{},
@ -194,8 +156,13 @@ func TestAlertingRule_Exec(t *testing.T) {
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
5: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
{
@ -207,10 +174,12 @@ func TestAlertingRule_Exec(t *testing.T) {
metricWithLabels(t, "name", "foo2"),
},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
map[int][]testAlert{
0: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
},
{
@ -223,10 +192,19 @@ func TestAlertingRule_Exec(t *testing.T) {
// 1: fire first alert
// 2: fire second alert, set first inactive
// 3: fire third alert, set second inactive
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
map[int][]testAlert{
0: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
1: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
2: {
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo1"}, alert: &notifier.Alert{State: notifier.StateInactive}},
{labels: []string{"name", "foo2"}, alert: &notifier.Alert{State: notifier.StateFiring}},
},
},
},
{
@ -234,8 +212,8 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
},
},
{
@ -244,8 +222,9 @@ func TestAlertingRule_Exec(t *testing.T) {
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
{
@ -253,34 +232,13 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset and delete pending alerts
// empty step to delete pending alerts
{},
},
nil,
},
{
newTestAlertingRule("for-pending=>firing=>inactive", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
{},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}},
},
},
{
newTestAlertingRule("for-pending=>firing=>inactive=>pending", defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
{},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
2: {},
},
},
{
@ -288,13 +246,57 @@ func TestAlertingRule_Exec(t *testing.T) {
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to reset pending alerts
// empty step to set alert inactive
{},
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
[]testAlert{
{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
{
newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>firing", defaultStep, defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to keep firing
{},
{metricWithLabels(t, "name", "foo")},
},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
{
newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>keepfiring=>inactive=>pending=>firing", defaultStep, 2*defaultStep),
[][]datasource.Metric{
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
// empty step to keep firing
{},
// another empty step to keep firing
{},
// empty step to set alert inactive
{},
{metricWithLabels(t, "name", "foo")},
{metricWithLabels(t, "name", "foo")},
},
map[int][]testAlert{
0: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
1: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
2: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateInactive}}},
5: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StatePending}}},
6: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
},
},
}
@ -304,7 +306,7 @@ func TestAlertingRule_Exec(t *testing.T) {
fq := &fakeQuerier{}
tc.rule.q = fq
tc.rule.GroupID = fakeGroup.ID()
for _, step := range tc.steps {
for i, step := range tc.steps {
fq.reset()
fq.add(step...)
if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil {
@ -312,28 +314,31 @@ func TestAlertingRule_Exec(t *testing.T) {
}
// artificial delay between applying steps
time.Sleep(defaultStep)
}
if len(tc.rule.alerts) != len(tc.expAlerts) {
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
}
expAlerts := make(map[uint64]*notifier.Alert)
for _, ta := range tc.expAlerts {
labels := make(map[string]string)
for i := 0; i < len(ta.labels); i += 2 {
k, v := ta.labels[i], ta.labels[i+1]
labels[k] = v
if _, ok := tc.expAlerts[i]; !ok {
continue
}
labels[alertNameLabel] = tc.rule.Name
h := hash(labels)
expAlerts[h] = ta.alert
}
for key, exp := range expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("expected to have key %d", key)
if len(tc.rule.alerts) != len(tc.expAlerts[i]) {
t.Fatalf("evalIndex %d: expected %d alerts; got %d", i, len(tc.expAlerts[i]), len(tc.rule.alerts))
}
if got.State != exp.State {
t.Fatalf("expected state %d; got %d", exp.State, got.State)
expAlerts := make(map[uint64]*notifier.Alert)
for _, ta := range tc.expAlerts[i] {
labels := make(map[string]string)
for i := 0; i < len(ta.labels); i += 2 {
k, v := ta.labels[i], ta.labels[i+1]
labels[k] = v
}
labels[alertNameLabel] = tc.rule.Name
h := hash(labels)
expAlerts[h] = ta.alert
}
for key, exp := range expAlerts {
got, ok := tc.rule.alerts[key]
if !ok {
t.Fatalf("evalIndex %d: expected to have key %d", i, key)
}
if got.State != exp.State {
t.Fatalf("evalIndex %d: expected state %d; got %d", i, exp.State, got.State)
}
}
}
})
@ -867,7 +872,6 @@ func TestAlertingRule_Template(t *testing.T) {
gotAlert := tc.rule.alerts[hash]
if gotAlert == nil {
t.Fatalf("alert %d is missing; labels: %v; annotations: %v", hash, expAlert.Labels, expAlert.Annotations)
break
}
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
@ -970,11 +974,18 @@ func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
}
func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
return &AlertingRule{
rule := AlertingRule{
Name: name,
For: waitFor,
EvalInterval: waitFor,
alerts: make(map[uint64]*notifier.Alert),
state: newRuleState(10),
}
return &rule
}
func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.Duration) *AlertingRule {
rule := newTestAlertingRule(name, waitFor)
rule.KeepFiringFor = keepFiringFor
return rule
}


@ -105,14 +105,16 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
// Rule describes entity that represent either
// recording rule or alerting rule.
type Rule struct {
ID uint64
Record string `yaml:"record,omitempty"`
Alert string `yaml:"alert,omitempty"`
Expr string `yaml:"expr"`
For *promutils.Duration `yaml:"for,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Annotations map[string]string `yaml:"annotations,omitempty"`
Debug bool `yaml:"debug,omitempty"`
ID uint64
Record string `yaml:"record,omitempty"`
Alert string `yaml:"alert,omitempty"`
Expr string `yaml:"expr"`
For *promutils.Duration `yaml:"for,omitempty"`
// Alert will continue firing for this long even when the alerting expression no longer has results.
KeepFiringFor *promutils.Duration `yaml:"keep_firing_for,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Annotations map[string]string `yaml:"annotations,omitempty"`
Debug bool `yaml:"debug,omitempty"`
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
// Overrides `-rule.updateEntriesLimit`.
UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"`


@ -404,7 +404,7 @@ func TestHashRule(t *testing.T) {
true,
},
{
Rule{Alert: "alert", Expr: "up == 1", For: promutils.NewDuration(time.Minute)},
Rule{Alert: "alert", Expr: "up == 1", For: promutils.NewDuration(time.Minute), KeepFiringFor: promutils.NewDuration(time.Minute)},
Rule{Alert: "alert", Expr: "up == 1"},
true,
},


@ -46,18 +46,36 @@ func TestUpdateWith(t *testing.T) {
"summary": "{{ $value|humanize }}",
"description": "{{$labels}}",
},
}},
[]config.Rule{{
Alert: "foo",
Expr: "up > 10",
For: promutils.NewDuration(time.Second),
Labels: map[string]string{
"baz": "bar",
},
{
Alert: "bar",
Expr: "up > 0",
For: promutils.NewDuration(time.Second),
Labels: map[string]string{
"bar": "baz",
},
}},
[]config.Rule{
{
Alert: "foo",
Expr: "up > 10",
For: promutils.NewDuration(time.Second),
Labels: map[string]string{
"baz": "bar",
},
Annotations: map[string]string{
"summary": "none",
},
},
Annotations: map[string]string{
"summary": "none",
},
}},
{
Alert: "bar",
Expr: "up > 0",
For: promutils.NewDuration(2 * time.Second),
KeepFiringFor: promutils.NewDuration(time.Minute),
Labels: map[string]string{
"bar": "baz",
},
}},
},
{
"update recording rule",


@ -272,6 +272,9 @@ func compareAlertingRules(t *testing.T, a, b *AlertingRule) error {
if a.For != b.For {
return fmt.Errorf("expected to have for %q; got %q", a.For, b.For)
}
if a.KeepFiringFor != b.KeepFiringFor {
return fmt.Errorf("expected to have KeepFiringFor %q; got %q", a.KeepFiringFor, b.KeepFiringFor)
}
if !reflect.DeepEqual(a.Annotations, b.Annotations) {
return fmt.Errorf("expected to have annotations %#v; got %#v", a.Annotations, b.Annotations)
}


@ -39,6 +39,8 @@ type Alert struct {
ResolvedAt time.Time
// LastSent defines the moment when Alert was sent last time
LastSent time.Time
// KeepFiringSince defines the moment since which the alert's firing state has been kept by `keep_firing_for` rather than by the alerting expression
KeepFiringSince time.Time
// Value stores the value returned from evaluating expression from Expr field
Value float64
// ID is the unique identifier for the Alert


@ -116,7 +116,11 @@ btn-primary
<div class="row">
<div class="col-12 mb-2">
{% if r.Type == "alerting" %}
{% if r.KeepFiringFor > 0 %}
<b>alert:</b> {%s r.Name %} (for: {%v r.Duration %} seconds, keep_firing_for: {%v r.KeepFiringFor %} seconds)
{% else %}
<b>alert:</b> {%s r.Name %} (for: {%v r.Duration %} seconds)
{% endif %}
{% else %}
<b>record:</b> {%s r.Name %}
{% endif %}
@ -225,6 +229,7 @@ btn-primary
<td>
{%s ar.ActiveAt.Format("2006-01-02T15:04:05Z07:00") %}
{% if ar.Restored %}{%= badgeRestored() %}{% endif %}
{% if ar.Stabilizing %}{%= badgeStabilizing() %}{% endif %}
</td>
<td>{%s ar.Value %}</td>
<td>
@ -442,6 +447,18 @@ btn-primary
</div>
</div>
</div>
{% if rule.KeepFiringFor > 0 %}
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">
Keep firing for
</div>
<div class="col">
{%v rule.KeepFiringFor %} seconds
</div>
</div>
</div>
{% endif %}
{% endif %}
<div class="container border-bottom p-2">
<div class="row">
@ -561,6 +578,10 @@ btn-primary
<span class="badge bg-warning text-dark" title="Alert state was restored after the service restart from remote storage">restored</span>
{% endfunc %}
{% func badgeStabilizing() %}
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
{% endfunc %}
{% func seriesFetchedWarn(r APIRule) %}
{% if isNoMatch(r) %}
<svg xmlns="http://www.w3.org/2000/svg"

File diff suppressed because it is too large.


@ -32,6 +32,9 @@ type APIAlert struct {
SourceLink string `json:"source"`
// Restored shows whether Alert's state was restored on restart
Restored bool `json:"restored"`
// Stabilizing shows whether the firing state is kept because of
// `keep_firing_for` rather than because the alerting expression still matches
Stabilizing bool `json:"stabilizing"`
}
// WebLink returns a link to the alert which can be used in UI.
@ -96,9 +99,11 @@ type APIRule struct {
// Query represents Rule's `expression` field
Query string `json:"query"`
// Duration represents Rule's `for` field
Duration float64 `json:"duration"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
Duration float64 `json:"duration"`
// Alert will continue firing for this long even when the alerting expression no longer has results.
KeepFiringFor float64 `json:"keep_firing_for"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
// LastError contains the error faced while executing the rule.
LastError string `json:"lastError"`
// EvaluationTime is the time taken to completely evaluate the rule in float seconds.
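For context, here is a trimmed, hypothetical sketch of how the new fields might appear in vmalert's rules API output (for example `/api/v1/rules`), reusing the illustrative rule from the YAML sketch earlier. The fields shown are taken from the JSON tags visible in this diff, and all values are made up:

```json
{
  "query": "rate(http_requests_errors_total[5m]) > 0.05",
  "duration": 300,
  "keep_firing_for": 600,
  "labels": {"severity": "warning"},
  "lastError": ""
}
```

And an individual alert that is currently kept firing by `keep_firing_for`:

```json
{
  "source": "http://vmalert.example.com/vmalert/alert?group_id=123&alert_id=456",
  "restored": false,
  "stabilizing": true
}
```

The `stabilizing` flag corresponds to the `badgeStabilizing()` indicator added to the UI above: it is set only while an alert remains in the firing state solely because of `keep_firing_for`.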


@ -66,7 +66,8 @@ The previous behavior can be restored in the following ways:
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow disabling of `step` param attached to [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query). This might be useful for using vmalert with datasources that do not support this param, unlike VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4573) for details.
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support option for "blackholing" alerting notifications if `-notifier.blackhole` cmd-line flag is set. Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (e.g. Alertmanager). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4122) for details. Thanks to @venkatbvc for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4639).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add unit tests for alerting and recording rules, see more [details](https://docs.victoriametrics.com/vmalert.html#unit-testing-for-rules). Thanks to @Haleygo for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4596).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow overriding default GET params for rules with `graphite` datasource type, in the same way as it happens for `prometheus` type. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4685).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support `keep_firing_for` field for alerting rules. See docs updated [here](https://docs.victoriametrics.com/vmalert.html#alerting-rules) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4529). Thanks to @Haleygo for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4669).
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): expose `vmauth_user_request_duration_seconds` and `vmauth_unauthorized_user_request_duration_seconds` summary metrics for measuring requests latency per user.
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): show backup progress percentage in log during backup uploading. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4460).
* FEATURE: [vmrestore](https://docs.victoriametrics.com/vmrestore.html): show restoring progress percentage in log during backup downloading. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4460).


@ -214,6 +214,10 @@ expr: <string>
# as firing once they return.
[ for: <duration> | default = 0s ]
# Alert will continue firing for this long even when the alerting expression no longer has results.
# This allows you to delay alert resolution.
[ keep_firing_for: <duration> | default = 0s ]
# Whether to print debug information into logs.
# Information includes alerts state changes and requests sent to the datasource.
# Please note, that if rule's query params contain sensitive
@ -747,6 +751,7 @@ See full description for these flags in `./vmalert -help`.
* Graphite engine isn't supported yet;
* `query` template function is disabled for performance reasons (might be changed in future);
* `limit` group's param has no effect during replay (might be changed in future);
* `keep_firing_for` alerting rule param has no effect during replay (might be changed in future).
## Unit Testing for Rules