vmalert: fix the value of the $activeAt variable when templating rule annotations in replay mode

Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
2024-11-21 14:44:00 +00:00 · 2024-09-20 17:07:40 +08:00 · 2024-09-20 17:07:40 +08:00 · d6d02d7aeb
commit d6d02d7aeb
parent 6167bccc5a
3 changed files with 17 additions and 17 deletions
--- a/app/vmalert/rule/alerting.go
+++ b/app/vmalert/rule/alerting.go
@ -335,6 +335,8 @@ func (ar *AlertingRule) execRange(ctx context.Context, start, end time.Time) ([]
 				// reset to Pending if there are gaps > EvalInterval between DPs
 				a.State = notifier.StatePending
 				a.ActiveAt = at
+				// re-template the annotations as active timestamp is changed
+				_, a.Annotations, _ = ar.expandTemplates(s, qFn, at)
 				a.Start = time.Time{}
 			} else if at.Sub(a.ActiveAt) >= ar.For && a.State != notifier.StateFiring {
 				a.State = notifier.StateFiring
--- a/app/vmalert/rule/alerting_test.go
+++ b/app/vmalert/rule/alerting_test.go
@ -289,7 +289,7 @@ func TestAlertingRule_Exec(t *testing.T) {
 		4: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
 	})

-	f(newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>firing", defaultStep, defaultStep), [][]datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("for-pending=>firing=>keepfiring=>firing", defaultStep, 0, defaultStep, nil), [][]datasource.Metric{
 		{metricWithLabels(t, "name", "foo")},
 		{metricWithLabels(t, "name", "foo")},
 		// empty step to keep firing
@ -302,7 +302,7 @@ func TestAlertingRule_Exec(t *testing.T) {
 		3: {{labels: []string{"name", "foo"}, alert: &notifier.Alert{State: notifier.StateFiring}}},
 	})

-	f(newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>keepfiring=>inactive=>pending=>firing", defaultStep, 2*defaultStep), [][]datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("for-pending=>firing=>keepfiring=>keepfiring=>inactive=>pending=>firing", defaultStep, 0, 2*defaultStep, nil), [][]datasource.Metric{
 		{metricWithLabels(t, "name", "foo")},
 		{metricWithLabels(t, "name", "foo")},
 		// empty step to keep firing
@ -395,7 +395,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 		{State: notifier.StateFiring, ActiveAt: time.Unix(3e3, 0)},
 	}, nil)

-	f(newTestAlertingRule("for-pending", time.Second), []datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("for-pending", time.Second, 0, 0, map[string]string{"activeAt": "{{ $activeAt.UnixMilli }}"}), []datasource.Metric{
 		{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
 	}, []*notifier.Alert{
 		{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
@ -406,7 +406,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 			GroupID:     fakeGroup.ID(),
 			Name:        "for-pending",
 			Labels:      map[string]string{"alertname": "for-pending"},
-			Annotations: map[string]string{},
+			Annotations: map[string]string{"activeAt": "5000"},
 			State:       notifier.StatePending,
 			ActiveAt:    time.Unix(5, 0),
 			Value:       1,
@ -414,7 +414,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 		},
 	})

-	f(newTestAlertingRule("for-firing", 3*time.Second), []datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("for-firing", 3*time.Second, 0, 0, map[string]string{"activeAt": "{{ $activeAt.UnixMilli }}"}), []datasource.Metric{
 		{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
 	}, []*notifier.Alert{
 		{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
@ -425,7 +425,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 			GroupID:     fakeGroup.ID(),
 			Name:        "for-firing",
 			Labels:      map[string]string{"alertname": "for-firing"},
-			Annotations: map[string]string{},
+			Annotations: map[string]string{"activeAt": "1000"},
 			State:       notifier.StateFiring,
 			ActiveAt:    time.Unix(1, 0),
 			Start:       time.Unix(5, 0),
@ -434,7 +434,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 		},
 	})

-	f(newTestAlertingRule("for-hold-pending", time.Second), []datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("for-hold-pending", time.Second, 0, 0, map[string]string{"activeAt": "{{ $activeAt.UnixMilli }}"}), []datasource.Metric{
 		{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
 	}, []*notifier.Alert{
 		{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
@ -445,7 +445,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 			GroupID:     fakeGroup.ID(),
 			Name:        "for-hold-pending",
 			Labels:      map[string]string{"alertname": "for-hold-pending"},
-			Annotations: map[string]string{},
+			Annotations: map[string]string{"activeAt": "5000"},
 			State:       notifier.StatePending,
 			ActiveAt:    time.Unix(5, 0),
 			Value:       1,
@ -453,7 +453,7 @@ func TestAlertingRuleExecRange(t *testing.T) {
 		},
 	})

-	f(newTestAlertingRuleWithEvalInterval("firing=>inactive=>inactive=>firing=>firing", 0, time.Second), []datasource.Metric{
+	f(newTestAlertingRuleWithCustomFields("firing=>inactive=>inactive=>firing=>firing", 0, time.Second, 0, nil), []datasource.Metric{
 		{Values: []float64{1, 1, 1, 1}, Timestamps: []int64{1, 4, 5, 6}},
 	}, []*notifier.Alert{
 		{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
@ -538,7 +538,6 @@ func TestAlertingRuleExecRange(t *testing.T) {
 				"source": "vm",
 			},
 		},
-		//
 		{
 			State: notifier.StateFiring, ActiveAt: time.Unix(1, 0),
 			Labels: map[string]string{
@ -1036,15 +1035,13 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
 	return &rule
 }

-func newTestAlertingRuleWithEvalInterval(name string, waitFor, evalInterval time.Duration) *AlertingRule {
+func newTestAlertingRuleWithCustomFields(name string, waitFor, evalInterval, keepFiringFor time.Duration, annotation map[string]string) *AlertingRule {
 	rule := newTestAlertingRule(name, waitFor)
+	if evalInterval != 0 {
 		rule.EvalInterval = evalInterval
-	return rule
 	}
-
-func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.Duration) *AlertingRule {
-	rule := newTestAlertingRule(name, waitFor)
 	rule.KeepFiringFor = keepFiringFor
+	rule.Annotations = annotation
 	return rule
 }

--- a/docs/changelog/CHANGELOG.md
+++ b/docs/changelog/CHANGELOG.md
@ -39,6 +39,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly ingest stale NaN samples. Previously it could be dropped if series didn't exist at storage node. See this [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5069) for details.
 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly track `vm_missing_tsids_for_metric_id_total` metric. See this [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6931) for details.
 * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert): do not send notifications without labels to Alertmanager. Such notifications are rejected by Alertmanager anyway. Before, vmalert could send alert notifications even if no label-value pairs left after applying `alert_relabel_configs` from [notifier config](https://docs.victoriametrics.com/vmalert/#notifier-configuration-file).
+* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert/): properly update the value of the `$activeAt` variable in rule annotations during replay mode. Previously, `$activeAt` could provide incorrect values during replay.
 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/metricsql/): properly handle `c1 AND c2` and `c1 OR c1` queries for constants `c1` and `c2`. Previously such queries could return unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6637).
 * BUGFIX: all VictoriaMetrics components: increase default value of `-loggerMaxArgLen` cmd-line flag from 1000 to 5000. This should improve visibility on errors produced by very long queries.