mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
clear the code for alerts stale metrics
This commit is contained in:
parent
09e9f82758
commit
9e79fb2e11
3 changed files with 29 additions and 36 deletions
|
@ -456,17 +456,18 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||
ar.logDebugf(ts, a, "created in state PENDING")
|
||||
}
|
||||
var numActivePending int
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
// store alerts' labels which are `FIRING => INACTIVE`, `PENDING => INACTIVE` or `PENDING => FIRING` in this iteration,
|
||||
// need to create stale time series for them later.
|
||||
var pendingToFiring, pendingToInactive, firingToInactive []map[string]string
|
||||
for h, a := range ar.alerts {
|
||||
// if alert wasn't updated in this iteration
|
||||
// means it is resolved already
|
||||
if _, ok := updated[h]; !ok {
|
||||
if a.State == notifier.StatePending {
|
||||
// alert was in Pending state - it is not
|
||||
// active anymore
|
||||
pendingToInactive = append(pendingToInactive, a.Labels)
|
||||
// alert was in Pending state - it is not active anymore
|
||||
// add stale time series for it
|
||||
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), true)...)
|
||||
|
||||
delete(ar.alerts, h)
|
||||
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
|
||||
continue
|
||||
|
@ -484,7 +485,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||
if ts.Sub(a.KeepFiringSince) >= ar.KeepFiringFor {
|
||||
a.State = notifier.StateInactive
|
||||
a.ResolvedAt = ts
|
||||
firingToInactive = append(firingToInactive, a.Labels)
|
||||
// add stale time series for it
|
||||
tss = append(tss, firingAlertStaleTimeSeries(a.Labels, ts.Unix())...)
|
||||
|
||||
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
||||
continue
|
||||
}
|
||||
|
@ -497,7 +500,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||
a.Start = ts
|
||||
alertsFired.Inc()
|
||||
if ar.For > 0 {
|
||||
pendingToFiring = append(pendingToFiring, a.Labels)
|
||||
// add stale time series for it
|
||||
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), false)...)
|
||||
}
|
||||
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
|
||||
}
|
||||
|
@ -507,8 +511,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
|||
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||
return nil, curState.Err
|
||||
}
|
||||
|
||||
return ar.toTimeSeries(ts.Unix(), pendingToFiring, firingToInactive, pendingToInactive), nil
|
||||
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
||||
}
|
||||
|
||||
func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.QueryFn, ts time.Time) (*labelSet, map[string]string, error) {
|
||||
|
@ -533,9 +536,8 @@ func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.Query
|
|||
return ls, as, nil
|
||||
}
|
||||
|
||||
// toTimeSeries creates `ALERTS` and `ALERTS_FOR_STATE` for active alerts,
|
||||
// also includes stale metrics for alerts which changed their state.
|
||||
func (ar *AlertingRule) toTimeSeries(timestamp int64, pendingToFiring, firingToInactive, pendingToInactive []map[string]string) []prompbmarshal.TimeSeries {
|
||||
// toTimeSeries creates `ALERTS` and `ALERTS_FOR_STATE` for active alerts
|
||||
func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries {
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
for _, a := range ar.alerts {
|
||||
if a.State == notifier.StateInactive {
|
||||
|
@ -544,15 +546,6 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64, pendingToFiring, firingToI
|
|||
ts := ar.alertToTimeSeries(a, timestamp)
|
||||
tss = append(tss, ts...)
|
||||
}
|
||||
for i := range pendingToFiring {
|
||||
tss = append(tss, pendingAlertStaleTimeSeries(pendingToFiring[i], timestamp, false)...)
|
||||
}
|
||||
for i := range pendingToInactive {
|
||||
tss = append(tss, pendingAlertStaleTimeSeries(pendingToInactive[i], timestamp, true)...)
|
||||
}
|
||||
for i := range firingToInactive {
|
||||
tss = append(tss, firingAlertStaleTimeSeries(firingToInactive[i], timestamp)...)
|
||||
}
|
||||
return tss
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
|
|||
t.Helper()
|
||||
|
||||
rule.alerts[alert.ID] = alert
|
||||
tss := rule.toTimeSeries(timestamp.Unix(), nil, nil, nil)
|
||||
tss := rule.toTimeSeries(timestamp.Unix())
|
||||
if err := compareTimeSeries(t, tssExpected, tss); err != nil {
|
||||
t.Fatalf("timeseries mismatch for rule %q: %s", rule.Name, err)
|
||||
}
|
||||
|
@ -207,7 +207,7 @@ func TestAlertingRule_Exec(t *testing.T) {
|
|||
// check generate time series
|
||||
if _, ok := tssExpected[i]; ok {
|
||||
if err := compareTimeSeries(t, tssExpected[i], tss); err != nil {
|
||||
t.Fatalf("generated time series mismatch for rule %q: %s", rule.Name, err)
|
||||
t.Fatalf("generated time series mismatch for rule %q in step %d: %s", rule.Name, i, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -350,28 +350,28 @@ func TestAlertingRule_Exec(t *testing.T) {
|
|||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
||||
},
|
||||
1: {
|
||||
// new time series for foo1
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
// stale time series for foo, `firing -> inactive`
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
// new time series for foo1
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
},
|
||||
2: {
|
||||
// new time series for foo2
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo2"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo2"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
// stale time series for foo1
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
// new time series for foo2
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo2"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo2"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||
},
|
||||
})
|
||||
|
||||
|
@ -395,13 +395,13 @@ func TestAlertingRule_Exec(t *testing.T) {
|
|||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
||||
},
|
||||
1: {
|
||||
// stale time series for `pending -> firing`
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
// stale time series for `pending -> firing`
|
||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
|
||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||
},
|
||||
})
|
||||
|
||||
|
|
|
@ -9,8 +9,8 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
|
|
Loading…
Reference in a new issue