mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
clear the code for alerts stale metrics
This commit is contained in:
parent
09e9f82758
commit
9e79fb2e11
3 changed files with 29 additions and 36 deletions
|
@ -456,17 +456,18 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
ar.logDebugf(ts, a, "created in state PENDING")
|
ar.logDebugf(ts, a, "created in state PENDING")
|
||||||
}
|
}
|
||||||
var numActivePending int
|
var numActivePending int
|
||||||
|
var tss []prompbmarshal.TimeSeries
|
||||||
// store alerts' labels which are `FIRING => INACTIVE`, `PENDING => INACTIVE` or `PENDING => FIRING` in this iteration,
|
// store alerts' labels which are `FIRING => INACTIVE`, `PENDING => INACTIVE` or `PENDING => FIRING` in this iteration,
|
||||||
// need to create stale time series for them later.
|
// need to create stale time series for them later.
|
||||||
var pendingToFiring, pendingToInactive, firingToInactive []map[string]string
|
|
||||||
for h, a := range ar.alerts {
|
for h, a := range ar.alerts {
|
||||||
// if alert wasn't updated in this iteration
|
// if alert wasn't updated in this iteration
|
||||||
// means it is resolved already
|
// means it is resolved already
|
||||||
if _, ok := updated[h]; !ok {
|
if _, ok := updated[h]; !ok {
|
||||||
if a.State == notifier.StatePending {
|
if a.State == notifier.StatePending {
|
||||||
// alert was in Pending state - it is not
|
// alert was in Pending state - it is not active anymore
|
||||||
// active anymore
|
// add stale time series for it
|
||||||
pendingToInactive = append(pendingToInactive, a.Labels)
|
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), true)...)
|
||||||
|
|
||||||
delete(ar.alerts, h)
|
delete(ar.alerts, h)
|
||||||
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
|
ar.logDebugf(ts, a, "PENDING => DELETED: is absent in current evaluation round")
|
||||||
continue
|
continue
|
||||||
|
@ -484,7 +485,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
if ts.Sub(a.KeepFiringSince) >= ar.KeepFiringFor {
|
if ts.Sub(a.KeepFiringSince) >= ar.KeepFiringFor {
|
||||||
a.State = notifier.StateInactive
|
a.State = notifier.StateInactive
|
||||||
a.ResolvedAt = ts
|
a.ResolvedAt = ts
|
||||||
firingToInactive = append(firingToInactive, a.Labels)
|
// add stale time series for it
|
||||||
|
tss = append(tss, firingAlertStaleTimeSeries(a.Labels, ts.Unix())...)
|
||||||
|
|
||||||
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
ar.logDebugf(ts, a, "FIRING => INACTIVE: is absent in current evaluation round")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -497,7 +500,8 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
a.Start = ts
|
a.Start = ts
|
||||||
alertsFired.Inc()
|
alertsFired.Inc()
|
||||||
if ar.For > 0 {
|
if ar.For > 0 {
|
||||||
pendingToFiring = append(pendingToFiring, a.Labels)
|
// add stale time series for it
|
||||||
|
tss = append(tss, pendingAlertStaleTimeSeries(a.Labels, ts.Unix(), false)...)
|
||||||
}
|
}
|
||||||
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
|
ar.logDebugf(ts, a, "PENDING => FIRING: %s since becoming active at %v", ts.Sub(a.ActiveAt), a.ActiveAt)
|
||||||
}
|
}
|
||||||
|
@ -507,8 +511,7 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
|
||||||
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
curState.Err = fmt.Errorf("exec exceeded limit of %d with %d alerts", limit, numActivePending)
|
||||||
return nil, curState.Err
|
return nil, curState.Err
|
||||||
}
|
}
|
||||||
|
return append(tss, ar.toTimeSeries(ts.Unix())...), nil
|
||||||
return ar.toTimeSeries(ts.Unix(), pendingToFiring, firingToInactive, pendingToInactive), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.QueryFn, ts time.Time) (*labelSet, map[string]string, error) {
|
func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.QueryFn, ts time.Time) (*labelSet, map[string]string, error) {
|
||||||
|
@ -533,9 +536,8 @@ func (ar *AlertingRule) expandTemplates(m datasource.Metric, qFn templates.Query
|
||||||
return ls, as, nil
|
return ls, as, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// toTimeSeries creates `ALERTS` and `ALERTS_FOR_STATE` for active alerts,
|
// toTimeSeries creates `ALERTS` and `ALERTS_FOR_STATE` for active alerts
|
||||||
// also includes stale metrics for alerts which changed their state.
|
func (ar *AlertingRule) toTimeSeries(timestamp int64) []prompbmarshal.TimeSeries {
|
||||||
func (ar *AlertingRule) toTimeSeries(timestamp int64, pendingToFiring, firingToInactive, pendingToInactive []map[string]string) []prompbmarshal.TimeSeries {
|
|
||||||
var tss []prompbmarshal.TimeSeries
|
var tss []prompbmarshal.TimeSeries
|
||||||
for _, a := range ar.alerts {
|
for _, a := range ar.alerts {
|
||||||
if a.State == notifier.StateInactive {
|
if a.State == notifier.StateInactive {
|
||||||
|
@ -544,15 +546,6 @@ func (ar *AlertingRule) toTimeSeries(timestamp int64, pendingToFiring, firingToI
|
||||||
ts := ar.alertToTimeSeries(a, timestamp)
|
ts := ar.alertToTimeSeries(a, timestamp)
|
||||||
tss = append(tss, ts...)
|
tss = append(tss, ts...)
|
||||||
}
|
}
|
||||||
for i := range pendingToFiring {
|
|
||||||
tss = append(tss, pendingAlertStaleTimeSeries(pendingToFiring[i], timestamp, false)...)
|
|
||||||
}
|
|
||||||
for i := range pendingToInactive {
|
|
||||||
tss = append(tss, pendingAlertStaleTimeSeries(pendingToInactive[i], timestamp, true)...)
|
|
||||||
}
|
|
||||||
for i := range firingToInactive {
|
|
||||||
tss = append(tss, firingAlertStaleTimeSeries(firingToInactive[i], timestamp)...)
|
|
||||||
}
|
|
||||||
return tss
|
return tss
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ func TestAlertingRuleToTimeSeries(t *testing.T) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
rule.alerts[alert.ID] = alert
|
rule.alerts[alert.ID] = alert
|
||||||
tss := rule.toTimeSeries(timestamp.Unix(), nil, nil, nil)
|
tss := rule.toTimeSeries(timestamp.Unix())
|
||||||
if err := compareTimeSeries(t, tssExpected, tss); err != nil {
|
if err := compareTimeSeries(t, tssExpected, tss); err != nil {
|
||||||
t.Fatalf("timeseries mismatch for rule %q: %s", rule.Name, err)
|
t.Fatalf("timeseries mismatch for rule %q: %s", rule.Name, err)
|
||||||
}
|
}
|
||||||
|
@ -207,7 +207,7 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||||
// check generate time series
|
// check generate time series
|
||||||
if _, ok := tssExpected[i]; ok {
|
if _, ok := tssExpected[i]; ok {
|
||||||
if err := compareTimeSeries(t, tssExpected[i], tss); err != nil {
|
if err := compareTimeSeries(t, tssExpected[i], tss); err != nil {
|
||||||
t.Fatalf("generated time series mismatch for rule %q: %s", rule.Name, err)
|
t.Fatalf("generated time series mismatch for rule %q in step %d: %s", rule.Name, i, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -350,28 +350,28 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
||||||
},
|
},
|
||||||
1: {
|
1: {
|
||||||
// new time series for foo1
|
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
|
||||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
|
||||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
|
||||||
// stale time series for foo, `firing -> inactive`
|
// stale time series for foo, `firing -> inactive`
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
|
// new time series for foo1
|
||||||
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
||||||
|
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
||||||
|
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
},
|
},
|
||||||
2: {
|
2: {
|
||||||
// new time series for foo2
|
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo2"}},
|
|
||||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo2"}},
|
|
||||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
|
||||||
// stale time series for foo1
|
// stale time series for foo1
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo1"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo1"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||||
|
// new time series for foo2
|
||||||
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo2"}},
|
||||||
|
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||||
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "multiple-steps-firing"}, {Name: "name", Value: "foo2"}},
|
||||||
|
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(2 * defaultStep).Unix()), Timestamp: ts.Add(2*defaultStep).UnixNano() / 1e6}}},
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -395,13 +395,13 @@ func TestAlertingRule_Exec(t *testing.T) {
|
||||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: float64(ts.Unix()), Timestamp: ts.UnixNano() / 1e6}}},
|
||||||
},
|
},
|
||||||
1: {
|
1: {
|
||||||
|
// stale time series for `pending -> firing`
|
||||||
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
|
||||||
|
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "firing"}, {Name: "name", Value: "foo"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "name", Value: "foo"}},
|
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertForStateMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "name", Value: "foo"}},
|
||||||
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
Samples: []prompbmarshal.Sample{{Value: float64(ts.Add(defaultStep).Unix()), Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
||||||
// stale time series for `pending -> firing`
|
|
||||||
{Labels: []prompbmarshal.Label{{Name: "__name__", Value: alertMetricName}, {Name: "alertname", Value: "for-fired"}, {Name: "alertstate", Value: "pending"}, {Name: "name", Value: "foo"}},
|
|
||||||
Samples: []prompbmarshal.Sample{{Value: decimal.StaleNaN, Timestamp: ts.Add(defaultStep).UnixNano() / 1e6}}},
|
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,8 @@ import (
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||||
|
|
Loading…
Reference in a new issue