mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
vmalert: fix error when rule didn't start if restore failed (#1279)
Previously, `startGroup` could exit on restore errors regardless of the value of the `remoteRead.ignoreRestoreErrors` flag. Now vmalert checks the flag value before deciding whether to return the error or just log it.
This commit is contained in:
parent
2dddd68feb
commit
35237fe1f5
2 changed files with 6 additions and 13 deletions
|
@ -2,7 +2,6 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
|
@ -405,9 +404,6 @@ func alertForToTimeSeries(name string, a *notifier.Alert, timestamp time.Time) p
|
|||
return newTimeSeries(float64(a.Start.Unix()), labels, timestamp)
|
||||
}
|
||||
|
||||
// ErrStateRestore indicates that the vmalert state failed to restore during startup.
|
||||
var ErrStateRestore = errors.New("failed to restore the state")
|
||||
|
||||
// Restore restores the state of active alerts based on previously written timeseries.
|
||||
// Restore restores only the Start field. Field State will always be Pending and is supposed
|
||||
// to be updated on next Exec, as well as Value field.
|
||||
|
@ -432,7 +428,7 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, lookb
|
|||
alertForStateMetricName, ar.Name, labelsFilter, int(lookback.Seconds()))
|
||||
qMetrics, err := q.Query(ctx, expr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s: %w", err, ErrStateRestore)
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range qMetrics {
|
||||
|
|
|
@ -2,7 +2,6 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -52,12 +51,7 @@ func (m *manager) AlertAPI(gID, aID uint64) (*APIAlert, error) {
|
|||
}
|
||||
|
||||
func (m *manager) start(ctx context.Context, path []string, validateTpl, validateExpr bool) error {
|
||||
err := m.update(ctx, path, validateTpl, validateExpr, true)
|
||||
if *remoteReadIgnoreRestoreErrors && errors.Is(err, ErrStateRestore) {
|
||||
logger.Errorf("%s", err)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
return m.update(ctx, path, validateTpl, validateExpr, true)
|
||||
}
|
||||
|
||||
func (m *manager) close() {
|
||||
|
@ -74,7 +68,10 @@ func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) er
|
|||
if restore && m.rr != nil {
|
||||
err := group.Restore(ctx, m.rr, *remoteReadLookBack, m.labels)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while restoring state for group %q: %w", group.Name, err)
|
||||
if !*remoteReadIgnoreRestoreErrors {
|
||||
return fmt.Errorf("failed to restore state for group %q: %w", group.Name, err)
|
||||
}
|
||||
logger.Errorf("error while restoring state for group %q: %s", group.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue