vmalert: correctly return error for RW failures (#3452)

* vmalert: correctly return error for RW failures

By mistake, in 0989649ad0 the error
for remote write failures weren't return to user.
This change fixes it.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-12-06 15:36:46 +01:00 committed by Aliaksandr Valialkin
parent 2369096ffb
commit 024fe6218c
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
3 changed files with 36 additions and 7 deletions

View file

@ -412,20 +412,26 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
return fmt.Errorf("rule %q: failed to execute: %w", rule, err)
}
errGr := new(utils.ErrGroup)
if e.rw != nil {
pushToRW := func(tss []prompbmarshal.TimeSeries) {
pushToRW := func(tss []prompbmarshal.TimeSeries) error {
var lastErr error
for _, ts := range tss {
remoteWriteTotal.Inc()
if err := e.rw.Push(ts); err != nil {
remoteWriteErrors.Inc()
errGr.Add(fmt.Errorf("rule %q: remote write failure: %w", rule, err))
lastErr = fmt.Errorf("rule %q: remote write failure: %w", rule, err)
}
}
return lastErr
}
pushToRW(tss)
if err := pushToRW(tss); err != nil {
return err
}
staleSeries := e.getStaleSeries(rule, tss, ts)
pushToRW(staleSeries)
if err := pushToRW(staleSeries); err != nil {
return err
}
}
ar, ok := rule.(*AlertingRule)
@ -439,6 +445,7 @@ func (e *executor) exec(ctx context.Context, rule Rule, ts time.Time, resolveDur
}
wg := sync.WaitGroup{}
errGr := new(utils.ErrGroup)
for _, nt := range e.notifiers() {
wg.Add(1)
go func(nt notifier.Notifier) {

View file

@ -3,8 +3,6 @@ package main
import (
"context"
"fmt"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"reflect"
"sort"
"testing"
@ -12,6 +10,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
)
@ -452,3 +453,23 @@ func TestFaultyNotifier(t *testing.T) {
}
t.Fatalf("alive notifier didn't receive notification by %v", deadline)
}
func TestFaultyRW(t *testing.T) {
fq := &fakeQuerier{}
fq.add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
r := &RecordingRule{
Name: "test",
q: fq,
}
e := &executor{
rw: &remotewrite.Client{},
previouslySentSeriesToRW: make(map[uint64]map[string][]prompbmarshal.Label),
}
err := e.exec(context.Background(), r, time.Now(), 0, 10)
if err == nil {
t.Fatalf("expected to get an error from faulty RW client, got nil instead")
}
}

View file

@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
## v1.79.x long-time support release (LTS)
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly return the error message from remote-write failures. Before, error was ignored and only `vmalert_remotewrite_errors_total` was incremented.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly return an empty result from [limit_offset](https://docs.victoriametrics.com/MetricsQL.html#limit_offset) if the `offset` arg exceeds the number of inner time series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3312).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover GCE zones when `filter` option is set at [gce_sd_configs](https://docs.victoriametrics.com/sd_configs.html#gce_sd_configs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3202).