vmalert: properly interrupt remotewrite retries on shutdown (#4505)

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2023-06-22 15:07:32 +02:00 committed by Aliaksandr Valialkin
parent dae0b428fd
commit 311a81c7b0
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
2 changed files with 5 additions and 1 deletions

View file

@ -147,6 +147,7 @@ func (c *Client) run(ctx context.Context) {
wr.Timeseries = append(wr.Timeseries, ts)
}
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries))
c.flush(lastCtx, wr)
cancel()
}
@ -207,6 +208,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
}
b := snappy.Encode(nil, data)
L:
for attempts := 0; attempts < retryCount; attempts++ {
err := c.send(ctx, b)
if err == nil {
@ -226,7 +228,8 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
// check if request has been cancelled before backoff
select {
case <-ctx.Done():
break
logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1)
break L
default:
}

View file

@ -46,6 +46,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* BUGFIX: [storage](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html): properly create `parts.json` after migration from versions below `v1.90.0`. This should fix errors on start-up after an unclean shutdown. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4336) for details.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a memory leak issue associated with chart updates. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4455).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): retry all errors except 4XX status codes while pushing via remote-write to the remote storage. Previously, errors like broken connection could prevent vmalert from retrying the request.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly interrupt retry attempts on vmalert shutdown. Previously, vmalert could wait for all retry attempts to finish before shutting down.
## [v1.91.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.91.2)