mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
vmalert: follow-up after 669becd011
(#4318)
* vmalert: follow-up after 669becd011
Signed-off-by: hagen1778 <roman@victoriametrics.com> * vmalert: follow-up after 669becd011
Signed-off-by: hagen1778 <roman@victoriametrics.com> * vmalert: follow-up after 669becd011
Signed-off-by: hagen1778 <roman@victoriametrics.com> --------- Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
242050ba94
commit
f68d93cca2
2 changed files with 34 additions and 16 deletions
|
@ -186,13 +186,9 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
// flush is a blocking function that marshals WriteRequest and sends
|
// flush is a blocking function that marshals WriteRequest and sends
|
||||||
// it to remote write endpoint. Flush performs limited amount of retries
|
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||||
// if request fails.
|
// if request fails.
|
||||||
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||||
const (
|
|
||||||
retryCount = 5
|
|
||||||
retryBackoff = time.Second
|
|
||||||
)
|
|
||||||
if len(wr.Timeseries) < 1 {
|
if len(wr.Timeseries) < 1 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -207,29 +203,42 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) {
|
||||||
|
|
||||||
b := snappy.Encode(nil, data)
|
b := snappy.Encode(nil, data)
|
||||||
|
|
||||||
attempts := 0
|
const (
|
||||||
for ; attempts < retryCount; attempts++ {
|
retryCount = 5
|
||||||
|
retryBackoff = time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
for attempts := 0; attempts < retryCount; attempts++ {
|
||||||
err := c.send(ctx, b)
|
err := c.send(ctx, b)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
sentRows.Add(len(wr.Timeseries))
|
sentRows.Add(len(wr.Timeseries))
|
||||||
sentBytes.Add(len(b))
|
sentBytes.Add(len(b))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
logger.Warnf("attempt %d to send request failed: %s", attempts+1, err)
|
|
||||||
|
|
||||||
if _, ok := err.(*retriableError); ok {
|
_, isRetriable := err.(*retriableError)
|
||||||
// sleeping to avoid remote db hammering
|
logger.Warnf("attempt %d to send request failed: %s (retriable: %v)", attempts+1, err, isRetriable)
|
||||||
time.Sleep(retryBackoff)
|
|
||||||
continue
|
if !isRetriable {
|
||||||
} else {
|
// exit fast if error isn't retriable
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check if request has been cancelled before backoff
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
break
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
// sleeping to avoid remote db hammering
|
||||||
|
time.Sleep(retryBackoff)
|
||||||
}
|
}
|
||||||
|
|
||||||
droppedRows.Add(len(wr.Timeseries))
|
droppedRows.Add(len(wr.Timeseries))
|
||||||
droppedBytes.Add(len(b))
|
droppedBytes.Add(len(b))
|
||||||
logger.Errorf("all %d attempts to send request failed - dropping %d time series",
|
logger.Errorf("attempts to send remote-write request failed - dropping %d time series",
|
||||||
attempts, len(wr.Timeseries))
|
len(wr.Timeseries))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) send(ctx context.Context, data []byte) error {
|
func (c *Client) send(ctx context.Context, data []byte) error {
|
||||||
|
@ -258,14 +267,22 @@ func (c *Client) send(ctx context.Context, data []byte) error {
|
||||||
req.URL.Redacted(), err, len(data), r.Size())
|
req.URL.Redacted(), err, len(data), r.Size())
|
||||||
}
|
}
|
||||||
defer func() { _ = resp.Body.Close() }()
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
|
||||||
body, _ := io.ReadAll(resp.Body)
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
|
||||||
|
// according to https://prometheus.io/docs/concepts/remote_write_spec/
|
||||||
|
// Prometheus remote Write compatible receivers MUST
|
||||||
switch resp.StatusCode / 100 {
|
switch resp.StatusCode / 100 {
|
||||||
case 2:
|
case 2:
|
||||||
|
// respond with a HTTP 2xx status code when the write is successful.
|
||||||
return nil
|
return nil
|
||||||
case 5:
|
case 5:
|
||||||
|
// respond with HTTP status code 5xx when the write fails and SHOULD be retried.
|
||||||
return &retriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
return &retriableError{fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||||
resp.StatusCode, req.URL.Redacted(), body)}
|
resp.StatusCode, req.URL.Redacted(), body)}
|
||||||
default:
|
default:
|
||||||
|
// respond with HTTP status code 4xx when the request is invalid, will never be able to succeed
|
||||||
|
// and should not be retried.
|
||||||
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
return fmt.Errorf("unexpected response code %d for %s. Response body %q",
|
||||||
resp.StatusCode, req.URL.Redacted(), body)
|
resp.StatusCode, req.URL.Redacted(), body)
|
||||||
}
|
}
|
||||||
|
@ -276,5 +293,5 @@ type retriableError struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *retriableError) Error() string {
|
func (e *retriableError) Error() string {
|
||||||
return e.Error()
|
return e.err.Error()
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
||||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): detect alerting rules which don't match any series. See [these docs](https://docs.victoriametrics.com/vmalert.html#never-firing-alerts) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): detect alerting rules which don't match any series. See [these docs](https://docs.victoriametrics.com/vmalert.html#never-firing-alerts) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
||||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support loading rules via HTTP URL. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3352). Thanks to @Haleygo for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4212).
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support loading rules via HTTP URL. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3352). Thanks to @Haleygo for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4212).
|
||||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add buttons for filtering groups/rules with errors or with no-match warning in web UI for page `/groups`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add buttons for filtering groups/rules with errors or with no-match warning in web UI for page `/groups`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
|
||||||
|
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not retry remote-write requests for responses with 4XX status codes. This aligns with the [Prometheus remote write specification](https://prometheus.io/docs/concepts/remote_write_spec/). Thanks to @MichaHoffmann for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4134).
|
||||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to filter incoming requests by IP. See [these docs](https://docs.victoriametrics.com/vmauth.html#ip-filters) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3491).
|
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to filter incoming requests by IP. See [these docs](https://docs.victoriametrics.com/vmauth.html#ip-filters) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3491).
|
||||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to proxy requests to the specified backends for unauthorized users. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4083).
|
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to proxy requests to the specified backends for unauthorized users. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4083).
|
||||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to specify default route for unmatched requests. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4084).
|
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to specify default route for unmatched requests. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4084).
|
||||||
|
|
Loading…
Reference in a new issue