From 3661373cc2ac78311a3dab70d749b9f68a9a2849 Mon Sep 17 00:00:00 2001 From: viperstars Date: Fri, 17 May 2024 20:55:17 +0800 Subject: [PATCH] app/vmagent/remotewrite: skip sending empty block to downstream server (#6241) Occasionally, vmagent sends empty blocks to downstream servers. If a downstream server returns an unexpected response, vmagent gets stuck in a retry loop. While vmagent handles 400 and 409 errors, there are various prometheus remote write implementations that return different error codes. For example, vector returns a 422 error. To mitigate the risk of vmagent getting stuck in a retry loop, it is advisable to skip sending empty blocks to downstream servers. Co-authored-by: hao.peng Co-authored-by: Zhu Jiekun Co-authored-by: hagen1778 --- app/vmagent/remotewrite/client.go | 8 +++++++- docs/CHANGELOG.md | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/app/vmagent/remotewrite/client.go b/app/vmagent/remotewrite/client.go index ae5412c32..5148ec1b0 100644 --- a/app/vmagent/remotewrite/client.go +++ b/app/vmagent/remotewrite/client.go @@ -11,6 +11,8 @@ import ( "sync" "time" + "github.com/VictoriaMetrics/metrics" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -20,7 +22,6 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil" - "github.com/VictoriaMetrics/metrics" ) var ( @@ -298,6 +299,11 @@ func (c *client) runWorker() { if !ok { return } + if len(block) == 0 { + // skip empty data blocks from sending + // see https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241 + continue + } go func() { startTime := time.Now() ch <- c.sendBlock(block) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ec40259ad..7340b8fd7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -51,6 +51,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239). * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): remove redundant requests on the `Explore Cardinality` page. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6240). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): prevent potential panic during [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) if more than one `--remoteWrite.streamAggr.dedupInterval` is configured. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6205). +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): skip empty data blocks before sending to the remote write destination. Thanks to @viperstars for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241). * BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): set correct suffix `_prometheus` for aggregation outputs [increase_prometheus](https://docs.victoriametrics.com/stream-aggregation/#increase_prometheus) and [total_prometheus](https://docs.victoriametrics.com/stream-aggregation/#total_prometheus). Before, outputs `total` and `total_prometheus` or `increase` and `increase_prometheus` had the same suffix. * BUGFIX: properly estimate the needed memory for query execution if it has the format [`aggr_func`](https://docs.victoriametrics.com/metricsql/#aggregate-functions)([`rollup_func[d]`](https://docs.victoriametrics.com/metricsql/#rollup-functions) (for example, `sum(rate(request_duration_seconds_bucket[5m]))`). This should allow performing aggregations over bigger number of time series when VictoriaMetrics runs in environments with small amounts of available memory. The issue has been introduced in [this commit](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/5138eaeea0791caa34bcfab410e0ca9cd253cd8f) in [v1.83.0](https://docs.victoriametrics.com/changelog_2022/#v1830). * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): correctly apply `-inmemoryDataFlushInterval` when it's set to minimum supported value 1s.