From 0204ce942dec5c8182900b629698cf0b46ef4211 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Tue, 22 Oct 2024 14:43:55 +0200 Subject: [PATCH] app/vmalert: update `-remoteWrite.concurrency` and `-remoteWrite.flushInterval` (#7272) Auto-adjust the `-remoteWrite.concurrency` cmd-line flag based on the number of available CPU cores, in the same way as vmagent does. With this change the default behavior of vmalert in highly loaded installations should become more resilient. This change also reduces `-remoteWrite.flushInterval` from `5s` to `2s` to provide better data freshness. --------- Signed-off-by: hagen1778 Co-authored-by: Nikolay --- app/vmalert/remotewrite/client.go | 10 ++++++---- app/vmalert/remotewrite/init.go | 8 ++++---- docs/changelog/CHANGELOG.md | 1 + docs/vmalert.md | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/app/vmalert/remotewrite/client.go b/app/vmalert/remotewrite/client.go index 836a6ef85..1b2ec9d38 100644 --- a/app/vmalert/remotewrite/client.go +++ b/app/vmalert/remotewrite/client.go @@ -15,6 +15,7 @@ import ( "github.com/golang/snappy" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" @@ -22,11 +23,12 @@ import ( "github.com/VictoriaMetrics/metrics" ) +var defaultConcurrency = cgroup.AvailableCPUs() * 2 + const ( - defaultConcurrency = 4 - defaultMaxBatchSize = 1e3 - defaultMaxQueueSize = 1e5 - defaultFlushInterval = 5 * time.Second + defaultMaxBatchSize = 1e4 + defaultMaxQueueSize = 1e6 + defaultFlushInterval = 2 * time.Second defaultWriteTimeout = 30 * time.Second ) diff --git a/app/vmalert/remotewrite/init.go b/app/vmalert/remotewrite/init.go index 9112ef542..aa61cddfc 100644 --- a/app/vmalert/remotewrite/init.go +++ b/app/vmalert/remotewrite/init.go @@ -34,10 +34,10 @@ var ( idleConnectionTimeout = flag.Duration("remoteWrite.idleConnTimeout", 
50*time.Second, `Defines a duration for idle (keep-alive connections) to exist. Consider settings this value less to the value of "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`) - maxQueueSize = flag.Int("remoteWrite.maxQueueSize", 1e6, "Defines the max number of pending datapoints to remote write endpoint") - maxBatchSize = flag.Int("remoteWrite.maxBatchSize", 1e4, "Defines max number of timeseries to be flushed at once") - concurrency = flag.Int("remoteWrite.concurrency", 4, "Defines number of writers for concurrent writing into remote write endpoint") - flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint") + maxQueueSize = flag.Int("remoteWrite.maxQueueSize", defaultMaxQueueSize, "Defines the max number of pending datapoints to remote write endpoint") + maxBatchSize = flag.Int("remoteWrite.maxBatchSize", defaultMaxBatchSize, "Defines max number of timeseries to be flushed at once") + concurrency = flag.Int("remoteWrite.concurrency", defaultConcurrency, "Defines number of writers for concurrent writing into remote write endpoint") + flushInterval = flag.Duration("remoteWrite.flushInterval", defaultFlushInterval, "Defines interval of flushes to remote write endpoint") tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url") tlsCertFile = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url") diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 50d3fb9cf..02f271523 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -17,6 +17,7 @@ The sandbox cluster installation runs under the constant load generated by See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). 
## tip +* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): `-rule` cmd-line flag now supports multi-document YAML files. This could be useful when rules are retrieved via HTTP URL where multiple rule files were merged together in one response. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6753). Thanks to @Irene-123 for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6995). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert): properly set `group_name` and `file` fields for recording rules in `/api/v1/rules`. diff --git a/docs/vmalert.md b/docs/vmalert.md index 107ff7f88..cce31e75a 100644 --- a/docs/vmalert.md +++ b/docs/vmalert.md @@ -1373,7 +1373,7 @@ The shortlist of configuration flags is the following: -remoteWrite.disablePathAppend Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url. -remoteWrite.flushInterval duration - Defines interval of flushes to remote write endpoint (default 5s) + Defines interval of flushes to remote write endpoint (default 2s) -remoteWrite.headers string Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2' -remoteWrite.idleConnTimeout duration