From fdb898f2493a3b2cc56f1c603e46f88ee5465012 Mon Sep 17 00:00:00 2001
From: Roman Khavronenko <roman@victoriametrics.com>
Date: Tue, 22 Oct 2024 14:43:55 +0200
Subject: [PATCH] app/vmalert: update `-remoteWrite.concurrency` and
 `-remoteWrite.flushInterval` (#7272)

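Auto-adjust the default value of the `-remoteWrite.concurrency` cmd-line
flag to the number of available CPU cores in the same way as vmagent
does. The default is now `cgroup.AvailableCPUs() * 2` instead of the
fixed `4`. With this change the default behavior of vmalert in highly
loaded installations should become more resilient. This change also
reduces the default `-remoteWrite.flushInterval` from `5s` to `2s` to
provide better data freshness.

The internal `defaultMaxBatchSize` and `defaultMaxQueueSize` constants
are raised to `1e4` and `1e6` so that they match the existing flag
defaults, and the flags now reference these constants directly.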


---------
Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
---
 app/vmalert/remotewrite/client.go | 10 ++++++----
 app/vmalert/remotewrite/init.go   |  8 ++++----
 docs/changelog/CHANGELOG.md       |  1 +
 docs/vmalert.md                   |  2 +-
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/app/vmalert/remotewrite/client.go b/app/vmalert/remotewrite/client.go
index 836a6ef85a..1b2ec9d380 100644
--- a/app/vmalert/remotewrite/client.go
+++ b/app/vmalert/remotewrite/client.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/golang/snappy"
 
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
@@ -22,11 +23,12 @@ import (
 	"github.com/VictoriaMetrics/metrics"
 )
 
+var defaultConcurrency = cgroup.AvailableCPUs() * 2
+
 const (
-	defaultConcurrency   = 4
-	defaultMaxBatchSize  = 1e3
-	defaultMaxQueueSize  = 1e5
-	defaultFlushInterval = 5 * time.Second
+	defaultMaxBatchSize  = 1e4
+	defaultMaxQueueSize  = 1e6
+	defaultFlushInterval = 2 * time.Second
 	defaultWriteTimeout  = 30 * time.Second
 )
 
diff --git a/app/vmalert/remotewrite/init.go b/app/vmalert/remotewrite/init.go
index 9112ef5428..aa61cddfc7 100644
--- a/app/vmalert/remotewrite/init.go
+++ b/app/vmalert/remotewrite/init.go
@@ -34,10 +34,10 @@ var (
 
 	idleConnectionTimeout = flag.Duration("remoteWrite.idleConnTimeout", 50*time.Second, `Defines a duration for idle (keep-alive connections) to exist. Consider settings this value less to the value of "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`)
 
-	maxQueueSize  = flag.Int("remoteWrite.maxQueueSize", 1e6, "Defines the max number of pending datapoints to remote write endpoint")
-	maxBatchSize  = flag.Int("remoteWrite.maxBatchSize", 1e4, "Defines max number of timeseries to be flushed at once")
-	concurrency   = flag.Int("remoteWrite.concurrency", 4, "Defines number of writers for concurrent writing into remote write endpoint")
-	flushInterval = flag.Duration("remoteWrite.flushInterval", 5*time.Second, "Defines interval of flushes to remote write endpoint")
+	maxQueueSize  = flag.Int("remoteWrite.maxQueueSize", defaultMaxQueueSize, "Defines the max number of pending datapoints to remote write endpoint")
+	maxBatchSize  = flag.Int("remoteWrite.maxBatchSize", defaultMaxBatchSize, "Defines max number of timeseries to be flushed at once")
+	concurrency   = flag.Int("remoteWrite.concurrency", defaultConcurrency, "Defines number of writers for concurrent writing into remote write endpoint")
+	flushInterval = flag.Duration("remoteWrite.flushInterval", defaultFlushInterval, "Defines interval of flushes to remote write endpoint")
 
 	tlsInsecureSkipVerify = flag.Bool("remoteWrite.tlsInsecureSkipVerify", false, "Whether to skip tls verification when connecting to -remoteWrite.url")
 	tlsCertFile           = flag.String("remoteWrite.tlsCertFile", "", "Optional path to client-side TLS certificate file to use when connecting to -remoteWrite.url")
diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md
index 50d3fb9cf8..02f2715234 100644
--- a/docs/changelog/CHANGELOG.md
+++ b/docs/changelog/CHANGELOG.md
@@ -17,6 +17,7 @@ The sandbox cluster installation runs under the constant load generated by
 See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 
 ## tip
+* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): `-rule` cmd-line flag now supports multi-document YAML files. This could be useful when rules are retrieved via HTTP URL where multiple rule files were merged together in one response. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6753). Thanks to @Irene-123 for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6995).
 
 * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert): properly set `group_name` and `file` fields for recording rules in `/api/v1/rules`.
 
diff --git a/docs/vmalert.md b/docs/vmalert.md
index 107ff7f88e..cce31e75ab 100644
--- a/docs/vmalert.md
+++ b/docs/vmalert.md
@@ -1373,7 +1373,7 @@ The shortlist of configuration flags is the following:
   -remoteWrite.disablePathAppend
      Whether to disable automatic appending of '/api/v1/write' path to the configured -remoteWrite.url.
   -remoteWrite.flushInterval duration
-     Defines interval of flushes to remote write endpoint (default 5s)
+     Defines interval of flushes to remote write endpoint (default 2s)
   -remoteWrite.headers string
      Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
   -remoteWrite.idleConnTimeout duration