From 112c9a8118c150bb7281fe6177c672c99b9e8b8e Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Thu, 13 Apr 2023 20:33:33 -0700
Subject: [PATCH] app/vmstorage: deprecate -bigMergeConcurrency command-line
 flag

An improperly configured -bigMergeConcurrency command-line flag usually leads to uncontrolled
growth of unmerged parts, which, in turn, increases CPU usage and query durations.

So it is better to deprecate this flag. In rare cases the -smallMergeConcurrency command-line flag
can be used instead for controlling the concurrency of background merges.
---
 README.md                             |  4 ++--
 app/vmstorage/main.go                 |  7 ++++---
 docs/Cluster-VictoriaMetrics.md       |  4 ++--
 docs/README.md                        |  4 ++--
 docs/Single-server-VictoriaMetrics.md |  4 ++--
 lib/storage/partition.go              | 23 ++---------------------
 6 files changed, 14 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index b1b8452258..8e9c17d308 100644
--- a/README.md
+++ b/README.md
@@ -2130,7 +2130,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 
 ```
   -bigMergeConcurrency int
-     The maximum number of CPU cores to use for big merges. Default value is used if set to 0
+     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
   -cacheExpireDuration duration
      Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
   -configAuthKey string
@@ -2469,7 +2469,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -selfScrapeJob string
      Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
   -smallMergeConcurrency int
-     The maximum number of CPU cores to use for small merges. Default value is used if set to 0
+     The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
   -snapshotAuthKey string
      authKey, which must be passed in query string to /snapshot* pages
   -snapshotsMaxAge value
diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go
index 196e245d2b..47ef076047 100644
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -37,8 +37,10 @@ var (
 	finalMergeDelay = flag.Duration("finalMergeDelay", 0, "The delay before starting final merge for per-month partition after no new data is ingested into it. "+
 		"Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. "+
 		"Zero value disables final merge")
-	bigMergeConcurrency     = flag.Int("bigMergeConcurrency", 0, "The maximum number of CPU cores to use for big merges. Default value is used if set to 0")
-	smallMergeConcurrency   = flag.Int("smallMergeConcurrency", 0, "The maximum number of CPU cores to use for small merges. Default value is used if set to 0")
+	_ = flag.Int("bigMergeConcurrency", 0, "Deprecated: this flag does nothing. Please use -smallMergeConcurrency "+
+		"for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage")
+	smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . "+
+		"It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries")
 	retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
 		"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
 		"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
@@ -91,7 +93,6 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
 	resetResponseCacheIfNeeded = resetCacheIfNeeded
 	storage.SetLogNewSeries(*logNewSeries)
 	storage.SetFinalMergeDelay(*finalMergeDelay)
-	storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
 	storage.SetMergeWorkersCount(*smallMergeConcurrency)
 	storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
 	storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md
index 5f147ff607..e085b98ab6 100644
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -1224,7 +1224,7 @@ Below is the output for `/path/to/vmstorage -help`:
 
 ```
   -bigMergeConcurrency int
-     The maximum number of CPU cores to use for big merges. Default value is used if set to 0
+     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
   -cacheExpireDuration duration
      Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
   -cluster.tls
@@ -1355,7 +1355,7 @@ Below is the output for `/path/to/vmstorage -help`:
   -search.maxUniqueTimeseries int
      The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect
   -smallMergeConcurrency int
-     The maximum number of CPU cores to use for small merges. Default value is used if set to 0
+     The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
   -snapshotAuthKey string
      authKey, which must be passed in query string to /snapshot* pages
   -snapshotsMaxAge value
diff --git a/docs/README.md b/docs/README.md
index bcdc45b09e..e6e7f4e525 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -2131,7 +2131,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 
 ```
   -bigMergeConcurrency int
-     The maximum number of CPU cores to use for big merges. Default value is used if set to 0
+     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
   -cacheExpireDuration duration
      Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
   -configAuthKey string
@@ -2470,7 +2470,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -selfScrapeJob string
      Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
   -smallMergeConcurrency int
-     The maximum number of CPU cores to use for small merges. Default value is used if set to 0
+     The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
   -snapshotAuthKey string
      authKey, which must be passed in query string to /snapshot* pages
   -snapshotsMaxAge value
diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md
index 6fcfc14c1c..25299fc4c5 100644
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@@ -2134,7 +2134,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 
 ```
   -bigMergeConcurrency int
-     The maximum number of CPU cores to use for big merges. Default value is used if set to 0
+     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
   -cacheExpireDuration duration
      Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
   -configAuthKey string
@@ -2473,7 +2473,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -selfScrapeJob string
      Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
   -smallMergeConcurrency int
-     The maximum number of CPU cores to use for small merges. Default value is used if set to 0
+     The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
   -snapshotAuthKey string
      authKey, which must be passed in query string to /snapshot* pages
   -snapshotsMaxAge value
diff --git a/lib/storage/partition.go b/lib/storage/partition.go
index f9d9f8b51a..37b5e64fac 100644
--- a/lib/storage/partition.go
+++ b/lib/storage/partition.go
@@ -1006,8 +1006,6 @@ func hasActiveMerges(pws []*partWrapper) bool {
 
 var mergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(16))
 
-var bigMergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(4))
-
 func getDefaultMergeConcurrency(max int) int {
 	v := (cgroup.AvailableCPUs() + 1) / 2
 	if v > max {
@@ -1016,17 +1014,6 @@ func getDefaultMergeConcurrency(max int) int {
 	return adjustMergeWorkersLimit(v)
 }
 
-// SetBigMergeWorkersCount sets the maximum number of concurrent mergers for big blocks.
-//
-// The function must be called before opening or creating any storage.
-func SetBigMergeWorkersCount(n int) {
-	if n <= 0 {
-		// Do nothing
-		return
-	}
-	bigMergeWorkersLimitCh = make(chan struct{}, n)
-}
-
 // SetMergeWorkersCount sets the maximum number of concurrent mergers for parts.
 //
 // The function must be called before opening or creating any storage.
@@ -1145,7 +1132,8 @@ func (pt *partition) getMaxSmallPartSize() uint64 {
 }
 
 func (pt *partition) getMaxBigPartSize() uint64 {
-	return getMaxOutBytes(pt.bigPartsPath, cap(bigMergeWorkersLimitCh))
+	workersCount := getDefaultMergeConcurrency(4)
+	return getMaxOutBytes(pt.bigPartsPath, workersCount)
 }
 
 func getMaxOutBytes(path string, workersCount int) uint64 {
@@ -1278,13 +1266,6 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFi
 	dstPartType := pt.getDstPartType(pws, isFinal)
 	ptPath, tmpPartPath, mergeIdx := pt.getDstPartPaths(dstPartType)
 
-	if dstPartType == partBig {
-		bigMergeWorkersLimitCh <- struct{}{}
-		defer func() {
-			<-bigMergeWorkersLimitCh
-		}()
-	}
-
 	if !isDedupEnabled() && isFinal && len(pws) == 1 && pws[0].mp != nil {
 		// Fast path: flush a single in-memory part to disk.
 		mp := pws[0].mp