From 5f33445f66efe4097c81ac90bf77c046168f4ebf Mon Sep 17 00:00:00 2001
From: Roman Khavronenko <roman@victoriametrics.com>
Date: Tue, 7 Jun 2022 13:55:09 +0200
Subject: [PATCH] lib/storage: limit max mergeConcurrency value for systems
 with high number of CPUs (#2673)

The number of merge workers affects the max part size during merges. This behaviour
protects storage from running out of disk space in the scenario where all workers
are merging parts with the max size.

This works very well for most cases. But on systems where a high number of CPUs
is allocated to vmstorage components this could significantly reduce the max
part size and result in more unmerged parts than expected.

While checking multiple highly loaded production setups it was discovered that
`max_over_time(vm_active_merges{type="storage/big"}[1h])` rarely exceeds 2,
and `max_over_time(vm_active_merges{type="storage/small"}[1h])` rarely exceeds 4.
The change in this commit caps the default merge concurrency at 4 for big merges
and at 8 for small merges.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
---
 lib/storage/partition.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/storage/partition.go b/lib/storage/partition.go
index c61e281f10..ddd13fc48e 100644
--- a/lib/storage/partition.go
+++ b/lib/storage/partition.go
@@ -869,10 +869,18 @@ func hasActiveMerges(pws []*partWrapper) bool {
 }
 
 var (
-	bigMergeWorkersCount   = (cgroup.AvailableCPUs() + 1) / 2
-	smallMergeWorkersCount = (cgroup.AvailableCPUs() + 1) / 2
+	bigMergeWorkersCount   = getDefaultMergeConcurrency(4)
+	smallMergeWorkersCount = getDefaultMergeConcurrency(8)
 )
 
+func getDefaultMergeConcurrency(max int) int {
+	v := (cgroup.AvailableCPUs() + 1) / 2
+	if v > max {
+		v = max
+	}
+	return v
+}
+
 // SetBigMergeWorkersCount sets the maximum number of concurrent mergers for big blocks.
 //
 // The function must be called before opening or creating any storage.