From 48656dcc383cdd86ad6468dc8a5fa8e834a0fdfd Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Sun, 21 Feb 2021 21:25:58 +0200
Subject: [PATCH] lib/{mergeset,storage}: allow merging smaller number of small
 parts

While this may increase CPU and disk IO usage needed for background merge,
this also reduces CPU usage during queries in production. This is because
such queries tend to read recently added data, and it is better to have a lower
number of parts for such data in order to reduce CPU usage.

This partially reverts ebf8da3730b0823f4b3804fede125f607c340f5b
---
 lib/mergeset/table.go         | 6 ------
 lib/storage/partition.go      | 6 ------
 lib/storage/partition_test.go | 6 ++++--
 3 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lib/mergeset/table.go b/lib/mergeset/table.go
index d666392536..ac57e975da 100644
--- a/lib/mergeset/table.go
+++ b/lib/mergeset/table.go
@@ -1333,12 +1333,6 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxItems u
 			for _, pw := range a {
 				itemsSum += pw.p.ph.itemsCount
 			}
-			if itemsSum < 1e6 && len(a) < maxPartsToMerge {
-				// Do not merge parts with too small number of items if the number of source parts
-				// isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage
-				// for small parts merge.
-				continue
-			}
 			if itemsSum > maxItems {
 				// There is no sense in checking the remaining bigger parts.
 				break
diff --git a/lib/storage/partition.go b/lib/storage/partition.go
index ff2ca08ab8..3d9518c552 100644
--- a/lib/storage/partition.go
+++ b/lib/storage/partition.go
@@ -1469,12 +1469,6 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxRows ui
 				continue
 			}
 			rowsCount := getRowsCount(a)
-			if rowsCount < 1e6 && len(a) < maxPartsToMerge {
-				// Do not merge parts with too small number of rows if the number of source parts
-				// isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage
-				// for small parts merge.
-				continue
-			}
 			if rowsCount > maxRows {
 				// There is no need in verifying remaining parts with higher number of rows
 				needFreeSpace = true
diff --git a/lib/storage/partition_test.go b/lib/storage/partition_test.go
index 0deebbadd5..60c3a84d2d 100644
--- a/lib/storage/partition_test.go
+++ b/lib/storage/partition_test.go
@@ -26,9 +26,11 @@ func TestAppendPartsToMerge(t *testing.T) {
 	testAppendPartsToMerge(t, 2, []uint64{4, 2, 4}, []uint64{4, 4})
 	testAppendPartsToMerge(t, 2, []uint64{1, 3, 7, 2}, nil)
 	testAppendPartsToMerge(t, 3, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3})
-	testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, nil)
+	testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3})
+	testAppendPartsToMerge(t, 5, []uint64{1, 3, 7, 2}, nil)
 	testAppendPartsToMerge(t, 4, []uint64{1e6, 3e6, 7e6, 2e6}, []uint64{1e6, 2e6, 3e6})
-	testAppendPartsToMerge(t, 4, []uint64{2, 3, 7, 2}, []uint64{2, 2, 3, 7})
+	testAppendPartsToMerge(t, 4, []uint64{2, 3, 7, 2}, []uint64{2, 2, 3})
+	testAppendPartsToMerge(t, 5, []uint64{2, 3, 7, 2}, nil)
 	testAppendPartsToMerge(t, 3, []uint64{11, 1, 10, 100, 10}, []uint64{10, 10, 11})
 }