From 48656dcc383cdd86ad6468dc8a5fa8e834a0fdfd Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Sun, 21 Feb 2021 21:25:58 +0200 Subject: [PATCH] lib/{mergeset,storage}: allow merging smaller number of small parts While this may increase CPU and disk IO usage needed for background merge, this also reduces CPU usage during queries in production. This is because such queries tend to read recently added data and it is better to have lower number of parts for such data in order to reduce CPU usage. This partially reverts ebf8da3730b0823f4b3804fede125f607c340f5b --- lib/mergeset/table.go | 6 ------ lib/storage/partition.go | 6 ------ lib/storage/partition_test.go | 6 ++++-- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/lib/mergeset/table.go b/lib/mergeset/table.go index d666392536..ac57e975da 100644 --- a/lib/mergeset/table.go +++ b/lib/mergeset/table.go @@ -1333,12 +1333,6 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxItems u for _, pw := range a { itemsSum += pw.p.ph.itemsCount } - if itemsSum < 1e6 && len(a) < maxPartsToMerge { - // Do not merge parts with too small number of items if the number of source parts - // isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage - // for small parts merge. - continue - } if itemsSum > maxItems { // There is no sense in checking the remaining bigger parts. break diff --git a/lib/storage/partition.go b/lib/storage/partition.go index ff2ca08ab8..3d9518c552 100644 --- a/lib/storage/partition.go +++ b/lib/storage/partition.go @@ -1469,12 +1469,6 @@ func appendPartsToMerge(dst, src []*partWrapper, maxPartsToMerge int, maxRows ui continue } rowsCount := getRowsCount(a) - if rowsCount < 1e6 && len(a) < maxPartsToMerge { - // Do not merge parts with too small number of rows if the number of source parts - // isn't equal to maxPartsToMerge. This should reduce CPU usage and disk IO usage - // for small parts merge. 
- continue - } if rowsCount > maxRows { // There is no need in verifying remaining parts with higher number of rows needFreeSpace = true diff --git a/lib/storage/partition_test.go b/lib/storage/partition_test.go index 0deebbadd5..60c3a84d2d 100644 --- a/lib/storage/partition_test.go +++ b/lib/storage/partition_test.go @@ -26,9 +26,11 @@ func TestAppendPartsToMerge(t *testing.T) { testAppendPartsToMerge(t, 2, []uint64{4, 2, 4}, []uint64{4, 4}) testAppendPartsToMerge(t, 2, []uint64{1, 3, 7, 2}, nil) testAppendPartsToMerge(t, 3, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3}) - testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, nil) + testAppendPartsToMerge(t, 4, []uint64{1, 3, 7, 2}, []uint64{1, 2, 3}) + testAppendPartsToMerge(t, 5, []uint64{1, 3, 7, 2}, nil) testAppendPartsToMerge(t, 4, []uint64{1e6, 3e6, 7e6, 2e6}, []uint64{1e6, 2e6, 3e6}) - testAppendPartsToMerge(t, 4, []uint64{2, 3, 7, 2}, []uint64{2, 2, 3, 7}) + testAppendPartsToMerge(t, 4, []uint64{2, 3, 7, 2}, []uint64{2, 2, 3}) + testAppendPartsToMerge(t, 5, []uint64{2, 3, 7, 2}, nil) testAppendPartsToMerge(t, 3, []uint64{11, 1, 10, 100, 10}, []uint64{10, 10, 11}) }