From af5bdb9254ec80e64b0ba5a999415de3b91dcd2d Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Thu, 24 Feb 2022 11:17:36 +0200
Subject: [PATCH] lib/mergeset: remove superfluous sorting of
 inmemoryBlock.data at inmemoryBlock.sort()

There is no need to rearrange the underlying data to match the order of the sorted items,
since every item refers to its bytes in the data via Start and End offsets.
This should reduce CPU usage when registering new time series in `indexdb`.

Thanks to @ahfuzhang for the suggestion at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2245
---
 lib/mergeset/encoding.go             | 25 +++----------------------
 lib/mergeset/encoding_test.go        |  2 +-
 lib/mergeset/encoding_timing_test.go |  3 ++-
 3 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/lib/mergeset/encoding.go b/lib/mergeset/encoding.go
index ffdeb1eac9..b7774d720b 100644
--- a/lib/mergeset/encoding.go
+++ b/lib/mergeset/encoding.go
@@ -137,25 +137,6 @@ func (ib *inmemoryBlock) Add(x []byte) bool {
 // It must fit CPU cache size, i.e. 64KB for the current CPUs.
 const maxInmemoryBlockSize = 64 * 1024
 
-func (ib *inmemoryBlock) sort() {
-	sort.Sort(ib)
-	data := ib.data
-	items := ib.items
-	bb := bbPool.Get()
-	b := bytesutil.ResizeNoCopyMayOverallocate(bb.B, len(data))
-	b = b[:0]
-	for i, it := range items {
-		bLen := len(b)
-		b = append(b, it.String(data)...)
-		items[i] = Item{
-			Start: uint32(bLen),
-			End:   uint32(len(b)),
-		}
-	}
-	bb.B, ib.data = data, b
-	bbPool.Put(bb)
-}
-
 // storageBlock represents a block of data on the storage.
 type storageBlock struct {
 	itemsData []byte
@@ -195,7 +176,7 @@ func (ib *inmemoryBlock) isSorted() bool {
 // - returns the marshal type used for the encoding.
 func (ib *inmemoryBlock) MarshalUnsortedData(sb *storageBlock, firstItemDst, commonPrefixDst []byte, compressLevel int) ([]byte, []byte, uint32, marshalType) {
 	if !ib.isSorted() {
-		ib.sort()
+		sort.Sort(ib)
 	}
 	ib.updateCommonPrefix()
 	return ib.marshalData(sb, firstItemDst, commonPrefixDst, compressLevel)
@@ -251,7 +232,7 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
 	firstItemDst = append(firstItemDst, firstItem...)
 	commonPrefixDst = append(commonPrefixDst, ib.commonPrefix...)
 
-	if len(ib.data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
+	if len(data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
 		// Use plain encoding form small block, since it is cheaper.
 		ib.marshalDataPlain(sb)
 		return firstItemDst, commonPrefixDst, uint32(len(ib.items)), marshalTypePlain
@@ -302,7 +283,7 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
 	bbLens.B = bLens
 	bbPool.Put(bbLens)
 
-	if float64(len(sb.itemsData)) > 0.9*float64(len(ib.data)-len(ib.commonPrefix)*len(ib.items)) {
+	if float64(len(sb.itemsData)) > 0.9*float64(len(data)-len(ib.commonPrefix)*len(ib.items)) {
 		// Bad compression rate. It is cheaper to use plain encoding.
 		ib.marshalDataPlain(sb)
 		return firstItemDst, commonPrefixDst, uint32(len(ib.items)), marshalTypePlain
diff --git a/lib/mergeset/encoding_test.go b/lib/mergeset/encoding_test.go
index 993c4b6efd..646901a73c 100644
--- a/lib/mergeset/encoding_test.go
+++ b/lib/mergeset/encoding_test.go
@@ -67,7 +67,7 @@ func TestInmemoryBlockSort(t *testing.T) {
 		}
 
 		// Sort ib.
-		ib.sort()
+		sort.Sort(&ib)
 		sort.Strings(items)
 
 		// Verify items are sorted.
diff --git a/lib/mergeset/encoding_timing_test.go b/lib/mergeset/encoding_timing_test.go
index 10774fc085..76369e3e38 100644
--- a/lib/mergeset/encoding_timing_test.go
+++ b/lib/mergeset/encoding_timing_test.go
@@ -2,6 +2,7 @@ package mergeset
 
 import (
 	"fmt"
+	"sort"
 	"testing"
 
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@@ -16,7 +17,7 @@ func BenchmarkInmemoryBlockMarshal(b *testing.B) {
 			b.Fatalf("cannot add more than %d items", i)
 		}
 	}
-	ibSrc.sort()
+	sort.Sort(&ibSrc)
 
 	b.ResetTimer()
 	b.SetBytes(itemsCount)