lib/mergeset: remove superfluous sorting of inmemoryBlock.data at inmemoryBlock.sort()

There is no need to rearrange the underlying `inmemoryBlock.data` bytes to match the order of the sorted items; sorting the items alone is sufficient. This should reduce CPU usage when registering new time series in `indexdb`. Thanks to @ahfuzhang for the suggestion at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2245
2025-01-10 15:14:09 +00:00 · 2022-02-24 11:17:36 +02:00 · 2022-02-24 11:17:36 +02:00 · 1967b9c211
commit 1967b9c211
parent 99ee1c13c0
3 changed files with 6 additions and 24 deletions
--- a/lib/mergeset/encoding.go
+++ b/lib/mergeset/encoding.go
@ -137,25 +137,6 @@ func (ib *inmemoryBlock) Add(x []byte) bool {
 // It must fit CPU cache size, i.e. 64KB for the current CPUs.
 const maxInmemoryBlockSize = 64 * 1024

-func (ib *inmemoryBlock) sort() {
-	sort.Sort(ib)
-	data := ib.data
-	items := ib.items
-	bb := bbPool.Get()
-	b := bytesutil.ResizeNoCopyMayOverallocate(bb.B, len(data))
-	b = b[:0]
-	for i, it := range items {
-		bLen := len(b)
-		b = append(b, it.String(data)...)
-		items[i] = Item{
-			Start: uint32(bLen),
-			End:   uint32(len(b)),
-		}
-	}
-	bb.B, ib.data = data, b
-	bbPool.Put(bb)
-}
-
 // storageBlock represents a block of data on the storage.
 type storageBlock struct {
 	itemsData []byte
@ -195,7 +176,7 @@ func (ib *inmemoryBlock) isSorted() bool {
 // - returns the marshal type used for the encoding.
 func (ib *inmemoryBlock) MarshalUnsortedData(sb *storageBlock, firstItemDst, commonPrefixDst []byte, compressLevel int) ([]byte, []byte, uint32, marshalType) {
 	if !ib.isSorted() {
-		ib.sort()
+		sort.Sort(ib)
 	}
 	ib.updateCommonPrefix()
 	return ib.marshalData(sb, firstItemDst, commonPrefixDst, compressLevel)
@ -251,7 +232,7 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
 	firstItemDst = append(firstItemDst, firstItem...)
 	commonPrefixDst = append(commonPrefixDst, ib.commonPrefix...)

-	if len(ib.data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
+	if len(data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
 		// Use plain encoding form small block, since it is cheaper.
 		ib.marshalDataPlain(sb)
 		return firstItemDst, commonPrefixDst, uint32(len(ib.items)), marshalTypePlain
@ -302,7 +283,7 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
 	bbLens.B = bLens
 	bbPool.Put(bbLens)

-	if float64(len(sb.itemsData)) > 0.9*float64(len(ib.data)-len(ib.commonPrefix)*len(ib.items)) {
+	if float64(len(sb.itemsData)) > 0.9*float64(len(data)-len(ib.commonPrefix)*len(ib.items)) {
 		// Bad compression rate. It is cheaper to use plain encoding.
 		ib.marshalDataPlain(sb)
 		return firstItemDst, commonPrefixDst, uint32(len(ib.items)), marshalTypePlain
--- a/lib/mergeset/encoding_test.go
+++ b/lib/mergeset/encoding_test.go
@ -67,7 +67,7 @@ func TestInmemoryBlockSort(t *testing.T) {
 		}

 		// Sort ib.
-		ib.sort()
+		sort.Sort(&ib)
 		sort.Strings(items)

 		// Verify items are sorted.
--- a/lib/mergeset/encoding_timing_test.go
+++ b/lib/mergeset/encoding_timing_test.go
@ -2,6 +2,7 @@ package mergeset

 import (
 	"fmt"
+	"sort"
 	"testing"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
@ -16,7 +17,7 @@ func BenchmarkInmemoryBlockMarshal(b *testing.B) {
 			b.Fatalf("cannot add more than %d items", i)
 		}
 	}
-	ibSrc.sort()
+	sort.Sort(&ibSrc)

 	b.ResetTimer()
 	b.SetBytes(itemsCount)