From 4b86522f4c327688ab66bb81e1a24e8b81805da6 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Thu, 13 Jul 2023 21:53:59 -0700
Subject: [PATCH] lib/mergeset: skip common prefix in binarySearchKey()
 function

This should improve performance a bit when the search is performed among items with a long common prefix
---
 lib/mergeset/part_search.go | 44 ++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/lib/mergeset/part_search.go b/lib/mergeset/part_search.go
index 65504ec156..8ee302a3cc 100644
--- a/lib/mergeset/part_search.go
+++ b/lib/mergeset/part_search.go
@@ -138,16 +138,7 @@ func (ps *partSearch) Seek(k []byte) {
 	items := ps.ib.items
 	data := ps.ib.data
 	cpLen := commonPrefixLen(ps.ib.commonPrefix, k)
-	if cpLen > 0 {
-		keySuffix := k[cpLen:]
-		ps.ibItemIdx = sort.Search(len(items), func(i int) bool {
-			it := items[i]
-			it.Start += uint32(cpLen)
-			return string(keySuffix) <= it.String(data)
-		})
-	} else {
-		ps.ibItemIdx = binarySearchKey(data, items, k)
-	}
+	ps.ibItemIdx = binarySearchKey(data, items, k, cpLen)
 	if ps.ibItemIdx < len(items) {
 		// The item has been found.
 		return
@@ -165,14 +156,18 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
 	if ps.ib == nil {
 		return false
 	}
-	data := ps.ib.data
 	items := ps.ib.items
 	idx := ps.ibItemIdx
 	if idx >= len(items) {
 		// The ib is exhausted.
 		return false
 	}
-	if string(k) > items[len(items)-1].String(data) {
+	cpLen := commonPrefixLen(ps.ib.commonPrefix, k)
+	suffix := k[cpLen:]
+	it := items[len(items)-1]
+	it.Start += uint32(cpLen)
+	data := ps.ib.data
+	if string(suffix) > it.String(data) {
 		// The item is located in next blocks.
 		return false
 	}
@@ -181,8 +176,16 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
 	if idx > 0 {
 		idx--
 	}
-	if string(k) < items[idx].String(data) {
-		if string(k) < items[0].String(data) {
+	it = items[idx]
+	it.Start += uint32(cpLen)
+	if string(suffix) < it.String(data) {
+		items = items[:idx]
+		if len(items) == 0 {
+			return false
+		}
+		it = items[0]
+		it.Start += uint32(cpLen)
+		if string(suffix) < it.String(data) {
 			// The item is located in previous blocks.
 			return false
 		}
@@ -190,7 +193,7 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
 	}
 
 	// The item is located in the current block
-	ps.ibItemIdx = idx + binarySearchKey(data, items[idx:], k)
+	ps.ibItemIdx = idx + binarySearchKey(data, items[idx:], k, cpLen)
 	return true
 }
 
@@ -330,11 +333,14 @@ func (ps *partSearch) readInmemoryBlock(bh *blockHeader) (*inmemoryBlock, error)
 	return ib, nil
 }
 
-func binarySearchKey(data []byte, items []Item, key []byte) int {
+func binarySearchKey(data []byte, items []Item, k []byte, cpLen int) int {
 	if len(items) == 0 {
 		return 0
 	}
-	if string(key) <= items[0].String(data) {
+	suffix := k[cpLen:]
+	it := items[0]
+	it.Start += uint32(cpLen)
+	if string(suffix) <= it.String(data) {
 		// Fast path - the item is the first.
 		return 0
 	}
@@ -346,7 +352,9 @@ func binarySearchKey(data []byte, items []Item, key []byte) int {
 	i, j := uint(0), n
 	for i < j {
 		h := uint(i+j) >> 1
-		if h >= 0 && h < uint(len(items)) && string(key) > items[h].String(data) {
+		it := items[h]
+		it.Start += uint32(cpLen)
+		if h >= 0 && h < uint(len(items)) && string(suffix) > it.String(data) {
 			i = h + 1
 		} else {
 			j = h