From 0c49552849cc966c348b289ec20775b0ec3b44c8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 13 Jul 2023 21:53:59 -0700 Subject: [PATCH] lib/mergeset: skip common prefix in binarySearchKey() function This should improve performance a bit when the search if performed among items with long common prefix --- lib/mergeset/part_search.go | 44 ++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/lib/mergeset/part_search.go b/lib/mergeset/part_search.go index 65504ec15..8ee302a3c 100644 --- a/lib/mergeset/part_search.go +++ b/lib/mergeset/part_search.go @@ -138,16 +138,7 @@ func (ps *partSearch) Seek(k []byte) { items := ps.ib.items data := ps.ib.data cpLen := commonPrefixLen(ps.ib.commonPrefix, k) - if cpLen > 0 { - keySuffix := k[cpLen:] - ps.ibItemIdx = sort.Search(len(items), func(i int) bool { - it := items[i] - it.Start += uint32(cpLen) - return string(keySuffix) <= it.String(data) - }) - } else { - ps.ibItemIdx = binarySearchKey(data, items, k) - } + ps.ibItemIdx = binarySearchKey(data, items, k, cpLen) if ps.ibItemIdx < len(items) { // The item has been found. return @@ -165,14 +156,18 @@ func (ps *partSearch) tryFastSeek(k []byte) bool { if ps.ib == nil { return false } - data := ps.ib.data items := ps.ib.items idx := ps.ibItemIdx if idx >= len(items) { // The ib is exhausted. return false } - if string(k) > items[len(items)-1].String(data) { + cpLen := commonPrefixLen(ps.ib.commonPrefix, k) + suffix := k[cpLen:] + it := items[len(items)-1] + it.Start += uint32(cpLen) + data := ps.ib.data + if string(suffix) > it.String(data) { // The item is located in next blocks. return false } @@ -181,8 +176,16 @@ func (ps *partSearch) tryFastSeek(k []byte) bool { if idx > 0 { idx-- } - if string(k) < items[idx].String(data) { - if string(k) < items[0].String(data) { + it = items[idx] + it.Start += uint32(cpLen) + if string(suffix) < it.String(data) { + items = items[:idx] + if len(items) == 0 { + return false + } + it = items[0] + it.Start += uint32(cpLen) + if string(suffix) < it.String(data) { // The item is located in previous blocks. return false } @@ -190,7 +193,7 @@ func (ps *partSearch) tryFastSeek(k []byte) bool { } // The item is located in the current block - ps.ibItemIdx = idx + binarySearchKey(data, items[idx:], k) + ps.ibItemIdx = idx + binarySearchKey(data, items[idx:], k, cpLen) return true } @@ -330,11 +333,14 @@ func (ps *partSearch) readInmemoryBlock(bh *blockHeader) (*inmemoryBlock, error) return ib, nil } -func binarySearchKey(data []byte, items []Item, key []byte) int { +func binarySearchKey(data []byte, items []Item, k []byte, cpLen int) int { if len(items) == 0 { return 0 } - if string(key) <= items[0].String(data) { + suffix := k[cpLen:] + it := items[0] + it.Start += uint32(cpLen) + if string(suffix) <= it.String(data) { // Fast path - the item is the first. return 0 } @@ -346,7 +352,9 @@ func binarySearchKey(data []byte, items []Item, key []byte) int { i, j := uint(0), n for i < j { h := uint(i+j) >> 1 - if h >= 0 && h < uint(len(items)) && string(key) > items[h].String(data) { + it := items[h] + it.Start += uint32(cpLen) + if h >= 0 && h < uint(len(items)) && string(suffix) > it.String(data) { i = h + 1 } else { j = h