mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/mergeset: reduce memory usage for inmemoryBlock by using more compact items representation
This also should reduce CPU time spent by GC, since inmemoryBlock.items don't have pointers now, so GC doesn't need visiting them.
This commit is contained in:
parent
388cdb1980
commit
636c55b526
13 changed files with 248 additions and 150 deletions
|
@ -195,7 +195,8 @@ func (bsr *blockStreamReader) Next() bool {
|
||||||
if err := bsr.readNextBHS(); err != nil {
|
if err := bsr.readNextBHS(); err != nil {
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
// Check the last item.
|
// Check the last item.
|
||||||
lastItem := bsr.Block.items[len(bsr.Block.items)-1]
|
b := &bsr.Block
|
||||||
|
lastItem := b.items[len(b.items)-1].Bytes(b.data)
|
||||||
if string(bsr.ph.lastItem) != string(lastItem) {
|
if string(bsr.ph.lastItem) != string(lastItem) {
|
||||||
err = fmt.Errorf("unexpected last item; got %X; want %X", lastItem, bsr.ph.lastItem)
|
err = fmt.Errorf("unexpected last item; got %X; want %X", lastItem, bsr.ph.lastItem)
|
||||||
}
|
}
|
||||||
|
@ -240,12 +241,13 @@ func (bsr *blockStreamReader) Next() bool {
|
||||||
}
|
}
|
||||||
if !bsr.firstItemChecked {
|
if !bsr.firstItemChecked {
|
||||||
bsr.firstItemChecked = true
|
bsr.firstItemChecked = true
|
||||||
if string(bsr.ph.firstItem) != string(bsr.Block.items[0]) {
|
b := &bsr.Block
|
||||||
bsr.err = fmt.Errorf("unexpected first item; got %X; want %X", bsr.Block.items[0], bsr.ph.firstItem)
|
firstItem := b.items[0].Bytes(b.data)
|
||||||
|
if string(bsr.ph.firstItem) != string(firstItem) {
|
||||||
|
bsr.err = fmt.Errorf("unexpected first item; got %X; want %X", firstItem, bsr.ph.firstItem)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,8 +44,10 @@ func testBlockStreamReaderRead(ip *inmemoryPart, items []string) error {
|
||||||
bsr := newTestBlockStreamReader(ip)
|
bsr := newTestBlockStreamReader(ip)
|
||||||
i := 0
|
i := 0
|
||||||
for bsr.Next() {
|
for bsr.Next() {
|
||||||
for _, item := range bsr.Block.items {
|
data := bsr.Block.data
|
||||||
if string(item) != items[i] {
|
for _, it := range bsr.Block.items {
|
||||||
|
item := it.String(data)
|
||||||
|
if item != items[i] {
|
||||||
return fmt.Errorf("unexpected item[%d]; got %q; want %q", i, item, items[i])
|
return fmt.Errorf("unexpected item[%d]; got %q; want %q", i, item, items[i])
|
||||||
}
|
}
|
||||||
i++
|
i++
|
||||||
|
|
|
@ -3,6 +3,7 @@ package mergeset
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -13,35 +14,62 @@ import (
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
type byteSliceSorter [][]byte
|
// Item represents a single item for storing in a mergeset.
|
||||||
|
type Item struct {
|
||||||
|
// Start is start offset for the item in data.
|
||||||
|
Start uint32
|
||||||
|
|
||||||
func (s byteSliceSorter) Len() int { return len(s) }
|
// End is end offset for the item in data.
|
||||||
func (s byteSliceSorter) Less(i, j int) bool {
|
End uint32
|
||||||
return string(s[i]) < string(s[j])
|
|
||||||
}
|
}
|
||||||
func (s byteSliceSorter) Swap(i, j int) {
|
|
||||||
s[i], s[j] = s[j], s[i]
|
// Bytes returns bytes representation of it obtained from data.
|
||||||
|
//
|
||||||
|
// The returned bytes representation belongs to data.
|
||||||
|
func (it Item) Bytes(data []byte) []byte {
|
||||||
|
sh := (*reflect.SliceHeader)(unsafe.Pointer(&data))
|
||||||
|
sh.Cap = int(it.End - it.Start)
|
||||||
|
sh.Len = int(it.End - it.Start)
|
||||||
|
sh.Data += uintptr(it.Start)
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns string represetnation of it obtained from data.
|
||||||
|
//
|
||||||
|
// The returned string representation belongs to data.
|
||||||
|
func (it Item) String(data []byte) string {
|
||||||
|
sh := (*reflect.SliceHeader)(unsafe.Pointer(&data))
|
||||||
|
sh.Data += uintptr(it.Start)
|
||||||
|
sh.Len = int(it.End - it.Start)
|
||||||
|
return *(*string)(unsafe.Pointer(sh))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ib *inmemoryBlock) Len() int { return len(ib.items) }
|
||||||
|
|
||||||
|
func (ib *inmemoryBlock) Less(i, j int) bool {
|
||||||
|
data := ib.data
|
||||||
|
items := ib.items
|
||||||
|
return string(items[i].Bytes(data)) < string(items[j].Bytes(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ib *inmemoryBlock) Swap(i, j int) {
|
||||||
|
items := ib.items
|
||||||
|
items[i], items[j] = items[j], items[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
type inmemoryBlock struct {
|
type inmemoryBlock struct {
|
||||||
commonPrefix []byte
|
commonPrefix []byte
|
||||||
data []byte
|
data []byte
|
||||||
items byteSliceSorter
|
items []Item
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ib *inmemoryBlock) SizeBytes() int {
|
func (ib *inmemoryBlock) SizeBytes() int {
|
||||||
return int(unsafe.Sizeof(*ib)) + cap(ib.commonPrefix) + cap(ib.data) + cap(ib.items)*int(unsafe.Sizeof([]byte{}))
|
return int(unsafe.Sizeof(*ib)) + cap(ib.commonPrefix) + cap(ib.data) + cap(ib.items)*int(unsafe.Sizeof(Item{}))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ib *inmemoryBlock) Reset() {
|
func (ib *inmemoryBlock) Reset() {
|
||||||
ib.commonPrefix = ib.commonPrefix[:0]
|
ib.commonPrefix = ib.commonPrefix[:0]
|
||||||
ib.data = ib.data[:0]
|
ib.data = ib.data[:0]
|
||||||
|
|
||||||
items := ib.items
|
|
||||||
for i := range items {
|
|
||||||
// Remove reference to by slice, so GC could free the byte slice.
|
|
||||||
items[i] = nil
|
|
||||||
}
|
|
||||||
ib.items = ib.items[:0]
|
ib.items = ib.items[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,12 +78,14 @@ func (ib *inmemoryBlock) updateCommonPrefix() {
|
||||||
if len(ib.items) == 0 {
|
if len(ib.items) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
cp := ib.items[0]
|
items := ib.items
|
||||||
|
data := ib.data
|
||||||
|
cp := items[0].Bytes(data)
|
||||||
if len(cp) == 0 {
|
if len(cp) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, item := range ib.items[1:] {
|
for _, it := range items[1:] {
|
||||||
cpLen := commonPrefixLen(cp, item)
|
cpLen := commonPrefixLen(cp, it.Bytes(data))
|
||||||
if cpLen == 0 {
|
if cpLen == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -82,15 +112,21 @@ func commonPrefixLen(a, b []byte) int {
|
||||||
//
|
//
|
||||||
// false is returned if x isn't added to ib due to block size contraints.
|
// false is returned if x isn't added to ib due to block size contraints.
|
||||||
func (ib *inmemoryBlock) Add(x []byte) bool {
|
func (ib *inmemoryBlock) Add(x []byte) bool {
|
||||||
if len(x)+len(ib.data) > maxInmemoryBlockSize {
|
data := ib.data
|
||||||
|
if len(x)+len(data) > maxInmemoryBlockSize {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if cap(ib.data) < maxInmemoryBlockSize {
|
if cap(data) < maxInmemoryBlockSize {
|
||||||
dataLen := len(ib.data)
|
dataLen := len(data)
|
||||||
ib.data = bytesutil.Resize(ib.data, maxInmemoryBlockSize)[:dataLen]
|
data = bytesutil.Resize(data, maxInmemoryBlockSize)[:dataLen]
|
||||||
}
|
}
|
||||||
ib.data = append(ib.data, x...)
|
dataLen := len(data)
|
||||||
ib.items = append(ib.items, ib.data[len(ib.data)-len(x):])
|
data = append(data, x...)
|
||||||
|
ib.items = append(ib.items, Item{
|
||||||
|
Start: uint32(dataLen),
|
||||||
|
End: uint32(len(data)),
|
||||||
|
})
|
||||||
|
ib.data = data
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,16 +136,21 @@ func (ib *inmemoryBlock) Add(x []byte) bool {
|
||||||
const maxInmemoryBlockSize = 64 * 1024
|
const maxInmemoryBlockSize = 64 * 1024
|
||||||
|
|
||||||
func (ib *inmemoryBlock) sort() {
|
func (ib *inmemoryBlock) sort() {
|
||||||
// Use sort.Sort instead of sort.Slice in order to eliminate memory allocation.
|
sort.Sort(ib)
|
||||||
sort.Sort(&ib.items)
|
data := ib.data
|
||||||
|
items := ib.items
|
||||||
bb := bbPool.Get()
|
bb := bbPool.Get()
|
||||||
b := bytesutil.Resize(bb.B, len(ib.data))
|
b := bytesutil.Resize(bb.B, len(data))
|
||||||
b = b[:0]
|
b = b[:0]
|
||||||
for i, item := range ib.items {
|
for i, it := range items {
|
||||||
b = append(b, item...)
|
bLen := len(b)
|
||||||
ib.items[i] = b[len(b)-len(item):]
|
b = append(b, it.String(data)...)
|
||||||
|
items[i] = Item{
|
||||||
|
Start: uint32(bLen),
|
||||||
|
End: uint32(len(b)),
|
||||||
}
|
}
|
||||||
bb.B, ib.data = ib.data, b
|
}
|
||||||
|
bb.B, ib.data = data, b
|
||||||
bbPool.Put(bb)
|
bbPool.Put(bb)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,7 +181,7 @@ func checkMarshalType(mt marshalType) error {
|
||||||
|
|
||||||
func (ib *inmemoryBlock) isSorted() bool {
|
func (ib *inmemoryBlock) isSorted() bool {
|
||||||
// Use sort.IsSorted instead of sort.SliceIsSorted in order to eliminate memory allocation.
|
// Use sort.IsSorted instead of sort.SliceIsSorted in order to eliminate memory allocation.
|
||||||
return sort.IsSorted(&ib.items)
|
return sort.IsSorted(ib)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarshalUnsortedData marshals unsorted items from ib to sb.
|
// MarshalUnsortedData marshals unsorted items from ib to sb.
|
||||||
|
@ -179,9 +220,11 @@ func (ib *inmemoryBlock) MarshalSortedData(sb *storageBlock, firstItemDst, commo
|
||||||
|
|
||||||
func (ib *inmemoryBlock) debugItemsString() string {
|
func (ib *inmemoryBlock) debugItemsString() string {
|
||||||
var sb strings.Builder
|
var sb strings.Builder
|
||||||
var prevItem []byte
|
var prevItem string
|
||||||
for i, item := range ib.items {
|
data := ib.data
|
||||||
if string(item) < string(prevItem) {
|
for i, it := range ib.items {
|
||||||
|
item := it.String(data)
|
||||||
|
if item < prevItem {
|
||||||
fmt.Fprintf(&sb, "!!! the next item is smaller than the previous item !!!\n")
|
fmt.Fprintf(&sb, "!!! the next item is smaller than the previous item !!!\n")
|
||||||
}
|
}
|
||||||
fmt.Fprintf(&sb, "%05d %X\n", i, item)
|
fmt.Fprintf(&sb, "%05d %X\n", i, item)
|
||||||
|
@ -201,7 +244,9 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
|
||||||
logger.Panicf("BUG: the number of items in the block must be smaller than %d; got %d items", uint64(1<<32), len(ib.items))
|
logger.Panicf("BUG: the number of items in the block must be smaller than %d; got %d items", uint64(1<<32), len(ib.items))
|
||||||
}
|
}
|
||||||
|
|
||||||
firstItemDst = append(firstItemDst, ib.items[0]...)
|
data := ib.data
|
||||||
|
firstItem := ib.items[0].Bytes(data)
|
||||||
|
firstItemDst = append(firstItemDst, firstItem...)
|
||||||
commonPrefixDst = append(commonPrefixDst, ib.commonPrefix...)
|
commonPrefixDst = append(commonPrefixDst, ib.commonPrefix...)
|
||||||
|
|
||||||
if len(ib.data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
|
if len(ib.data)-len(ib.commonPrefix)*len(ib.items) < 64 || len(ib.items) < 2 {
|
||||||
|
@ -221,10 +266,11 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
|
||||||
defer encoding.PutUint64s(xs)
|
defer encoding.PutUint64s(xs)
|
||||||
|
|
||||||
cpLen := len(ib.commonPrefix)
|
cpLen := len(ib.commonPrefix)
|
||||||
prevItem := ib.items[0][cpLen:]
|
prevItem := firstItem[cpLen:]
|
||||||
prevPrefixLen := uint64(0)
|
prevPrefixLen := uint64(0)
|
||||||
for i, item := range ib.items[1:] {
|
for i, it := range ib.items[1:] {
|
||||||
item := item[cpLen:]
|
it.Start += uint32(cpLen)
|
||||||
|
item := it.Bytes(data)
|
||||||
prefixLen := uint64(commonPrefixLen(prevItem, item))
|
prefixLen := uint64(commonPrefixLen(prevItem, item))
|
||||||
bItems = append(bItems, item[prefixLen:]...)
|
bItems = append(bItems, item[prefixLen:]...)
|
||||||
xLen := prefixLen ^ prevPrefixLen
|
xLen := prefixLen ^ prevPrefixLen
|
||||||
|
@ -240,9 +286,9 @@ func (ib *inmemoryBlock) marshalData(sb *storageBlock, firstItemDst, commonPrefi
|
||||||
bbPool.Put(bbItems)
|
bbPool.Put(bbItems)
|
||||||
|
|
||||||
// Marshal lens data.
|
// Marshal lens data.
|
||||||
prevItemLen := uint64(len(ib.items[0]) - cpLen)
|
prevItemLen := uint64(len(firstItem) - cpLen)
|
||||||
for i, item := range ib.items[1:] {
|
for i, it := range ib.items[1:] {
|
||||||
itemLen := uint64(len(item) - cpLen)
|
itemLen := uint64(int(it.End-it.Start) - cpLen)
|
||||||
xLen := itemLen ^ prevItemLen
|
xLen := itemLen ^ prevItemLen
|
||||||
prevItemLen = itemLen
|
prevItemLen = itemLen
|
||||||
|
|
||||||
|
@ -346,11 +392,15 @@ func (ib *inmemoryBlock) UnmarshalData(sb *storageBlock, firstItem, commonPrefix
|
||||||
}
|
}
|
||||||
data := bytesutil.Resize(ib.data, maxInmemoryBlockSize)
|
data := bytesutil.Resize(ib.data, maxInmemoryBlockSize)
|
||||||
if n := int(itemsCount) - cap(ib.items); n > 0 {
|
if n := int(itemsCount) - cap(ib.items); n > 0 {
|
||||||
ib.items = append(ib.items[:cap(ib.items)], make([][]byte, n)...)
|
ib.items = append(ib.items[:cap(ib.items)], make([]Item, n)...)
|
||||||
}
|
}
|
||||||
ib.items = ib.items[:itemsCount]
|
ib.items = ib.items[:itemsCount]
|
||||||
data = append(data[:0], firstItem...)
|
data = append(data[:0], firstItem...)
|
||||||
ib.items[0] = data
|
items := ib.items
|
||||||
|
items[0] = Item{
|
||||||
|
Start: 0,
|
||||||
|
End: uint32(len(data)),
|
||||||
|
}
|
||||||
prevItem := data[len(commonPrefix):]
|
prevItem := data[len(commonPrefix):]
|
||||||
b := bb.B
|
b := bb.B
|
||||||
for i := 1; i < int(itemsCount); i++ {
|
for i := 1; i < int(itemsCount); i++ {
|
||||||
|
@ -363,17 +413,19 @@ func (ib *inmemoryBlock) UnmarshalData(sb *storageBlock, firstItem, commonPrefix
|
||||||
if uint64(len(b)) < suffixLen {
|
if uint64(len(b)) < suffixLen {
|
||||||
return fmt.Errorf("not enough data for decoding item from itemsData; want %d bytes; remained %d bytes", suffixLen, len(b))
|
return fmt.Errorf("not enough data for decoding item from itemsData; want %d bytes; remained %d bytes", suffixLen, len(b))
|
||||||
}
|
}
|
||||||
data = append(data, commonPrefix...)
|
|
||||||
|
|
||||||
if prefixLen > uint64(len(prevItem)) {
|
if prefixLen > uint64(len(prevItem)) {
|
||||||
return fmt.Errorf("prefixLen cannot exceed %d; got %d", len(prevItem), prefixLen)
|
return fmt.Errorf("prefixLen cannot exceed %d; got %d", len(prevItem), prefixLen)
|
||||||
}
|
}
|
||||||
|
dataLen := len(data)
|
||||||
|
data = append(data, commonPrefix...)
|
||||||
data = append(data, prevItem[:prefixLen]...)
|
data = append(data, prevItem[:prefixLen]...)
|
||||||
data = append(data, b[:suffixLen]...)
|
data = append(data, b[:suffixLen]...)
|
||||||
item := data[len(data)-int(itemLen)-len(commonPrefix):]
|
items[i] = Item{
|
||||||
ib.items[i] = item
|
Start: uint32(dataLen),
|
||||||
|
End: uint32(len(data)),
|
||||||
|
}
|
||||||
b = b[suffixLen:]
|
b = b[suffixLen:]
|
||||||
prevItem = item[len(commonPrefix):]
|
prevItem = data[len(data)-int(itemLen):]
|
||||||
}
|
}
|
||||||
if len(b) > 0 {
|
if len(b) > 0 {
|
||||||
return fmt.Errorf("unexpected tail left after itemsData with len %d: %q", len(b), b)
|
return fmt.Errorf("unexpected tail left after itemsData with len %d: %q", len(b), b)
|
||||||
|
@ -381,30 +433,33 @@ func (ib *inmemoryBlock) UnmarshalData(sb *storageBlock, firstItem, commonPrefix
|
||||||
if uint64(len(data)) != dataLen {
|
if uint64(len(data)) != dataLen {
|
||||||
return fmt.Errorf("unexpected data len; got %d; want %d", len(data), dataLen)
|
return fmt.Errorf("unexpected data len; got %d; want %d", len(data), dataLen)
|
||||||
}
|
}
|
||||||
|
ib.data = data
|
||||||
if !ib.isSorted() {
|
if !ib.isSorted() {
|
||||||
return fmt.Errorf("decoded data block contains unsorted items; items:\n%s", ib.debugItemsString())
|
return fmt.Errorf("decoded data block contains unsorted items; items:\n%s", ib.debugItemsString())
|
||||||
}
|
}
|
||||||
ib.data = data
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var bbPool bytesutil.ByteBufferPool
|
var bbPool bytesutil.ByteBufferPool
|
||||||
|
|
||||||
func (ib *inmemoryBlock) marshalDataPlain(sb *storageBlock) {
|
func (ib *inmemoryBlock) marshalDataPlain(sb *storageBlock) {
|
||||||
|
data := ib.data
|
||||||
|
|
||||||
// Marshal items data.
|
// Marshal items data.
|
||||||
// There is no need in marshaling the first item, since it is returned
|
// There is no need in marshaling the first item, since it is returned
|
||||||
// to the caller in marshalData.
|
// to the caller in marshalData.
|
||||||
cpLen := len(ib.commonPrefix)
|
cpLen := len(ib.commonPrefix)
|
||||||
b := sb.itemsData[:0]
|
b := sb.itemsData[:0]
|
||||||
for _, item := range ib.items[1:] {
|
for _, it := range ib.items[1:] {
|
||||||
b = append(b, item[cpLen:]...)
|
it.Start += uint32(cpLen)
|
||||||
|
b = append(b, it.String(data)...)
|
||||||
}
|
}
|
||||||
sb.itemsData = b
|
sb.itemsData = b
|
||||||
|
|
||||||
// Marshal length data.
|
// Marshal length data.
|
||||||
b = sb.lensData[:0]
|
b = sb.lensData[:0]
|
||||||
for _, item := range ib.items[1:] {
|
for _, it := range ib.items[1:] {
|
||||||
b = encoding.MarshalUint64(b, uint64(len(item)-cpLen))
|
b = encoding.MarshalUint64(b, uint64(int(it.End-it.Start)-cpLen))
|
||||||
}
|
}
|
||||||
sb.lensData = b
|
sb.lensData = b
|
||||||
}
|
}
|
||||||
|
@ -431,26 +486,34 @@ func (ib *inmemoryBlock) unmarshalDataPlain(sb *storageBlock, firstItem []byte,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unmarshal items data.
|
// Unmarshal items data.
|
||||||
ib.data = bytesutil.Resize(ib.data, len(firstItem)+len(sb.itemsData)+len(commonPrefix)*int(itemsCount))
|
data := ib.data
|
||||||
ib.data = append(ib.data[:0], firstItem...)
|
items := ib.items
|
||||||
ib.items = append(ib.items[:0], ib.data)
|
data = bytesutil.Resize(data, len(firstItem)+len(sb.itemsData)+len(commonPrefix)*int(itemsCount))
|
||||||
|
data = append(data[:0], firstItem...)
|
||||||
|
items = append(items[:0], Item{
|
||||||
|
Start: 0,
|
||||||
|
End: uint32(len(data)),
|
||||||
|
})
|
||||||
b = sb.itemsData
|
b = sb.itemsData
|
||||||
for i := 1; i < int(itemsCount); i++ {
|
for i := 1; i < int(itemsCount); i++ {
|
||||||
itemLen := lb.lens[i]
|
itemLen := lb.lens[i]
|
||||||
if uint64(len(b)) < itemLen {
|
if uint64(len(b)) < itemLen {
|
||||||
return fmt.Errorf("not enough data for decoding item from itemsData; want %d bytes; remained %d bytes", itemLen, len(b))
|
return fmt.Errorf("not enough data for decoding item from itemsData; want %d bytes; remained %d bytes", itemLen, len(b))
|
||||||
}
|
}
|
||||||
ib.data = append(ib.data, commonPrefix...)
|
dataLen := len(data)
|
||||||
ib.data = append(ib.data, b[:itemLen]...)
|
data = append(data, commonPrefix...)
|
||||||
item := ib.data[len(ib.data)-int(itemLen)-len(commonPrefix):]
|
data = append(data, b[:itemLen]...)
|
||||||
ib.items = append(ib.items, item)
|
items = append(items, Item{
|
||||||
|
Start: uint32(dataLen),
|
||||||
|
End: uint32(len(data)),
|
||||||
|
})
|
||||||
b = b[itemLen:]
|
b = b[itemLen:]
|
||||||
}
|
}
|
||||||
|
ib.data = data
|
||||||
|
ib.items = items
|
||||||
if len(b) > 0 {
|
if len(b) > 0 {
|
||||||
return fmt.Errorf("unexpected tail left after itemsData with len %d: %q", len(b), b)
|
return fmt.Errorf("unexpected tail left after itemsData with len %d: %q", len(b), b)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,8 +37,10 @@ func TestInmemoryBlockAdd(t *testing.T) {
|
||||||
if len(ib.data) != totalLen {
|
if len(ib.data) != totalLen {
|
||||||
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib.data), totalLen)
|
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib.data), totalLen)
|
||||||
}
|
}
|
||||||
for j, item := range ib.items {
|
data := ib.data
|
||||||
if items[j] != string(item) {
|
for j, it := range ib.items {
|
||||||
|
item := it.String(data)
|
||||||
|
if items[j] != item {
|
||||||
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n%X\nwant\n%X", j, len(items), i, item, items[j])
|
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n%X\nwant\n%X", j, len(items), i, item, items[j])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -75,8 +77,10 @@ func TestInmemoryBlockSort(t *testing.T) {
|
||||||
if len(ib.data) != totalLen {
|
if len(ib.data) != totalLen {
|
||||||
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib.data), totalLen)
|
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib.data), totalLen)
|
||||||
}
|
}
|
||||||
for j, item := range ib.items {
|
data := ib.data
|
||||||
if items[j] != string(item) {
|
for j, it := range ib.items {
|
||||||
|
item := it.String(data)
|
||||||
|
if items[j] != item {
|
||||||
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n%X\nwant\n%X", j, len(items), i, item, items[j])
|
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n%X\nwant\n%X", j, len(items), i, item, items[j])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -122,8 +126,9 @@ func TestInmemoryBlockMarshalUnmarshal(t *testing.T) {
|
||||||
if int(itemsLen) != len(ib.items) {
|
if int(itemsLen) != len(ib.items) {
|
||||||
t.Fatalf("unexpected number of items marshaled; got %d; want %d", itemsLen, len(ib.items))
|
t.Fatalf("unexpected number of items marshaled; got %d; want %d", itemsLen, len(ib.items))
|
||||||
}
|
}
|
||||||
if string(firstItem) != string(ib.items[0]) {
|
firstItemExpected := ib.items[0].String(ib.data)
|
||||||
t.Fatalf("unexpected the first item\ngot\n%q\nwant\n%q", firstItem, ib.items[0])
|
if string(firstItem) != firstItemExpected {
|
||||||
|
t.Fatalf("unexpected the first item\ngot\n%q\nwant\n%q", firstItem, firstItemExpected)
|
||||||
}
|
}
|
||||||
if err := checkMarshalType(mt); err != nil {
|
if err := checkMarshalType(mt); err != nil {
|
||||||
t.Fatalf("invalid mt: %s", err)
|
t.Fatalf("invalid mt: %s", err)
|
||||||
|
@ -143,12 +148,15 @@ func TestInmemoryBlockMarshalUnmarshal(t *testing.T) {
|
||||||
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib2.data), totalLen)
|
t.Fatalf("unexpected ib.data len; got %d; want %d", len(ib2.data), totalLen)
|
||||||
}
|
}
|
||||||
for j := range items {
|
for j := range items {
|
||||||
if len(items[j]) != len(ib2.items[j]) {
|
it2 := ib2.items[j]
|
||||||
|
item2 := it2.String(ib2.data)
|
||||||
|
if len(items[j]) != len(item2) {
|
||||||
t.Fatalf("items length mismatch at index %d out of %d, loop %d\ngot\n(len=%d) %X\nwant\n(len=%d) %X",
|
t.Fatalf("items length mismatch at index %d out of %d, loop %d\ngot\n(len=%d) %X\nwant\n(len=%d) %X",
|
||||||
j, len(items), i, len(ib2.items[j]), ib2.items[j], len(items[j]), items[j])
|
j, len(items), i, len(item2), item2, len(items[j]), items[j])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for j, item := range ib2.items {
|
for j, it := range ib2.items {
|
||||||
|
item := it.String(ib2.data)
|
||||||
if items[j] != string(item) {
|
if items[j] != string(item) {
|
||||||
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n(len=%d) %X\nwant\n(len=%d) %X",
|
t.Fatalf("unexpected item at index %d out of %d, loop %d\ngot\n(len=%d) %X\nwant\n(len=%d) %X",
|
||||||
j, len(items), i, len(item), item, len(items[j]), items[j])
|
j, len(items), i, len(item), item, len(items[j]), items[j])
|
||||||
|
|
|
@ -56,8 +56,8 @@ func (ip *inmemoryPart) Init(ib *inmemoryBlock) {
|
||||||
|
|
||||||
ip.ph.itemsCount = uint64(len(ib.items))
|
ip.ph.itemsCount = uint64(len(ib.items))
|
||||||
ip.ph.blocksCount = 1
|
ip.ph.blocksCount = 1
|
||||||
ip.ph.firstItem = append(ip.ph.firstItem[:0], ib.items[0]...)
|
ip.ph.firstItem = append(ip.ph.firstItem[:0], ib.items[0].String(ib.data)...)
|
||||||
ip.ph.lastItem = append(ip.ph.lastItem[:0], ib.items[len(ib.items)-1]...)
|
ip.ph.lastItem = append(ip.ph.lastItem[:0], ib.items[len(ib.items)-1].String(ib.data)...)
|
||||||
|
|
||||||
fs.MustWriteData(&ip.itemsData, ip.sb.itemsData)
|
fs.MustWriteData(&ip.itemsData, ip.sb.itemsData)
|
||||||
ip.bh.itemsBlockOffset = 0
|
ip.bh.itemsBlockOffset = 0
|
||||||
|
|
|
@ -16,7 +16,7 @@ import (
|
||||||
//
|
//
|
||||||
// The callback must return sorted items. The first and the last item must be unchanged.
|
// The callback must return sorted items. The first and the last item must be unchanged.
|
||||||
// The callback can re-use data and items for storing the result.
|
// The callback can re-use data and items for storing the result.
|
||||||
type PrepareBlockCallback func(data []byte, items [][]byte) ([]byte, [][]byte)
|
type PrepareBlockCallback func(data []byte, items []Item) ([]byte, []Item)
|
||||||
|
|
||||||
// mergeBlockStreams merges bsrs and writes result to bsw.
|
// mergeBlockStreams merges bsrs and writes result to bsw.
|
||||||
//
|
//
|
||||||
|
@ -122,8 +122,10 @@ again:
|
||||||
nextItem = bsm.bsrHeap[0].bh.firstItem
|
nextItem = bsm.bsrHeap[0].bh.firstItem
|
||||||
hasNextItem = true
|
hasNextItem = true
|
||||||
}
|
}
|
||||||
|
items := bsr.Block.items
|
||||||
|
data := bsr.Block.data
|
||||||
for bsr.blockItemIdx < len(bsr.Block.items) {
|
for bsr.blockItemIdx < len(bsr.Block.items) {
|
||||||
item := bsr.Block.items[bsr.blockItemIdx]
|
item := items[bsr.blockItemIdx].Bytes(data)
|
||||||
if hasNextItem && string(item) > string(nextItem) {
|
if hasNextItem && string(item) > string(nextItem) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -148,32 +150,36 @@ again:
|
||||||
|
|
||||||
// The next item in the bsr.Block exceeds nextItem.
|
// The next item in the bsr.Block exceeds nextItem.
|
||||||
// Adjust bsr.bh.firstItem and return bsr to heap.
|
// Adjust bsr.bh.firstItem and return bsr to heap.
|
||||||
bsr.bh.firstItem = append(bsr.bh.firstItem[:0], bsr.Block.items[bsr.blockItemIdx]...)
|
bsr.bh.firstItem = append(bsr.bh.firstItem[:0], bsr.Block.items[bsr.blockItemIdx].String(bsr.Block.data)...)
|
||||||
heap.Push(&bsm.bsrHeap, bsr)
|
heap.Push(&bsm.bsrHeap, bsr)
|
||||||
goto again
|
goto again
|
||||||
}
|
}
|
||||||
|
|
||||||
func (bsm *blockStreamMerger) flushIB(bsw *blockStreamWriter, ph *partHeader, itemsMerged *uint64) {
|
func (bsm *blockStreamMerger) flushIB(bsw *blockStreamWriter, ph *partHeader, itemsMerged *uint64) {
|
||||||
if len(bsm.ib.items) == 0 {
|
items := bsm.ib.items
|
||||||
|
data := bsm.ib.data
|
||||||
|
if len(items) == 0 {
|
||||||
// Nothing to flush.
|
// Nothing to flush.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
atomic.AddUint64(itemsMerged, uint64(len(bsm.ib.items)))
|
atomic.AddUint64(itemsMerged, uint64(len(items)))
|
||||||
if bsm.prepareBlock != nil {
|
if bsm.prepareBlock != nil {
|
||||||
bsm.firstItem = append(bsm.firstItem[:0], bsm.ib.items[0]...)
|
bsm.firstItem = append(bsm.firstItem[:0], items[0].String(data)...)
|
||||||
bsm.lastItem = append(bsm.lastItem[:0], bsm.ib.items[len(bsm.ib.items)-1]...)
|
bsm.lastItem = append(bsm.lastItem[:0], items[len(items)-1].String(data)...)
|
||||||
bsm.ib.data, bsm.ib.items = bsm.prepareBlock(bsm.ib.data, bsm.ib.items)
|
data, items = bsm.prepareBlock(data, items)
|
||||||
if len(bsm.ib.items) == 0 {
|
bsm.ib.data = data
|
||||||
|
bsm.ib.items = items
|
||||||
|
if len(items) == 0 {
|
||||||
// Nothing to flush
|
// Nothing to flush
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Consistency checks after prepareBlock call.
|
// Consistency checks after prepareBlock call.
|
||||||
firstItem := bsm.ib.items[0]
|
firstItem := items[0].String(data)
|
||||||
if string(firstItem) != string(bsm.firstItem) {
|
if firstItem != string(bsm.firstItem) {
|
||||||
logger.Panicf("BUG: prepareBlock must return first item equal to the original first item;\ngot\n%X\nwant\n%X", firstItem, bsm.firstItem)
|
logger.Panicf("BUG: prepareBlock must return first item equal to the original first item;\ngot\n%X\nwant\n%X", firstItem, bsm.firstItem)
|
||||||
}
|
}
|
||||||
lastItem := bsm.ib.items[len(bsm.ib.items)-1]
|
lastItem := items[len(items)-1].String(data)
|
||||||
if string(lastItem) != string(bsm.lastItem) {
|
if lastItem != string(bsm.lastItem) {
|
||||||
logger.Panicf("BUG: prepareBlock must return last item equal to the original last item;\ngot\n%X\nwant\n%X", lastItem, bsm.lastItem)
|
logger.Panicf("BUG: prepareBlock must return last item equal to the original last item;\ngot\n%X\nwant\n%X", lastItem, bsm.lastItem)
|
||||||
}
|
}
|
||||||
// Verify whether the bsm.ib.items are sorted only in tests, since this
|
// Verify whether the bsm.ib.items are sorted only in tests, since this
|
||||||
|
@ -182,12 +188,12 @@ func (bsm *blockStreamMerger) flushIB(bsw *blockStreamWriter, ph *partHeader, it
|
||||||
logger.Panicf("BUG: prepareBlock must return sorted items;\ngot\n%s", bsm.ib.debugItemsString())
|
logger.Panicf("BUG: prepareBlock must return sorted items;\ngot\n%s", bsm.ib.debugItemsString())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ph.itemsCount += uint64(len(bsm.ib.items))
|
ph.itemsCount += uint64(len(items))
|
||||||
if !bsm.phFirstItemCaught {
|
if !bsm.phFirstItemCaught {
|
||||||
ph.firstItem = append(ph.firstItem[:0], bsm.ib.items[0]...)
|
ph.firstItem = append(ph.firstItem[:0], items[0].String(data)...)
|
||||||
bsm.phFirstItemCaught = true
|
bsm.phFirstItemCaught = true
|
||||||
}
|
}
|
||||||
ph.lastItem = append(ph.lastItem[:0], bsm.ib.items[len(bsm.ib.items)-1]...)
|
ph.lastItem = append(ph.lastItem[:0], items[len(items)-1].String(data)...)
|
||||||
bsw.WriteBlock(&bsm.ib)
|
bsw.WriteBlock(&bsm.ib)
|
||||||
bsm.ib.Reset()
|
bsm.ib.Reset()
|
||||||
ph.blocksCount++
|
ph.blocksCount++
|
||||||
|
|
|
@ -157,10 +157,12 @@ func testCheckItems(dstIP *inmemoryPart, items []string) error {
|
||||||
if bh.itemsCount <= 0 {
|
if bh.itemsCount <= 0 {
|
||||||
return fmt.Errorf("unexpected empty block")
|
return fmt.Errorf("unexpected empty block")
|
||||||
}
|
}
|
||||||
if string(bh.firstItem) != string(dstBsr.Block.items[0]) {
|
item := dstBsr.Block.items[0].Bytes(dstBsr.Block.data)
|
||||||
return fmt.Errorf("unexpected blockHeader.firstItem; got %q; want %q", bh.firstItem, dstBsr.Block.items[0])
|
if string(bh.firstItem) != string(item) {
|
||||||
|
return fmt.Errorf("unexpected blockHeader.firstItem; got %q; want %q", bh.firstItem, item)
|
||||||
}
|
}
|
||||||
for _, item := range dstBsr.Block.items {
|
for _, it := range dstBsr.Block.items {
|
||||||
|
item := it.Bytes(dstBsr.Block.data)
|
||||||
dstItems = append(dstItems, string(item))
|
dstItems = append(dstItems, string(item))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -142,14 +142,17 @@ func (ps *partSearch) Seek(k []byte) {
|
||||||
|
|
||||||
// Locate the first item to scan in the block.
|
// Locate the first item to scan in the block.
|
||||||
items := ps.ib.items
|
items := ps.ib.items
|
||||||
|
data := ps.ib.data
|
||||||
cpLen := commonPrefixLen(ps.ib.commonPrefix, k)
|
cpLen := commonPrefixLen(ps.ib.commonPrefix, k)
|
||||||
if cpLen > 0 {
|
if cpLen > 0 {
|
||||||
keySuffix := k[cpLen:]
|
keySuffix := k[cpLen:]
|
||||||
ps.ibItemIdx = sort.Search(len(items), func(i int) bool {
|
ps.ibItemIdx = sort.Search(len(items), func(i int) bool {
|
||||||
return string(keySuffix) <= string(items[i][cpLen:])
|
it := items[i]
|
||||||
|
it.Start += uint32(cpLen)
|
||||||
|
return string(keySuffix) <= it.String(data)
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
ps.ibItemIdx = binarySearchKey(items, k)
|
ps.ibItemIdx = binarySearchKey(data, items, k)
|
||||||
}
|
}
|
||||||
if ps.ibItemIdx < len(items) {
|
if ps.ibItemIdx < len(items) {
|
||||||
// The item has been found.
|
// The item has been found.
|
||||||
|
@ -168,13 +171,14 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
|
||||||
if ps.ib == nil {
|
if ps.ib == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
data := ps.ib.data
|
||||||
items := ps.ib.items
|
items := ps.ib.items
|
||||||
idx := ps.ibItemIdx
|
idx := ps.ibItemIdx
|
||||||
if idx >= len(items) {
|
if idx >= len(items) {
|
||||||
// The ib is exhausted.
|
// The ib is exhausted.
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if string(k) > string(items[len(items)-1]) {
|
if string(k) > items[len(items)-1].String(data) {
|
||||||
// The item is located in next blocks.
|
// The item is located in next blocks.
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -183,8 +187,8 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
|
||||||
if idx > 0 {
|
if idx > 0 {
|
||||||
idx--
|
idx--
|
||||||
}
|
}
|
||||||
if string(k) < string(items[idx]) {
|
if string(k) < items[idx].String(data) {
|
||||||
if string(k) < string(items[0]) {
|
if string(k) < items[0].String(data) {
|
||||||
// The item is located in previous blocks.
|
// The item is located in previous blocks.
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -192,7 +196,7 @@ func (ps *partSearch) tryFastSeek(k []byte) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The item is located in the current block
|
// The item is located in the current block
|
||||||
ps.ibItemIdx = idx + binarySearchKey(items[idx:], k)
|
ps.ibItemIdx = idx + binarySearchKey(data, items[idx:], k)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,10 +208,11 @@ func (ps *partSearch) NextItem() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if ps.ibItemIdx < len(ps.ib.items) {
|
items := ps.ib.items
|
||||||
|
if ps.ibItemIdx < len(items) {
|
||||||
// Fast path - the current block contains more items.
|
// Fast path - the current block contains more items.
|
||||||
// Proceed to the next item.
|
// Proceed to the next item.
|
||||||
ps.Item = ps.ib.items[ps.ibItemIdx]
|
ps.Item = items[ps.ibItemIdx].Bytes(ps.ib.data)
|
||||||
ps.ibItemIdx++
|
ps.ibItemIdx++
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -219,7 +224,7 @@ func (ps *partSearch) NextItem() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Invariant: len(ps.ib.items) > 0 after nextBlock.
|
// Invariant: len(ps.ib.items) > 0 after nextBlock.
|
||||||
ps.Item = ps.ib.items[0]
|
ps.Item = ps.ib.items[0].Bytes(ps.ib.data)
|
||||||
ps.ibItemIdx++
|
ps.ibItemIdx++
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -319,11 +324,11 @@ func (ps *partSearch) readInmemoryBlock(bh *blockHeader) (*inmemoryBlock, error)
|
||||||
return ib, nil
|
return ib, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func binarySearchKey(items [][]byte, key []byte) int {
|
func binarySearchKey(data []byte, items []Item, key []byte) int {
|
||||||
if len(items) == 0 {
|
if len(items) == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
if string(key) <= string(items[0]) {
|
if string(key) <= items[0].String(data) {
|
||||||
// Fast path - the item is the first.
|
// Fast path - the item is the first.
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
@ -335,7 +340,7 @@ func binarySearchKey(items [][]byte, key []byte) int {
|
||||||
i, j := uint(0), n
|
i, j := uint(0), n
|
||||||
for i < j {
|
for i < j {
|
||||||
h := uint(i+j) >> 1
|
h := uint(i+j) >> 1
|
||||||
if h >= 0 && h < uint(len(items)) && string(key) > string(items[h]) {
|
if h >= 0 && h < uint(len(items)) && string(key) > items[h].String(data) {
|
||||||
i = h + 1
|
i = h + 1
|
||||||
} else {
|
} else {
|
||||||
j = h
|
j = h
|
||||||
|
|
|
@ -46,7 +46,7 @@ func benchmarkTableSearch(b *testing.B, itemsCount int) {
|
||||||
b.Run("sequential-keys-exact", func(b *testing.B) {
|
b.Run("sequential-keys-exact", func(b *testing.B) {
|
||||||
benchmarkTableSearchKeys(b, tb, keys, 0)
|
benchmarkTableSearchKeys(b, tb, keys, 0)
|
||||||
})
|
})
|
||||||
b.Run("sequential-keys-without-siffux", func(b *testing.B) {
|
b.Run("sequential-keys-without-suffix", func(b *testing.B) {
|
||||||
benchmarkTableSearchKeys(b, tb, keys, 4)
|
benchmarkTableSearchKeys(b, tb, keys, 4)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ func benchmarkTableSearch(b *testing.B, itemsCount int) {
|
||||||
b.Run("random-keys-exact", func(b *testing.B) {
|
b.Run("random-keys-exact", func(b *testing.B) {
|
||||||
benchmarkTableSearchKeys(b, tb, randKeys, 0)
|
benchmarkTableSearchKeys(b, tb, randKeys, 0)
|
||||||
})
|
})
|
||||||
b.Run("random-keys-without-siffux", func(b *testing.B) {
|
b.Run("random-keys-without-suffix", func(b *testing.B) {
|
||||||
benchmarkTableSearchKeys(b, tb, randKeys, 4)
|
benchmarkTableSearchKeys(b, tb, randKeys, 4)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -218,7 +218,7 @@ func TestTableAddItemsConcurrent(t *testing.T) {
|
||||||
atomic.AddUint64(&flushes, 1)
|
atomic.AddUint64(&flushes, 1)
|
||||||
}
|
}
|
||||||
var itemsMerged uint64
|
var itemsMerged uint64
|
||||||
prepareBlock := func(data []byte, items [][]byte) ([]byte, [][]byte) {
|
prepareBlock := func(data []byte, items []Item) ([]byte, []Item) {
|
||||||
atomic.AddUint64(&itemsMerged, uint64(len(items)))
|
atomic.AddUint64(&itemsMerged, uint64(len(items)))
|
||||||
return data, items
|
return data, items
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,6 @@ func testBlockHeaderMarshalUnmarshal(t *testing.T, bh *blockHeader) {
|
||||||
t.Fatalf("unexpected tail after unmarshaling bh=%+v; got\n%x; want\n%x", bh, tail, suffix)
|
t.Fatalf("unexpected tail after unmarshaling bh=%+v; got\n%x; want\n%x", bh, tail, suffix)
|
||||||
}
|
}
|
||||||
if !reflect.DeepEqual(bh, &bh2) {
|
if !reflect.DeepEqual(bh, &bh2) {
|
||||||
t.Fatalf("unexpected bh unmarshaled after adding siffux; got\n%+v; want\n%+v", &bh2, bh)
|
t.Fatalf("unexpected bh unmarshaled after adding suffix; got\n%+v; want\n%+v", &bh2, bh)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3461,24 +3461,24 @@ func (mp *tagToMetricIDsRowParser) IsDeletedTag(dmis *uint64set.Set) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func mergeTagToMetricIDsRows(data []byte, items [][]byte) ([]byte, [][]byte) {
|
func mergeTagToMetricIDsRows(data []byte, items []mergeset.Item) ([]byte, []mergeset.Item) {
|
||||||
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixTagToMetricIDs)
|
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixTagToMetricIDs)
|
||||||
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixDateTagToMetricIDs)
|
data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixDateTagToMetricIDs)
|
||||||
return data, items
|
return data, items
|
||||||
}
|
}
|
||||||
|
|
||||||
func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte) ([]byte, [][]byte) {
|
func mergeTagToMetricIDsRowsInternal(data []byte, items []mergeset.Item, nsPrefix byte) ([]byte, []mergeset.Item) {
|
||||||
// Perform quick checks whether items contain rows starting from nsPrefix
|
// Perform quick checks whether items contain rows starting from nsPrefix
|
||||||
// based on the fact that items are sorted.
|
// based on the fact that items are sorted.
|
||||||
if len(items) <= 2 {
|
if len(items) <= 2 {
|
||||||
// The first and the last row must remain unchanged.
|
// The first and the last row must remain unchanged.
|
||||||
return data, items
|
return data, items
|
||||||
}
|
}
|
||||||
firstItem := items[0]
|
firstItem := items[0].Bytes(data)
|
||||||
if len(firstItem) > 0 && firstItem[0] > nsPrefix {
|
if len(firstItem) > 0 && firstItem[0] > nsPrefix {
|
||||||
return data, items
|
return data, items
|
||||||
}
|
}
|
||||||
lastItem := items[len(items)-1]
|
lastItem := items[len(items)-1].Bytes(data)
|
||||||
if len(lastItem) > 0 && lastItem[0] < nsPrefix {
|
if len(lastItem) > 0 && lastItem[0] < nsPrefix {
|
||||||
return data, items
|
return data, items
|
||||||
}
|
}
|
||||||
|
@ -3491,14 +3491,18 @@ func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte)
|
||||||
mpPrev := &tmm.mpPrev
|
mpPrev := &tmm.mpPrev
|
||||||
dstData := data[:0]
|
dstData := data[:0]
|
||||||
dstItems := items[:0]
|
dstItems := items[:0]
|
||||||
for i, item := range items {
|
for i, it := range items {
|
||||||
|
item := it.Bytes(data)
|
||||||
if len(item) == 0 || item[0] != nsPrefix || i == 0 || i == len(items)-1 {
|
if len(item) == 0 || item[0] != nsPrefix || i == 0 || i == len(items)-1 {
|
||||||
// Write rows not starting with nsPrefix as-is.
|
// Write rows not starting with nsPrefix as-is.
|
||||||
// Additionally write the first and the last row as-is in order to preserve
|
// Additionally write the first and the last row as-is in order to preserve
|
||||||
// sort order for adjancent blocks.
|
// sort order for adjancent blocks.
|
||||||
dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
|
dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
|
||||||
dstData = append(dstData, item...)
|
dstData = append(dstData, item...)
|
||||||
dstItems = append(dstItems, dstData[len(dstData)-len(item):])
|
dstItems = append(dstItems, mergeset.Item{
|
||||||
|
Start: uint32(len(dstData) - len(item)),
|
||||||
|
End: uint32(len(dstData)),
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := mp.Init(item, nsPrefix); err != nil {
|
if err := mp.Init(item, nsPrefix); err != nil {
|
||||||
|
@ -3507,7 +3511,10 @@ func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte)
|
||||||
if mp.MetricIDsLen() >= maxMetricIDsPerRow {
|
if mp.MetricIDsLen() >= maxMetricIDsPerRow {
|
||||||
dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
|
dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
|
||||||
dstData = append(dstData, item...)
|
dstData = append(dstData, item...)
|
||||||
dstItems = append(dstItems, dstData[len(dstData)-len(item):])
|
dstItems = append(dstItems, mergeset.Item{
|
||||||
|
Start: uint32(len(dstData) - len(item)),
|
||||||
|
End: uint32(len(dstData)),
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if !mp.EqualPrefix(mpPrev) {
|
if !mp.EqualPrefix(mpPrev) {
|
||||||
|
@ -3523,7 +3530,7 @@ func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte)
|
||||||
if len(tmm.pendingMetricIDs) > 0 {
|
if len(tmm.pendingMetricIDs) > 0 {
|
||||||
logger.Panicf("BUG: tmm.pendingMetricIDs must be empty at this point; got %d items: %d", len(tmm.pendingMetricIDs), tmm.pendingMetricIDs)
|
logger.Panicf("BUG: tmm.pendingMetricIDs must be empty at this point; got %d items: %d", len(tmm.pendingMetricIDs), tmm.pendingMetricIDs)
|
||||||
}
|
}
|
||||||
if !checkItemsSorted(dstItems) {
|
if !checkItemsSorted(dstData, dstItems) {
|
||||||
// Items could become unsorted if initial items contain duplicate metricIDs:
|
// Items could become unsorted if initial items contain duplicate metricIDs:
|
||||||
//
|
//
|
||||||
// item1: 1, 1, 5
|
// item1: 1, 1, 5
|
||||||
|
@ -3541,15 +3548,8 @@ func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte)
|
||||||
// into the same new time series from multiple concurrent goroutines.
|
// into the same new time series from multiple concurrent goroutines.
|
||||||
atomic.AddUint64(&indexBlocksWithMetricIDsIncorrectOrder, 1)
|
atomic.AddUint64(&indexBlocksWithMetricIDsIncorrectOrder, 1)
|
||||||
dstData = append(dstData[:0], tmm.dataCopy...)
|
dstData = append(dstData[:0], tmm.dataCopy...)
|
||||||
dstItems = dstItems[:0]
|
dstItems = append(dstItems[:0], tmm.itemsCopy...)
|
||||||
// tmm.itemsCopy can point to overwritten data, so it must be updated
|
if !checkItemsSorted(dstData, dstItems) {
|
||||||
// to point to real data from tmm.dataCopy.
|
|
||||||
buf := dstData
|
|
||||||
for _, item := range tmm.itemsCopy {
|
|
||||||
dstItems = append(dstItems, buf[:len(item)])
|
|
||||||
buf = buf[len(item):]
|
|
||||||
}
|
|
||||||
if !checkItemsSorted(dstItems) {
|
|
||||||
logger.Panicf("BUG: the original items weren't sorted; items=%q", dstItems)
|
logger.Panicf("BUG: the original items weren't sorted; items=%q", dstItems)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3561,13 +3561,14 @@ func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte)
|
||||||
var indexBlocksWithMetricIDsIncorrectOrder uint64
|
var indexBlocksWithMetricIDsIncorrectOrder uint64
|
||||||
var indexBlocksWithMetricIDsProcessed uint64
|
var indexBlocksWithMetricIDsProcessed uint64
|
||||||
|
|
||||||
func checkItemsSorted(items [][]byte) bool {
|
func checkItemsSorted(data []byte, items []mergeset.Item) bool {
|
||||||
if len(items) == 0 {
|
if len(items) == 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
prevItem := items[0]
|
prevItem := items[0].String(data)
|
||||||
for _, currItem := range items[1:] {
|
for _, it := range items[1:] {
|
||||||
if string(prevItem) > string(currItem) {
|
currItem := it.String(data)
|
||||||
|
if prevItem > currItem {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
prevItem = currItem
|
prevItem = currItem
|
||||||
|
@ -3595,7 +3596,7 @@ type tagToMetricIDsRowsMerger struct {
|
||||||
mp tagToMetricIDsRowParser
|
mp tagToMetricIDsRowParser
|
||||||
mpPrev tagToMetricIDsRowParser
|
mpPrev tagToMetricIDsRowParser
|
||||||
|
|
||||||
itemsCopy [][]byte
|
itemsCopy []mergeset.Item
|
||||||
dataCopy []byte
|
dataCopy []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3608,7 +3609,7 @@ func (tmm *tagToMetricIDsRowsMerger) Reset() {
|
||||||
tmm.dataCopy = tmm.dataCopy[:0]
|
tmm.dataCopy = tmm.dataCopy[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tmm *tagToMetricIDsRowsMerger) flushPendingMetricIDs(dstData []byte, dstItems [][]byte, mp *tagToMetricIDsRowParser) ([]byte, [][]byte) {
|
func (tmm *tagToMetricIDsRowsMerger) flushPendingMetricIDs(dstData []byte, dstItems []mergeset.Item, mp *tagToMetricIDsRowParser) ([]byte, []mergeset.Item) {
|
||||||
if len(tmm.pendingMetricIDs) == 0 {
|
if len(tmm.pendingMetricIDs) == 0 {
|
||||||
// Nothing to flush
|
// Nothing to flush
|
||||||
return dstData, dstItems
|
return dstData, dstItems
|
||||||
|
@ -3623,7 +3624,10 @@ func (tmm *tagToMetricIDsRowsMerger) flushPendingMetricIDs(dstData []byte, dstIt
|
||||||
for _, metricID := range tmm.pendingMetricIDs {
|
for _, metricID := range tmm.pendingMetricIDs {
|
||||||
dstData = encoding.MarshalUint64(dstData, metricID)
|
dstData = encoding.MarshalUint64(dstData, metricID)
|
||||||
}
|
}
|
||||||
dstItems = append(dstItems, dstData[dstDataLen:])
|
dstItems = append(dstItems, mergeset.Item{
|
||||||
|
Start: uint32(dstDataLen),
|
||||||
|
End: uint32(len(dstData)),
|
||||||
|
})
|
||||||
tmm.pendingMetricIDs = tmm.pendingMetricIDs[:0]
|
tmm.pendingMetricIDs = tmm.pendingMetricIDs[:0]
|
||||||
return dstData, dstItems
|
return dstData, dstItems
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ import (
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
|
||||||
)
|
)
|
||||||
|
@ -36,33 +37,38 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
|
||||||
f := func(items []string, expectedItems []string) {
|
f := func(items []string, expectedItems []string) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
var data []byte
|
var data []byte
|
||||||
var itemsB [][]byte
|
var itemsB []mergeset.Item
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
data = append(data, item...)
|
data = append(data, item...)
|
||||||
itemsB = append(itemsB, data[len(data)-len(item):])
|
itemsB = append(itemsB, mergeset.Item{
|
||||||
|
Start: uint32(len(data) - len(item)),
|
||||||
|
End: uint32(len(data)),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
if !checkItemsSorted(itemsB) {
|
if !checkItemsSorted(data, itemsB) {
|
||||||
t.Fatalf("source items aren't sorted; items:\n%q", itemsB)
|
t.Fatalf("source items aren't sorted; items:\n%q", itemsB)
|
||||||
}
|
}
|
||||||
resultData, resultItemsB := mergeTagToMetricIDsRows(data, itemsB)
|
resultData, resultItemsB := mergeTagToMetricIDsRows(data, itemsB)
|
||||||
if len(resultItemsB) != len(expectedItems) {
|
if len(resultItemsB) != len(expectedItems) {
|
||||||
t.Fatalf("unexpected len(resultItemsB); got %d; want %d", len(resultItemsB), len(expectedItems))
|
t.Fatalf("unexpected len(resultItemsB); got %d; want %d", len(resultItemsB), len(expectedItems))
|
||||||
}
|
}
|
||||||
if !checkItemsSorted(resultItemsB) {
|
if !checkItemsSorted(resultData, resultItemsB) {
|
||||||
t.Fatalf("result items aren't sorted; items:\n%q", resultItemsB)
|
t.Fatalf("result items aren't sorted; items:\n%q", resultItemsB)
|
||||||
}
|
}
|
||||||
for i, item := range resultItemsB {
|
buf := resultData
|
||||||
if !bytes.HasPrefix(resultData, item) {
|
for i, it := range resultItemsB {
|
||||||
t.Fatalf("unexpected prefix for resultData #%d;\ngot\n%X\nwant\n%X", i, resultData, item)
|
item := it.Bytes(resultData)
|
||||||
|
if !bytes.HasPrefix(buf, item) {
|
||||||
|
t.Fatalf("unexpected prefix for resultData #%d;\ngot\n%X\nwant\n%X", i, buf, item)
|
||||||
}
|
}
|
||||||
resultData = resultData[len(item):]
|
buf = buf[len(item):]
|
||||||
}
|
}
|
||||||
if len(resultData) != 0 {
|
if len(buf) != 0 {
|
||||||
t.Fatalf("unexpected tail left in resultData: %X", resultData)
|
t.Fatalf("unexpected tail left in resultData: %X", buf)
|
||||||
}
|
}
|
||||||
var resultItems []string
|
var resultItems []string
|
||||||
for _, item := range resultItemsB {
|
for _, it := range resultItemsB {
|
||||||
resultItems = append(resultItems, string(item))
|
resultItems = append(resultItems, string(it.Bytes(resultData)))
|
||||||
}
|
}
|
||||||
if !reflect.DeepEqual(expectedItems, resultItems) {
|
if !reflect.DeepEqual(expectedItems, resultItems) {
|
||||||
t.Fatalf("unexpected items;\ngot\n%X\nwant\n%X", resultItems, expectedItems)
|
t.Fatalf("unexpected items;\ngot\n%X\nwant\n%X", resultItems, expectedItems)
|
||||||
|
|
Loading…
Reference in a new issue