2019-05-22 21:16:55 +00:00
|
|
|
package mergeset
|
|
|
|
|
|
|
|
import (
|
2020-09-17 00:02:35 +00:00
|
|
|
"errors"
|
2019-05-22 21:16:55 +00:00
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
|
|
|
"reflect"
|
|
|
|
"sort"
|
2024-02-23 21:29:23 +00:00
|
|
|
"sync/atomic"
|
2019-05-22 21:16:55 +00:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestMergeBlockStreams(t *testing.T) {
|
|
|
|
for _, blocksToMerge := range []int{1, 2, 3, 4, 5, 10, 20} {
|
|
|
|
t.Run(fmt.Sprintf("blocks-%d", blocksToMerge), func(t *testing.T) {
|
|
|
|
for _, maxItemsPerBlock := range []int{1, 2, 10, 100, 1000, 10000} {
|
|
|
|
t.Run(fmt.Sprintf("maxItemsPerBlock-%d", maxItemsPerBlock), func(t *testing.T) {
|
|
|
|
testMergeBlockStreams(t, blocksToMerge, maxItemsPerBlock)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestMultilevelMerge(t *testing.T) {
|
2023-01-24 03:43:39 +00:00
|
|
|
r := rand.New(rand.NewSource(1))
|
|
|
|
|
2019-05-22 21:16:55 +00:00
|
|
|
// Prepare blocks to merge.
|
2023-01-24 03:43:39 +00:00
|
|
|
bsrs, items := newTestInmemoryBlockStreamReaders(r, 10, 4000)
|
2024-02-23 21:29:23 +00:00
|
|
|
var itemsMerged atomic.Uint64
|
2019-05-22 21:16:55 +00:00
|
|
|
|
|
|
|
// First level merge
|
|
|
|
var dstIP1 inmemoryPart
|
|
|
|
var bsw1 blockStreamWriter
|
2023-04-14 22:46:09 +00:00
|
|
|
bsw1.MustInitFromInmemoryPart(&dstIP1, -5)
|
2020-07-30 16:57:25 +00:00
|
|
|
if err := mergeBlockStreams(&dstIP1.ph, &bsw1, bsrs[:5], nil, nil, &itemsMerged); err != nil {
|
2019-05-22 21:16:55 +00:00
|
|
|
t.Fatalf("cannot merge first level part 1: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var dstIP2 inmemoryPart
|
|
|
|
var bsw2 blockStreamWriter
|
2023-04-14 22:46:09 +00:00
|
|
|
bsw2.MustInitFromInmemoryPart(&dstIP2, -5)
|
2020-07-30 16:57:25 +00:00
|
|
|
if err := mergeBlockStreams(&dstIP2.ph, &bsw2, bsrs[5:], nil, nil, &itemsMerged); err != nil {
|
2019-05-22 21:16:55 +00:00
|
|
|
t.Fatalf("cannot merge first level part 2: %s", err)
|
|
|
|
}
|
|
|
|
|
2024-02-23 21:29:23 +00:00
|
|
|
if n := itemsMerged.Load(); n != uint64(len(items)) {
|
|
|
|
t.Fatalf("unexpected itemsMerged; got %d; want %d", n, len(items))
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Second level merge (aka final merge)
|
2024-02-23 21:29:23 +00:00
|
|
|
itemsMerged.Store(0)
|
2019-05-22 21:16:55 +00:00
|
|
|
var dstIP inmemoryPart
|
|
|
|
var bsw blockStreamWriter
|
|
|
|
bsrsTop := []*blockStreamReader{
|
|
|
|
newTestBlockStreamReader(&dstIP1),
|
|
|
|
newTestBlockStreamReader(&dstIP2),
|
|
|
|
}
|
2023-04-14 22:46:09 +00:00
|
|
|
bsw.MustInitFromInmemoryPart(&dstIP, 1)
|
2020-07-30 16:57:25 +00:00
|
|
|
if err := mergeBlockStreams(&dstIP.ph, &bsw, bsrsTop, nil, nil, &itemsMerged); err != nil {
|
2019-05-22 21:16:55 +00:00
|
|
|
t.Fatalf("cannot merge second level: %s", err)
|
|
|
|
}
|
2024-02-23 21:29:23 +00:00
|
|
|
if n := itemsMerged.Load(); n != uint64(len(items)) {
|
|
|
|
t.Fatalf("unexpected itemsMerged after final merge; got %d; want %d", n, len(items))
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Verify the resulting part (dstIP) contains all the items
|
|
|
|
// in the correct order.
|
|
|
|
if err := testCheckItems(&dstIP, items); err != nil {
|
|
|
|
t.Fatalf("error checking items: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestMergeForciblyStop(t *testing.T) {
|
2023-01-24 03:43:39 +00:00
|
|
|
r := rand.New(rand.NewSource(1))
|
|
|
|
bsrs, _ := newTestInmemoryBlockStreamReaders(r, 20, 4000)
|
2019-05-22 21:16:55 +00:00
|
|
|
var dstIP inmemoryPart
|
|
|
|
var bsw blockStreamWriter
|
2023-04-14 22:46:09 +00:00
|
|
|
bsw.MustInitFromInmemoryPart(&dstIP, 1)
|
2019-05-22 21:16:55 +00:00
|
|
|
ch := make(chan struct{})
|
2024-02-23 21:29:23 +00:00
|
|
|
var itemsMerged atomic.Uint64
|
2019-05-22 21:16:55 +00:00
|
|
|
close(ch)
|
2020-09-17 00:02:35 +00:00
|
|
|
if err := mergeBlockStreams(&dstIP.ph, &bsw, bsrs, nil, ch, &itemsMerged); !errors.Is(err, errForciblyStopped) {
|
2019-05-22 21:16:55 +00:00
|
|
|
t.Fatalf("unexpected error during merge: got %v; want %v", err, errForciblyStopped)
|
|
|
|
}
|
2024-02-23 21:29:23 +00:00
|
|
|
if n := itemsMerged.Load(); n != 0 {
|
|
|
|
t.Fatalf("unexpected itemsMerged; got %d; want %d", n, 0)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func testMergeBlockStreams(t *testing.T, blocksToMerge, maxItemsPerBlock int) {
|
|
|
|
t.Helper()
|
|
|
|
|
2023-01-24 03:43:39 +00:00
|
|
|
r := rand.New(rand.NewSource(1))
|
|
|
|
if err := testMergeBlockStreamsSerial(r, blocksToMerge, maxItemsPerBlock); err != nil {
|
2019-05-22 21:16:55 +00:00
|
|
|
t.Fatalf("unexpected error in serial test: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
const concurrency = 3
|
|
|
|
ch := make(chan error, concurrency)
|
|
|
|
for i := 0; i < concurrency; i++ {
|
2023-01-24 03:43:39 +00:00
|
|
|
go func(n int) {
|
|
|
|
rLocal := rand.New(rand.NewSource(int64(n)))
|
|
|
|
ch <- testMergeBlockStreamsSerial(rLocal, blocksToMerge, maxItemsPerBlock)
|
|
|
|
}(i)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < concurrency; i++ {
|
|
|
|
select {
|
|
|
|
case err := <-ch:
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("unexpected error in concurrent test: %s", err)
|
|
|
|
}
|
|
|
|
case <-time.After(10 * time.Second):
|
|
|
|
t.Fatalf("timeout in concurrent test")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-24 03:43:39 +00:00
|
|
|
func testMergeBlockStreamsSerial(r *rand.Rand, blocksToMerge, maxItemsPerBlock int) error {
|
2019-05-22 21:16:55 +00:00
|
|
|
// Prepare blocks to merge.
|
2023-01-24 03:43:39 +00:00
|
|
|
bsrs, items := newTestInmemoryBlockStreamReaders(r, blocksToMerge, maxItemsPerBlock)
|
2019-05-22 21:16:55 +00:00
|
|
|
|
|
|
|
// Merge blocks.
|
2024-02-23 21:29:23 +00:00
|
|
|
var itemsMerged atomic.Uint64
|
2019-05-22 21:16:55 +00:00
|
|
|
var dstIP inmemoryPart
|
|
|
|
var bsw blockStreamWriter
|
2023-04-14 22:46:09 +00:00
|
|
|
bsw.MustInitFromInmemoryPart(&dstIP, -4)
|
2020-07-30 16:57:25 +00:00
|
|
|
if err := mergeBlockStreams(&dstIP.ph, &bsw, bsrs, nil, nil, &itemsMerged); err != nil {
|
2020-06-30 19:58:18 +00:00
|
|
|
return fmt.Errorf("cannot merge block streams: %w", err)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
2024-02-23 21:29:23 +00:00
|
|
|
if n := itemsMerged.Load(); n != uint64(len(items)) {
|
|
|
|
return fmt.Errorf("unexpected itemsMerged; got %d; want %d", n, len(items))
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Verify the resulting part (dstIP) contains all the items
|
|
|
|
// in the correct order.
|
|
|
|
if err := testCheckItems(&dstIP, items); err != nil {
|
2020-06-30 19:58:18 +00:00
|
|
|
return fmt.Errorf("error checking items: %w", err)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func testCheckItems(dstIP *inmemoryPart, items []string) error {
|
|
|
|
if int(dstIP.ph.itemsCount) != len(items) {
|
|
|
|
return fmt.Errorf("unexpected number of items in the part; got %d; want %d", dstIP.ph.itemsCount, len(items))
|
|
|
|
}
|
|
|
|
if string(dstIP.ph.firstItem) != string(items[0]) {
|
|
|
|
return fmt.Errorf("unexpected first item; got %q; want %q", dstIP.ph.firstItem, items[0])
|
|
|
|
}
|
|
|
|
if string(dstIP.ph.lastItem) != string(items[len(items)-1]) {
|
|
|
|
return fmt.Errorf("unexpected last item; got %q; want %q", dstIP.ph.lastItem, items[len(items)-1])
|
|
|
|
}
|
|
|
|
|
|
|
|
var dstItems []string
|
|
|
|
dstBsr := newTestBlockStreamReader(dstIP)
|
|
|
|
for dstBsr.Next() {
|
|
|
|
bh := dstBsr.bh
|
|
|
|
if int(bh.itemsCount) != len(dstBsr.Block.items) {
|
|
|
|
return fmt.Errorf("unexpected number of items in the block; got %d; want %d", len(dstBsr.Block.items), bh.itemsCount)
|
|
|
|
}
|
|
|
|
if bh.itemsCount <= 0 {
|
|
|
|
return fmt.Errorf("unexpected empty block")
|
|
|
|
}
|
2021-02-21 20:06:45 +00:00
|
|
|
item := dstBsr.Block.items[0].Bytes(dstBsr.Block.data)
|
|
|
|
if string(bh.firstItem) != string(item) {
|
|
|
|
return fmt.Errorf("unexpected blockHeader.firstItem; got %q; want %q", bh.firstItem, item)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
2021-02-21 20:06:45 +00:00
|
|
|
for _, it := range dstBsr.Block.items {
|
|
|
|
item := it.Bytes(dstBsr.Block.data)
|
2019-05-22 21:16:55 +00:00
|
|
|
dstItems = append(dstItems, string(item))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := dstBsr.Error(); err != nil {
|
2020-06-30 19:58:18 +00:00
|
|
|
return fmt.Errorf("unexpected error in dstBsr: %w", err)
|
2019-05-22 21:16:55 +00:00
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(items, dstItems) {
|
|
|
|
return fmt.Errorf("unequal items\ngot\n%q\nwant\n%q", dstItems, items)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-01-24 03:43:39 +00:00
|
|
|
func newTestInmemoryBlockStreamReaders(r *rand.Rand, blocksCount, maxItemsPerBlock int) ([]*blockStreamReader, []string) {
|
2019-05-22 21:16:55 +00:00
|
|
|
var items []string
|
|
|
|
var bsrs []*blockStreamReader
|
|
|
|
for i := 0; i < blocksCount; i++ {
|
|
|
|
var ib inmemoryBlock
|
2023-01-24 03:43:39 +00:00
|
|
|
itemsPerBlock := r.Intn(maxItemsPerBlock) + 1
|
2019-05-22 21:16:55 +00:00
|
|
|
for j := 0; j < itemsPerBlock; j++ {
|
2023-01-24 03:43:39 +00:00
|
|
|
item := getRandomBytes(r)
|
2019-05-22 21:16:55 +00:00
|
|
|
if !ib.Add(item) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
items = append(items, string(item))
|
|
|
|
}
|
|
|
|
var ip inmemoryPart
|
|
|
|
ip.Init(&ib)
|
|
|
|
bsr := newTestBlockStreamReader(&ip)
|
|
|
|
bsrs = append(bsrs, bsr)
|
|
|
|
}
|
|
|
|
sort.Strings(items)
|
|
|
|
return bsrs, items
|
|
|
|
}
|
|
|
|
|
|
|
|
func newTestBlockStreamReader(ip *inmemoryPart) *blockStreamReader {
|
|
|
|
var bsr blockStreamReader
|
2023-04-14 22:46:09 +00:00
|
|
|
bsr.MustInitFromInmemoryPart(ip)
|
2019-05-22 21:16:55 +00:00
|
|
|
return &bsr
|
|
|
|
}
|