VictoriaMetrics/lib/logstorage/inmemory_part_test.go
Nikolay e45556fc05
lib/logstorage: fixes panic at Block.MustInitFromRows (#7695)
Previously Block columns wasn't properly limited by maxColumnsPerBlock.
And it was possible a case, when more columns per block added than
expected.
 For example, if ingested log stream has many unuqie fields
and it's sum exceed maxColumnsPerBlock.
 We only enforce fieldsPerBlock limit during row parsing, which limits
isn't enough to mitigate this issue. Also it
would be very expensive to apply maxColumnsPerBlock limit during
ingestion, since it requires to track all possible field tags
combinations.

 This commit adds check for maxColumnsPerBlock limit during
MustInitFromRows function call. And it returns offset of the rows and
timestamps added to the block.
 Function caller must create another block and ingest remaining rows
into it.

Related issue:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7568

### Describe Your Changes

Please provide a brief description of the changes you made. Be as
specific as possible to help others understand the purpose and impact of
your modifications.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: f41gh7 <nik@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2024-11-30 18:06:58 +01:00

381 lines
12 KiB
Go

package logstorage
import (
"fmt"
"math"
"math/rand"
"reflect"
"sort"
"testing"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
func TestInmemoryPartMustInitFromRows(t *testing.T) {
f := func(lr *LogRows, blocksCountExpected int, compressionRateExpected float64) {
t.Helper()
uncompressedSizeBytesExpected := uncompressedRowsSizeBytes(lr.rows)
rowsCountExpected := len(lr.timestamps)
minTimestampExpected := int64(math.MaxInt64)
maxTimestampExpected := int64(math.MinInt64)
// make a copy of lr - it is used for comapring the results later,
// since lr may be modified by inmemoryPart.mustInitFromRows()
lrOrig := GetLogRows(nil, nil, nil, "")
for i, timestamp := range lr.timestamps {
if timestamp < minTimestampExpected {
minTimestampExpected = timestamp
}
if timestamp > maxTimestampExpected {
maxTimestampExpected = timestamp
}
lrOrig.mustAddInternal(lr.streamIDs[i], timestamp, lr.rows[i], lr.streamTagsCanonicals[i])
}
// Create inmemory part from lr
mp := getInmemoryPart()
mp.mustInitFromRows(lr)
// Check mp.ph
ph := &mp.ph
checkCompressionRate(t, ph, compressionRateExpected)
if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
t.Fatalf("unexpected UncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
}
if ph.RowsCount != uint64(rowsCountExpected) {
t.Fatalf("unexpected rowsCount in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
}
if ph.BlocksCount != uint64(blocksCountExpected) {
t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
}
if ph.RowsCount > 0 {
if ph.MinTimestamp != minTimestampExpected {
t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
}
if ph.MaxTimestamp != maxTimestampExpected {
t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
}
}
// Read log entries from mp to rrsResult
sbu := getStringsBlockUnmarshaler()
defer putStringsBlockUnmarshaler(sbu)
vd := getValuesDecoder()
defer putValuesDecoder(vd)
lrResult := mp.readLogRows(sbu, vd)
putInmemoryPart(mp)
// compare lrOrig to lrResult
if err := checkEqualRows(lrResult, lrOrig); err != nil {
t.Fatalf("unequal log entries: %s", err)
}
}
f(GetLogRows(nil, nil, nil, ""), 0, 0)
// Check how inmemoryPart works with a single stream
f(newTestLogRows(1, 1, 0), 1, 0.7)
f(newTestLogRows(1, 2, 0), 1, 0.9)
f(newTestLogRows(1, 10, 0), 1, 2.0)
f(newTestLogRows(1, 1000, 0), 1, 7.1)
f(newTestLogRows(1, 20000, 0), 2, 7.2)
// Check how inmemoryPart works with multiple streams
f(newTestLogRows(2, 1, 0), 2, 0.8)
f(newTestLogRows(10, 1, 0), 10, 1.1)
f(newTestLogRows(100, 1, 0), 100, 1.2)
f(newTestLogRows(10, 5, 0), 10, 1.5)
f(newTestLogRows(10, 1000, 0), 10, 7.2)
f(newTestLogRows(100, 100, 0), 100, 5.0)
// check block overflow with unique tag rows
f(newTestLogRowsUniqTags(5, 21, 100), 10, 0.4)
f(newTestLogRowsUniqTags(5, 10, 100), 5, 0.5)
f(newTestLogRowsUniqTags(1, 2001, 1), 2, 1.4)
f(newTestLogRowsUniqTags(15, 20, 250), 45, 0.6)
}
func checkCompressionRate(t *testing.T, ph *partHeader, compressionRateExpected float64) {
t.Helper()
compressionRate := float64(ph.UncompressedSizeBytes) / float64(ph.CompressedSizeBytes)
if math.Abs(compressionRate-compressionRateExpected) > math.Abs(compressionRate+compressionRateExpected)*0.05 {
t.Fatalf("unexpected compression rate; got %.1f; want %.1f", compressionRate, compressionRateExpected)
}
}
func TestInmemoryPartInitFromBlockStreamReaders(t *testing.T) {
f := func(lrs []*LogRows, blocksCountExpected int, compressionRateExpected float64) {
t.Helper()
uncompressedSizeBytesExpected := uint64(0)
rowsCountExpected := 0
minTimestampExpected := int64(math.MaxInt64)
maxTimestampExpected := int64(math.MinInt64)
// make a copy of rrss in order to compare the results after merge.
lrOrig := GetLogRows(nil, nil, nil, "")
for _, lr := range lrs {
uncompressedSizeBytesExpected += uncompressedRowsSizeBytes(lr.rows)
rowsCountExpected += len(lr.timestamps)
for j, timestamp := range lr.timestamps {
if timestamp < minTimestampExpected {
minTimestampExpected = timestamp
}
if timestamp > maxTimestampExpected {
maxTimestampExpected = timestamp
}
lrOrig.mustAddInternal(lr.streamIDs[j], timestamp, lr.rows[j], lr.streamTagsCanonicals[j])
}
}
// Initialize readers from lrs
var mpsSrc []*inmemoryPart
var bsrs []*blockStreamReader
for _, lr := range lrs {
mp := getInmemoryPart()
mp.mustInitFromRows(lr)
mpsSrc = append(mpsSrc, mp)
bsr := getBlockStreamReader()
bsr.MustInitFromInmemoryPart(mp)
bsrs = append(bsrs, bsr)
}
defer func() {
for _, bsr := range bsrs {
putBlockStreamReader(bsr)
}
for _, mp := range mpsSrc {
putInmemoryPart(mp)
}
}()
// Merge data from bsrs into mpDst
mpDst := getInmemoryPart()
bsw := getBlockStreamWriter()
bsw.MustInitForInmemoryPart(mpDst)
mustMergeBlockStreams(&mpDst.ph, bsw, bsrs, nil)
putBlockStreamWriter(bsw)
// Check mpDst.ph stats
ph := &mpDst.ph
checkCompressionRate(t, ph, compressionRateExpected)
if ph.UncompressedSizeBytes != uncompressedSizeBytesExpected {
t.Fatalf("unexpected uncompressedSizeBytes in partHeader; got %d; want %d", ph.UncompressedSizeBytes, uncompressedSizeBytesExpected)
}
if ph.RowsCount != uint64(rowsCountExpected) {
t.Fatalf("unexpected number of entries in partHeader; got %d; want %d", ph.RowsCount, rowsCountExpected)
}
if ph.BlocksCount != uint64(blocksCountExpected) {
t.Fatalf("unexpected blocksCount in partHeader; got %d; want %d", ph.BlocksCount, blocksCountExpected)
}
if ph.RowsCount > 0 {
if ph.MinTimestamp != minTimestampExpected {
t.Fatalf("unexpected minTimestamp in partHeader; got %d; want %d", ph.MinTimestamp, minTimestampExpected)
}
if ph.MaxTimestamp != maxTimestampExpected {
t.Fatalf("unexpected maxTimestamp in partHeader; got %d; want %d", ph.MaxTimestamp, maxTimestampExpected)
}
}
// Read log entries from mpDst to rrsResult
sbu := getStringsBlockUnmarshaler()
defer putStringsBlockUnmarshaler(sbu)
vd := getValuesDecoder()
defer putValuesDecoder(vd)
lrResult := mpDst.readLogRows(sbu, vd)
putInmemoryPart(mpDst)
// compare rrsOrig to rrsResult
if err := checkEqualRows(lrResult, lrOrig); err != nil {
t.Fatalf("unequal log entries: %s", err)
}
}
// Check empty readers
f(nil, 0, 0)
f([]*LogRows{GetLogRows(nil, nil, nil, "")}, 0, 0)
f([]*LogRows{GetLogRows(nil, nil, nil, ""), GetLogRows(nil, nil, nil, "")}, 0, 0)
// Check merge with a single reader
f([]*LogRows{newTestLogRows(1, 1, 0)}, 1, 0.7)
f([]*LogRows{newTestLogRows(1, 10, 0)}, 1, 2.0)
f([]*LogRows{newTestLogRows(1, 100, 0)}, 1, 4.9)
f([]*LogRows{newTestLogRows(1, 1000, 0)}, 1, 7.1)
f([]*LogRows{newTestLogRows(1, 10000, 0)}, 1, 7.4)
f([]*LogRows{newTestLogRows(10, 1, 0)}, 10, 1.1)
f([]*LogRows{newTestLogRows(100, 1, 0)}, 100, 1.3)
f([]*LogRows{newTestLogRows(1000, 1, 0)}, 1000, 1.2)
f([]*LogRows{newTestLogRows(10, 10, 0)}, 10, 2.1)
f([]*LogRows{newTestLogRows(10, 100, 0)}, 10, 4.9)
//Check merge with multiple readers
f([]*LogRows{
newTestLogRows(1, 1, 0),
newTestLogRows(1, 1, 1),
}, 2, 0.9)
f([]*LogRows{
newTestLogRows(2, 2, 0),
newTestLogRows(2, 2, 0),
}, 2, 1.8)
f([]*LogRows{
newTestLogRows(1, 20, 0),
newTestLogRows(1, 10, 1),
newTestLogRows(1, 5, 2),
}, 3, 2.2)
f([]*LogRows{
newTestLogRows(10, 20, 0),
newTestLogRows(20, 10, 1),
newTestLogRows(30, 5, 2),
}, 60, 2.0)
f([]*LogRows{
newTestLogRows(10, 20, 0),
newTestLogRows(20, 10, 1),
newTestLogRows(30, 5, 2),
newTestLogRows(20, 7, 3),
newTestLogRows(10, 9, 4),
}, 90, 1.9)
}
func newTestLogRows(streams, rowsPerStream int, seed int64) *LogRows {
streamTags := []string{
"some-stream-tag",
}
lr := GetLogRows(streamTags, nil, nil, "")
rng := rand.New(rand.NewSource(seed))
var fields []Field
for i := 0; i < streams; i++ {
tenantID := TenantID{
AccountID: rng.Uint32(),
ProjectID: rng.Uint32(),
}
for j := 0; j < rowsPerStream; j++ {
// Add stream tags
fields = append(fields[:0], Field{
Name: "some-stream-tag",
Value: fmt.Sprintf("some-stream-value-%d", i),
})
// Add the remaining tags
for k := 0; k < 5; k++ {
if rng.Float64() < 0.5 {
fields = append(fields, Field{
Name: fmt.Sprintf("field_%d", k),
Value: fmt.Sprintf("value_%d_%d_%d", i, j, k),
})
}
}
// add a message field
fields = append(fields, Field{
Name: "",
Value: fmt.Sprintf("some row number %d at stream %d", j, i),
})
// add a field with constant value
fields = append(fields, Field{
Name: "job",
Value: "foobar",
})
// add a field with uint value
fields = append(fields, Field{
Name: "response_size_bytes",
Value: fmt.Sprintf("%d", rng.Intn(1234)),
})
// shuffle fields in order to check de-shuffling algorithm
rng.Shuffle(len(fields), func(i, j int) {
fields[i], fields[j] = fields[j], fields[i]
})
timestamp := rng.Int63()
lr.MustAdd(tenantID, timestamp, fields)
}
}
return lr
}
func checkEqualRows(lrResult, lrOrig *LogRows) error {
if len(lrResult.timestamps) != len(lrOrig.timestamps) {
return fmt.Errorf("unexpected length LogRows; got %d; want %d", len(lrResult.timestamps), len(lrOrig.timestamps))
}
sort.Sort(lrResult)
sort.Sort(lrOrig)
sortFieldNames := func(fields []Field) {
sort.Slice(fields, func(i, j int) bool {
return fields[i].Name < fields[j].Name
})
}
for i := range lrOrig.timestamps {
if !lrOrig.streamIDs[i].equal(&lrResult.streamIDs[i]) {
return fmt.Errorf("unexpected streamID for log entry %d\ngot\n%s\nwant\n%s", i, &lrResult.streamIDs[i], &lrOrig.streamIDs[i])
}
if lrOrig.timestamps[i] != lrResult.timestamps[i] {
return fmt.Errorf("unexpected timestamp for log entry %d\ngot\n%d\nwant\n%d", i, lrResult.timestamps[i], lrOrig.timestamps[i])
}
fieldsOrig := lrOrig.rows[i]
fieldsResult := lrResult.rows[i]
if len(fieldsOrig) != len(fieldsResult) {
return fmt.Errorf("unexpected number of fields at log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
}
sortFieldNames(fieldsOrig)
sortFieldNames(fieldsResult)
if !reflect.DeepEqual(fieldsOrig, fieldsResult) {
return fmt.Errorf("unexpected fields for log entry %d\ngot\n%s\nwant\n%s", i, fieldsResult, fieldsOrig)
}
}
return nil
}
// readLogRows reads log entries from mp.
//
// This function is for testing and debugging purposes only.
func (mp *inmemoryPart) readLogRows(sbu *stringsBlockUnmarshaler, vd *valuesDecoder) *LogRows {
lr := GetLogRows(nil, nil, nil, "")
bsr := getBlockStreamReader()
defer putBlockStreamReader(bsr)
bsr.MustInitFromInmemoryPart(mp)
var tmp rows
for bsr.NextBlock() {
bd := &bsr.blockData
streamID := bd.streamID
if err := bd.unmarshalRows(&tmp, sbu, vd); err != nil {
logger.Panicf("BUG: cannot unmarshal log entries from inmemoryPart: %s", err)
}
for i, timestamp := range tmp.timestamps {
lr.MustAdd(streamID.tenantID, timestamp, tmp.rows[i])
lr.streamIDs[len(lr.streamIDs)-1] = streamID
}
tmp.reset()
}
return lr
}
func newTestLogRowsUniqTags(streams, rowsPerStream, uniqFieldsPerRow int) *LogRows {
streamTags := []string{
"some-stream-tag",
}
lr := GetLogRows(streamTags, nil, nil, "")
var fields []Field
for i := 0; i < streams; i++ {
tenantID := TenantID{
AccountID: 0,
ProjectID: 0,
}
for j := 0; j < rowsPerStream; j++ {
// Add stream tags
fields = append(fields[:0], Field{
Name: "some-stream-tag",
Value: fmt.Sprintf("some-stream-value-%d", i),
})
// Add the remaining unique tags
for k := 0; k < uniqFieldsPerRow; k++ {
fields = append(fields, Field{
Name: fmt.Sprintf("field_%d_%d_%d", i, j, k),
Value: fmt.Sprintf("value_%d_%d_%d", i, j, k),
})
}
lr.MustAdd(tenantID, time.Now().UnixMilli(), fields)
}
}
return lr
}