Mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git, synced 2024-12-11 14:53:49 +00:00
e45556fc05
Previously, block columns weren't properly limited by maxColumnsPerBlock, so a block could end up with more columns than expected: for example, when an ingested log stream contains many unique fields whose total count exceeds maxColumnsPerBlock. Only the fieldsPerBlock limit was enforced during row parsing, which isn't enough to mitigate this issue. Enforcing maxColumnsPerBlock at ingestion time would also be very expensive, since it would require tracking all possible field name combinations.

This commit adds a maxColumnsPerBlock check to the MustInitFromRows function, which now returns the offset of the rows and timestamps actually added to the block. The caller must create another block and ingest the remaining rows into it.

Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7568

Signed-off-by: f41gh7 <nik@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
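For context, the caller-side pattern implied by this change might look like the sketch below. It is illustrative only, not code from this commit: `flushBlock` is a hypothetical callback standing in for whatever persists a filled block, and the loop assumes `MustInitFromRows` always makes progress (offset > 0), which the fieldsPerBlock limit applied during row parsing is meant to ensure.

```go
// mustIngestRows splits rows across as many blocks as needed, driven by
// the offset returned by MustInitFromRows.
func mustIngestRows(timestamps []int64, rows [][]Field, flushBlock func(*block)) {
	for len(rows) > 0 {
		b := getBlock()
		// Fill b until a per-block limit (e.g. maxColumnsPerBlock)
		// would be exceeded; offset is the number of rows consumed.
		offset := b.MustInitFromRows(timestamps, rows)
		flushBlock(b)
		putBlock(b)
		// Ingest the unconsumed tail into a fresh block on the
		// next iteration.
		timestamps = timestamps[offset:]
		rows = rows[offset:]
	}
}
```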
217 lines · 4.3 KiB · Go
package logstorage

import (
	"fmt"
	"reflect"
	"testing"
)

func TestBlockMustInitFromRows(t *testing.T) {
	f := func(timestamps []int64, rows [][]Field, bExpected *block) {
		t.Helper()

		b := getBlock()
		defer putBlock(b)

		offset := b.MustInitFromRows(timestamps, rows)
		if offset != len(rows) {
			t.Fatalf("expected offset: %d to match processed rows: %d", offset, len(rows))
		}
		if b.uncompressedSizeBytes() >= maxUncompressedBlockSize {
			t.Fatalf("expecting non-full block")
		}
		if !reflect.DeepEqual(b, bExpected) {
			t.Fatalf("unexpected block;\ngot\n%v\nwant\n%v", b, bExpected)
		}
		if n := b.Len(); n != len(timestamps) {
			t.Fatalf("unexpected block len; got %d; want %d", n, len(timestamps))
		}
		b.assertValid()
	}

	// Empty log entries
	f(nil, nil, &block{})
	f([]int64{}, [][]Field{}, &block{})

	// A single row
	timestamps := []int64{1234}
	rows := [][]Field{
		{
			{
				Name:  "msg",
				Value: "foo",
			},
			{
				Name:  "level",
				Value: "error",
			},
		},
	}
	bExpected := &block{
		timestamps: []int64{1234},
		constColumns: []Field{
			{
				Name:  "level",
				Value: "error",
			},
			{
				Name:  "msg",
				Value: "foo",
			},
		},
	}
	f(timestamps, rows, bExpected)

	// Multiple log entries with the same set of fields
	timestamps = []int64{3, 5}
	rows = [][]Field{
		{
			{
				Name:  "job",
				Value: "foo",
			},
			{
				Name:  "instance",
				Value: "host1",
			},
		},
		{
			{
				Name:  "job",
				Value: "foo",
			},
			{
				Name:  "instance",
				Value: "host2",
			},
		},
	}
	bExpected = &block{
		timestamps: []int64{3, 5},
		columns: []column{
			{
				name:   "instance",
				values: []string{"host1", "host2"},
			},
		},
		constColumns: []Field{
			{
				Name:  "job",
				Value: "foo",
			},
		},
	}
	f(timestamps, rows, bExpected)

	// Multiple log entries with a distinct set of fields
	timestamps = []int64{3, 5, 10}
	rows = [][]Field{
		{
			{
				Name:  "msg",
				Value: "foo",
			},
			{
				Name:  "b",
				Value: "xyz",
			},
		},
		{
			{
				Name:  "b",
				Value: "xyz",
			},
			{
				Name:  "a",
				Value: "aaa",
			},
		},
		{
			{
				Name:  "b",
				Value: "xyz",
			},
		},
	}
	bExpected = &block{
		timestamps: []int64{3, 5, 10},
		columns: []column{
			{
				name:   "a",
				values: []string{"", "aaa", ""},
			},
			{
				name:   "msg",
				values: []string{"foo", "", ""},
			},
		},
		constColumns: []Field{
			{
				Name:  "b",
				Value: "xyz",
			},
		},
	}
	f(timestamps, rows, bExpected)
}

func TestBlockMustInitFromRowsFullBlock(t *testing.T) {
	const rowsCount = 2000
	timestamps := make([]int64, rowsCount)
	rows := make([][]Field, rowsCount)
	for i := range timestamps {
		fields := make([]Field, 10)
		for j := range fields {
			fields[j] = Field{
				Name:  fmt.Sprintf("field_%d", j),
				Value: "very very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong value",
			}
		}
		rows[i] = fields
	}

	b := getBlock()
	defer putBlock(b)
	offset := b.MustInitFromRows(timestamps, rows)
	if offset != len(rows) {
		t.Fatalf("expected offset: %d to match processed rows: %d", offset, len(rows))
	}
	b.assertValid()
	if n := b.Len(); n != len(rows) {
		t.Fatalf("unexpected total log entries; got %d; want %d", n, len(rows))
	}
	if n := b.uncompressedSizeBytes(); n < maxUncompressedBlockSize {
		t.Fatalf("expecting full block with %d bytes; got %d bytes", maxUncompressedBlockSize, n)
	}
}
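
// Note on the expected offsets in the test below: every row uses
// row-unique field names (field_<row>_<col>), so each ingested row adds
// fieldsPerRow new columns to the block. MustInitFromRows stops adding
// rows once another row would push the column count past
// maxColumnsPerBlock, and returns the number of rows it consumed; the
// caller is expected to ingest the remaining rows into a fresh block.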
func TestBlockMustInitWithNonEmptyOffset(t *testing.T) {
	f := func(rowsCount int, fieldsPerRow int, expectedOffset int) {
		t.Helper()
		timestamps := make([]int64, rowsCount)
		rows := make([][]Field, rowsCount)
		for i := range timestamps {
			fields := make([]Field, fieldsPerRow)
			for j := range fields {
				fields[j] = Field{
					Name:  fmt.Sprintf("field_%d_%d", i, j),
					Value: "very very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong value",
				}
			}
			rows[i] = fields
		}
		b := getBlock()
		defer putBlock(b)
		offset := b.MustInitFromRows(timestamps, rows)
		if offset != expectedOffset {
			t.Fatalf("unexpected processed rows offset; got %d; want: %d", offset, expectedOffset)
		}
		b.assertValid()
		if n := b.Len(); n != len(rows[:offset]) {
			t.Fatalf("unexpected total log entries; got %d; want %d", n, len(rows[:offset]))
		}
	}
	f(10, 300, 6)
	f(10, 10, 10)
	f(15, 30, 15)
	f(maxColumnsPerBlock+1000, 1, maxColumnsPerBlock)
}