From 812dfd9465117e31142fdbeb04dcc4d9fa9d4520 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 13 May 2024 23:25:29 +0200 Subject: [PATCH] wip --- lib/encoding/encoding.go | 8 +++---- lib/encoding/int.go | 23 +++++++----------- lib/encoding/int_test.go | 14 ++++++----- lib/encoding/int_timing_test.go | 16 ++++++------- lib/logstorage/block_header.go | 21 ++++++++-------- lib/logstorage/encoding.go | 3 +-- lib/logstorage/indexdb.go | 3 +-- lib/logstorage/stream_tags.go | 3 +-- lib/promutils/labelscompressor.go | 8 ++++--- lib/storage/block_header.go | 40 +++++++++++++++++++------------ lib/storage/index_db.go | 16 ++++++------- lib/storage/search.go | 32 ++++++++++++------------- lib/streamaggr/streamaggr.go | 19 ++++++++------- 13 files changed, 105 insertions(+), 101 deletions(-) diff --git a/lib/encoding/encoding.go b/lib/encoding/encoding.go index 89168529d..6effdb41c 100644 --- a/lib/encoding/encoding.go +++ b/lib/encoding/encoding.go @@ -231,12 +231,12 @@ func unmarshalInt64Array(dst []int64, src []byte, mt MarshalType, firstValue int return dst, nil case MarshalTypeDeltaConst: v := firstValue - tail, d, err := UnmarshalVarInt64(src) - if err != nil { + d, nLen := UnmarshalVarInt64(src) + if nLen <= 0 { return nil, fmt.Errorf("cannot unmarshal delta value for delta const: %w", err) } - if len(tail) > 0 { - return nil, fmt.Errorf("unexpected trailing data after delta const (d=%d): %d bytes", d, len(tail)) + if nLen < len(src) { + return nil, fmt.Errorf("unexpected trailing data after delta const (d=%d): %d bytes", d, len(src)-nLen) } for itemsCount > 0 { dst = append(dst, v) diff --git a/lib/encoding/int.go b/lib/encoding/int.go index c9f92a50a..110ecaed9 100644 --- a/lib/encoding/int.go +++ b/lib/encoding/int.go @@ -167,16 +167,13 @@ func marshalVarInt64sSlow(dst []byte, vs []int64) []byte { return dst } -// UnmarshalVarInt64 returns unmarshaled int64 from src and returns the remaining tail from src. -func UnmarshalVarInt64(src []byte) ([]byte, int64, error) { +// UnmarshalVarInt64 returns unmarshaled int64 from src and its size in bytes. +func UnmarshalVarInt64(src []byte) (int64, int) { // TODO substitute binary.Uvarint with binary.Varint when benchmark results will show it is faster. // It is slower on amd64/linux Go1.22. - u64, offset := binary.Uvarint(src) - if offset <= 0 { - return src, 0, fmt.Errorf("cannot unmarshal varint") - } + u64, nSize := binary.Uvarint(src) i64 := int64(int64(u64>>1) ^ (int64(u64<<63) >> 63)) - return src[offset:], i64, nil + return i64, nSize } // UnmarshalVarInt64s unmarshals len(dst) int64 values from src to dst and returns the remaining tail from src. @@ -363,13 +360,9 @@ func marshalVarUint64sSlow(dst []byte, us []uint64) []byte { return dst } -// UnmarshalVarUint64 returns unmarshaled uint64 from src and returns the remaining tail from src. -func UnmarshalVarUint64(src []byte) ([]byte, uint64, error) { - u64, offset := binary.Uvarint(src) - if offset <= 0 { - return src, 0, fmt.Errorf("cannot read varuint") - } - return src[offset:], u64, nil +// UnmarshalVarUint64 returns unmarshaled uint64 from src and its size in bytes +func UnmarshalVarUint64(src []byte) (uint64, int) { + return binary.Uvarint(src) } // UnmarshalVarUint64s unmarshals len(dst) uint64 values from src to dst and returns the remaining tail from src. @@ -498,7 +491,7 @@ func MarshalBytes(dst, b []byte) []byte { // UnmarshalBytes returns unmarshaled bytes from src. func UnmarshalBytes(src []byte) ([]byte, []byte, error) { - n, nSize := binary.Uvarint(src) + n, nSize := UnmarshalVarUint64(src) if nSize <= 0 { return nil, nil, fmt.Errorf("cannot unmarshal string size from uvarint") } diff --git a/lib/encoding/int_test.go b/lib/encoding/int_test.go index d38bbe81d..72349287f 100644 --- a/lib/encoding/int_test.go +++ b/lib/encoding/int_test.go @@ -224,10 +224,11 @@ func testMarshalUnmarshalVarInt64(t *testing.T, v int64) { t.Helper() b := MarshalVarInt64(nil, v) - tail, vNew, err := UnmarshalVarInt64(b) - if err != nil { - t.Fatalf("unexpected error when unmarshaling v=%d from b=%x: %s", v, b, err) + vNew, nSize := UnmarshalVarInt64(b) + if nSize <= 0 { + t.Fatalf("unexpected error when unmarshaling v=%d from b=%x", v, b) } + tail := b[nSize:] if vNew != v { t.Fatalf("unexpected vNew from b=%x; got %d; expecting %d", b, vNew, v) } @@ -272,10 +273,11 @@ func testMarshalUnmarshalVarUint64(t *testing.T, u uint64) { t.Helper() b := MarshalVarUint64(nil, u) - tail, uNew, err := UnmarshalVarUint64(b) - if err != nil { - t.Fatalf("unexpected error when unmarshaling u=%d from b=%x: %s", u, b, err) + uNew, nSize := UnmarshalVarUint64(b) + if nSize <= 0 { + t.Fatalf("unexpected error when unmarshaling u=%d from b=%x", u, b) } + tail := b[nSize:] if uNew != u { t.Fatalf("unexpected uNew from b=%x; got %d; expecting %d", b, uNew, u) } diff --git a/lib/encoding/int_timing_test.go b/lib/encoding/int_timing_test.go index 481316e2e..602cf8486 100644 --- a/lib/encoding/int_timing_test.go +++ b/lib/encoding/int_timing_test.go @@ -173,12 +173,12 @@ func benchmarkUnmarshalVarUint64(b *testing.B, maxValue uint64) { for pb.Next() { src := data for len(src) > 0 { - tail, n, err := UnmarshalVarUint64(src) - if err != nil { - panic(fmt.Errorf("unexpected error: %w", err)) + n, nSize := UnmarshalVarUint64(src) + if nSize <= 0 { + panic(fmt.Errorf("unexpected error")) } + src = src[nSize:] sink += n - src = tail } } Sink.Add(sink) @@ -265,12 +265,12 @@ func benchmarkUnmarshalVarInt64(b *testing.B, maxValue int64) { for pb.Next() { src := data for len(src) > 0 { - tail, n, err := UnmarshalVarInt64(src) - if err != nil { - panic(fmt.Errorf("unexpected error: %w", err)) + n, nSize := UnmarshalVarInt64(src) + if nSize <= 0 { + panic(fmt.Errorf("unexpected error")) } + src = src[nSize:] sink += uint64(n) - src = tail } } Sink.Add(sink) diff --git a/lib/logstorage/block_header.go b/lib/logstorage/block_header.go index 0e5525ebb..37b471b3c 100644 --- a/lib/logstorage/block_header.go +++ b/lib/logstorage/block_header.go @@ -1,7 +1,6 @@ package logstorage import ( - "encoding/binary" "fmt" "math" "sync" @@ -86,7 +85,7 @@ func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) { src = tail // unmarshal bh.uncompressedSizeBytes - n, nSize := binary.Uvarint(src) + n, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal uncompressedSizeBytes from uvarint") } @@ -94,7 +93,7 @@ func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) { bh.uncompressedSizeBytes = n // unmarshal bh.rowsCount - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal rowsCount from uvarint") } @@ -112,7 +111,7 @@ func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) { src = tail // unmarshal columnsHeaderOffset - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderOffset from uvarint") } @@ -120,7 +119,7 @@ func (bh *blockHeader) unmarshal(src []byte) ([]byte, error) { bh.columnsHeaderOffset = n // unmarshal columnsHeaderSize - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal columnsHeaderSize from uvarint") } @@ -297,7 +296,7 @@ func (csh *columnsHeader) unmarshal(a *arena, src []byte) error { csh.reset() // unmarshal columnHeaders - n, nSize := binary.Uvarint(src) + n, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return fmt.Errorf("cannot unmarshal columnHeaders len from uvarint") } @@ -316,7 +315,7 @@ func (csh *columnsHeader) unmarshal(a *arena, src []byte) error { csh.columnHeaders = chs // unmarshal constColumns - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return fmt.Errorf("cannot unmarshal constColumns len from uvarint") } @@ -660,14 +659,14 @@ func (ch *columnHeader) unmarshalValuesAndBloomFilters(src []byte) ([]byte, erro func (ch *columnHeader) unmarshalValues(src []byte) ([]byte, error) { srcOrig := src - n, nSize := binary.Uvarint(src) + n, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal valuesOffset from uvarint") } src = src[nSize:] ch.valuesOffset = n - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal valuesSize from uvarint") } @@ -683,14 +682,14 @@ func (ch *columnHeader) unmarshalValues(src []byte) ([]byte, error) { func (ch *columnHeader) unmarshalBloomFilters(src []byte) ([]byte, error) { srcOrig := src - n, nSize := binary.Uvarint(src) + n, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterOffset from uvarint") } src = src[nSize:] ch.bloomFilterOffset = n - n, nSize = binary.Uvarint(src) + n, nSize = encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal bloomFilterSize from uvarint") } diff --git a/lib/logstorage/encoding.go b/lib/logstorage/encoding.go index 3cd222f7d..95b34ae16 100644 --- a/lib/logstorage/encoding.go +++ b/lib/logstorage/encoding.go @@ -1,7 +1,6 @@ package logstorage import ( - "encoding/binary" "fmt" "sync" @@ -280,7 +279,7 @@ func unmarshalBytesBlock(dst, src []byte) ([]byte, []byte, error) { // Compressed block // Read block length - blockLen, nSize := binary.Uvarint(src) + blockLen, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return dst, src, fmt.Errorf("cannot unmarshal compressed block size from uvarint") } diff --git a/lib/logstorage/indexdb.go b/lib/logstorage/indexdb.go index 82fc141ec..441919f1c 100644 --- a/lib/logstorage/indexdb.go +++ b/lib/logstorage/indexdb.go @@ -2,7 +2,6 @@ package logstorage import ( "bytes" - "encoding/binary" "fmt" "io" "sort" @@ -508,7 +507,7 @@ func (idb *indexdb) loadStreamIDsFromCache(tenantIDs []TenantID, sf *StreamFilte return nil, false } // Cache hit - unpack streamIDs from data. - n, nSize := binary.Uvarint(data) + n, nSize := encoding.UnmarshalVarUint64(data) if nSize <= 0 { logger.Panicf("BUG: cannot unmarshal the number of streamIDs from cache") } diff --git a/lib/logstorage/stream_tags.go b/lib/logstorage/stream_tags.go index d5d3e440a..38a97f9f7 100644 --- a/lib/logstorage/stream_tags.go +++ b/lib/logstorage/stream_tags.go @@ -2,7 +2,6 @@ package logstorage import ( "bytes" - "encoding/binary" "fmt" "sort" "strconv" @@ -120,7 +119,7 @@ func (st *StreamTags) UnmarshalCanonical(src []byte) ([]byte, error) { srcOrig := src - n, nSize := binary.Uvarint(src) + n, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return srcOrig, fmt.Errorf("cannot unmarshal tags len from uvarint") } diff --git a/lib/promutils/labelscompressor.go b/lib/promutils/labelscompressor.go index a135d8e92..391b8f3c5 100644 --- a/lib/promutils/labelscompressor.go +++ b/lib/promutils/labelscompressor.go @@ -91,10 +91,11 @@ func cloneLabel(label prompbmarshal.Label) prompbmarshal.Label { // // It is safe calling Decompress from concurrent goroutines. func (lc *LabelsCompressor) Decompress(dst []prompbmarshal.Label, src []byte) []prompbmarshal.Label { - tail, labelsLen, err := encoding.UnmarshalVarUint64(src) - if err != nil { - logger.Panicf("BUG: cannot unmarshal labels length: %s", err) + labelsLen, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + logger.Panicf("BUG: cannot unmarshal labels length from uvarint") } + tail := src[nSize:] if labelsLen == 0 { // fast path - nothing to decode if len(tail) > 0 { @@ -104,6 +105,7 @@ func (lc *LabelsCompressor) Decompress(dst []prompbmarshal.Label, src []byte) [] } a := encoding.GetUint64s(int(labelsLen)) + var err error tail, err = encoding.UnmarshalVarUint64s(a.A, tail) if err != nil { logger.Panicf("BUG: cannot unmarshal label indexes: %s", err) diff --git a/lib/storage/block_header.go b/lib/storage/block_header.go index f7689f5f1..293411464 100644 --- a/lib/storage/block_header.go +++ b/lib/storage/block_header.go @@ -167,33 +167,42 @@ func (bh *blockHeader) marshalPortable(dst []byte) []byte { } func (bh *blockHeader) unmarshalPortable(src []byte) ([]byte, error) { - src, minTimestamp, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal firstTimestamp: %w", err) + minTimestamp, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal firstTimestamp from varint") } + src = src[nSize:] bh.MinTimestamp = minTimestamp - src, maxTimestamp, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal firstTimestamp: %w", err) + + maxTimestamp, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal firstTimestamp rom varint") } + src = src[nSize:] bh.MaxTimestamp = maxTimestamp - src, firstValue, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal firstValue: %w", err) + + firstValue, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal firstValue from varint") } + src = src[nSize:] bh.FirstValue = firstValue - src, rowsCount, err := encoding.UnmarshalVarUint64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal rowsCount: %w", err) + + rowsCount, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal rowsCount from varuint") } + src = src[nSize:] if rowsCount > math.MaxUint32 { return src, fmt.Errorf("got too big rowsCount=%d; it mustn't exceed %d", rowsCount, uint32(math.MaxUint32)) } bh.RowsCount = uint32(rowsCount) - src, scale, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal scale: %w", err) + + scale, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal scale from varint") } + src = src[nSize:] if scale < math.MinInt16 { return src, fmt.Errorf("got too small scale=%d; it mustn't be smaller than %d", scale, math.MinInt16) } @@ -204,6 +213,7 @@ func (bh *blockHeader) unmarshalPortable(src []byte) ([]byte, error) { if len(src) < 1 { return src, fmt.Errorf("cannot unmarshal marshalType for timestamps from %d bytes; need at least %d bytes", len(src), 1) } + bh.TimestampsMarshalType = encoding.MarshalType(src[0]) src = src[1:] if len(src) < 1 { diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index af698e5bd..848f90cb1 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -2015,11 +2015,11 @@ func removeCompositeTagFilters(tfs []*tagFilter, prefix []byte) []*tagFilter { continue } tagKey = tagKey[1:] - var nameLen uint64 - tagKey, nameLen, err = encoding.UnmarshalVarUint64(tagKey) - if err != nil { - logger.Panicf("BUG: cannot unmarshal nameLen from tagKey %q: %s", tagKey, err) + nameLen, nSize := encoding.UnmarshalVarUint64(tagKey) + if nSize <= 0 { + logger.Panicf("BUG: cannot unmarshal nameLen from tagKey %q", tagKey) } + tagKey = tagKey[nSize:] if nameLen == 0 { logger.Panicf("BUG: nameLen must be greater than 0") } @@ -2830,11 +2830,11 @@ func unmarshalCompositeTagKey(src []byte) ([]byte, []byte, error) { return nil, nil, fmt.Errorf("missing composite tag key prefix in %q", src) } src = src[1:] - tail, n, err := encoding.UnmarshalVarUint64(src) - if err != nil { - return nil, nil, fmt.Errorf("cannot unmarshal metric name length from composite tag key: %w", err) + n, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + return nil, nil, fmt.Errorf("cannot unmarshal metric name length from composite tag key") } - src = tail + src = src[nSize:] if uint64(len(src)) < n { return nil, nil, fmt.Errorf("missing metric name with length %d in composite tag key %q", n, src) } diff --git a/lib/storage/search.go b/lib/storage/search.go index 0e406b344..6f93547b0 100644 --- a/lib/storage/search.go +++ b/lib/storage/search.go @@ -396,33 +396,33 @@ func (sq *SearchQuery) Marshal(dst []byte) []byte { // Unmarshal unmarshals sq from src and returns the tail. func (sq *SearchQuery) Unmarshal(src []byte) ([]byte, error) { - tail, minTs, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal MinTimestamp: %w", err) + minTs, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal MinTimestamp from varint") } + src = src[nSize:] sq.MinTimestamp = minTs - src = tail - tail, maxTs, err := encoding.UnmarshalVarInt64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal MaxTimestamp: %w", err) + maxTs, nSize := encoding.UnmarshalVarInt64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal MaxTimestamp from varint") } + src = src[nSize:] sq.MaxTimestamp = maxTs - src = tail - tail, tfssCount, err := encoding.UnmarshalVarUint64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal the count of TagFilterss: %w", err) + tfssCount, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal the count of TagFilterss from uvarint") } + src = src[nSize:] sq.TagFilterss = slicesutil.SetLength(sq.TagFilterss, int(tfssCount)) - src = tail for i := 0; i < int(tfssCount); i++ { - tail, tfsCount, err := encoding.UnmarshalVarUint64(src) - if err != nil { - return src, fmt.Errorf("cannot unmarshal the count of TagFilters: %w", err) + tfsCount, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + return src, fmt.Errorf("cannot unmarshal the count of TagFilters from uvarint") } - src = tail + src = src[nSize:] tagFilters := sq.TagFilterss[i] tagFilters = slicesutil.SetLength(tagFilters, int(tfsCount)) diff --git a/lib/streamaggr/streamaggr.go b/lib/streamaggr/streamaggr.go index 261967853..f7110290e 100644 --- a/lib/streamaggr/streamaggr.go +++ b/lib/streamaggr/streamaggr.go @@ -867,22 +867,23 @@ func decompressLabels(dst []prompbmarshal.Label, key string) []prompbmarshal.Lab func getOutputKey(key string) string { src := bytesutil.ToUnsafeBytes(key) - tail, inputKeyLen, err := encoding.UnmarshalVarUint64(src) - if err != nil { - logger.Panicf("BUG: cannot unmarshal inputKeyLen: %s", err) + inputKeyLen, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + logger.Panicf("BUG: cannot unmarshal inputKeyLen from uvarint") } - outputKey := tail[inputKeyLen:] + outputKey := src[inputKeyLen:] return bytesutil.ToUnsafeString(outputKey) } func getInputOutputKey(key string) (string, string) { src := bytesutil.ToUnsafeBytes(key) - tail, inputKeyLen, err := encoding.UnmarshalVarUint64(src) - if err != nil { - logger.Panicf("BUG: cannot unmarshal inputKeyLen: %s", err) + inputKeyLen, nSize := encoding.UnmarshalVarUint64(src) + if nSize <= 0 { + logger.Panicf("BUG: cannot unmarshal inputKeyLen from uvarint") } - inputKey := tail[:inputKeyLen] - outputKey := tail[inputKeyLen:] + src = src[nSize:] + inputKey := src[:inputKeyLen] + outputKey := src[inputKeyLen:] return bytesutil.ToUnsafeString(inputKey), bytesutil.ToUnsafeString(outputKey) }