package logstorage import ( "fmt" "sync" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" ) // marshalStringsBlock marshals a and appends the result to dst. // // The marshaled strings block can be unmarshaled with stringsBlockUnmarshaler. func marshalStringsBlock(dst []byte, a []string) []byte { // Encode string lengths u64s := encoding.GetUint64s(len(a)) aLens := u64s.A[:0] for _, s := range a { aLens = append(aLens, uint64(len(s))) } u64s.A = aLens dst = marshalUint64Block(dst, u64s.A) encoding.PutUint64s(u64s) // Encode strings bb := bbPool.Get() b := bb.B for _, s := range a { b = append(b, s...) } bb.B = b dst = marshalBytesBlock(dst, bb.B) bbPool.Put(bb) return dst } // stringsBlockUnmarshaler is used for unmarshaling the block returned from marshalStringsBlock() // // use getStringsBlockUnmarshaler() for obtaining the unmarshaler from the pool in order to save memory allocations. type stringsBlockUnmarshaler struct { // data contains the data for the unmarshaled values data []byte } func (sbu *stringsBlockUnmarshaler) reset() { sbu.data = sbu.data[:0] } func (sbu *stringsBlockUnmarshaler) copyString(s string) string { dataLen := len(sbu.data) sbu.data = append(sbu.data, s...) return bytesutil.ToUnsafeString(sbu.data[dataLen:]) } func (sbu *stringsBlockUnmarshaler) appendFields(dst, src []Field) []Field { for _, f := range src { dst = append(dst, Field{ Name: sbu.copyString(f.Name), Value: sbu.copyString(f.Value), }) } return dst } // unmarshal unmarshals itemsCount strings from src, appends them to dst and returns the result. // // The returned strings are valid until sbu.reset() call. func (sbu *stringsBlockUnmarshaler) unmarshal(dst []string, src []byte, itemsCount uint64) ([]string, error) { u64s := encoding.GetUint64s(0) defer encoding.PutUint64s(u64s) // Decode string lengths var tail []byte var err error u64s.A, tail, err = unmarshalUint64Block(u64s.A[:0], src, itemsCount) if err != nil { return dst, fmt.Errorf("cannot unmarshal string lengths: %w", err) } aLens := u64s.A src = tail // Read bytes block into sbu.data dataLen := len(sbu.data) sbu.data, tail, err = unmarshalBytesBlock(sbu.data, src) if err != nil { return dst, fmt.Errorf("cannot unmarshal bytes block with strings: %w", err) } if len(tail) > 0 { return dst, fmt.Errorf("unexpected non-empty tail after reading bytes block with strings; len(tail)=%d", len(tail)) } // Decode strings from sbu.data into dst data := sbu.data[dataLen:] for _, sLen := range aLens { if uint64(len(data)) < sLen { return dst, fmt.Errorf("cannot unmarshal a string with the length %d bytes from %d bytes", sLen, len(data)) } s := bytesutil.ToUnsafeString(data[:sLen]) data = data[sLen:] dst = append(dst, s) } return dst, nil } // marshalUint64Block appends marshaled a to dst and returns the result. func marshalUint64Block(dst []byte, a []uint64) []byte { bb := bbPool.Get() bb.B = marshalUint64Items(bb.B[:0], a) dst = marshalBytesBlock(dst, bb.B) bbPool.Put(bb) return dst } // unmarshalUint64Block appends unmarshaled from src itemsCount uint64 items to dst and returns the result. func unmarshalUint64Block(dst []uint64, src []byte, itemsCount uint64) ([]uint64, []byte, error) { bb := bbPool.Get() defer bbPool.Put(bb) // Unmarshal the underlying bytes block var err error bb.B, src, err = unmarshalBytesBlock(bb.B[:0], src) if err != nil { return dst, src, fmt.Errorf("cannot unmarshal bytes block: %w", err) } // Unmarshal the items from bb. dst, err = unmarshalUint64Items(dst, bb.B, itemsCount) if err != nil { return dst, src, fmt.Errorf("cannot unmarshal %d uint64 items from bytes block of length %d bytes: %w", itemsCount, len(bb.B), err) } return dst, src, nil } const ( uintBlockType8 = 0 uintBlockType16 = 1 uintBlockType32 = 2 uintBlockType64 = 3 ) // marshalUint64Items appends the marshaled a items to dst and returns the result. func marshalUint64Items(dst []byte, a []uint64) []byte { // Do not marshal len(a), since it is expected that unmarshaler knows it. nMax := uint64(0) for _, n := range a { if n > nMax { nMax = n } } switch { case nMax < (1 << 8): dst = append(dst, uintBlockType8) for _, n := range a { dst = append(dst, byte(n)) } case nMax < (1 << 16): dst = append(dst, uintBlockType16) for _, n := range a { dst = encoding.MarshalUint16(dst, uint16(n)) } case nMax < (1 << 32): dst = append(dst, uintBlockType32) for _, n := range a { dst = encoding.MarshalUint32(dst, uint32(n)) } default: dst = append(dst, uintBlockType64) for _, n := range a { dst = encoding.MarshalUint64(dst, uint64(n)) } } return dst } // unmarshalUint64Items appends unmarshaled from src itemsCount uint64 items to dst and returns the result. func unmarshalUint64Items(dst []uint64, src []byte, itemsCount uint64) ([]uint64, error) { // Unmarshal block type if len(src) < 1 { return dst, fmt.Errorf("cannot unmarshal uint64 block type from empty src") } blockType := src[0] src = src[1:] switch blockType { case uintBlockType8: // A block with items smaller than 1<<8 bytes if uint64(len(src)) != itemsCount { return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), itemsCount) } for _, v := range src { dst = append(dst, uint64(v)) } case uintBlockType16: // A block with items smaller than 1<<16 bytes if uint64(len(src)) != 2*itemsCount { return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 2*itemsCount) } for len(src) > 0 { v := encoding.UnmarshalUint16(src) src = src[2:] dst = append(dst, uint64(v)) } case uintBlockType32: // A block with items smaller than 1<<32 bytes if uint64(len(src)) != 4*itemsCount { return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 4*itemsCount) } for len(src) > 0 { v := encoding.UnmarshalUint32(src) src = src[4:] dst = append(dst, uint64(v)) } case uintBlockType64: // A block with items smaller than 1<<64 bytes if uint64(len(src)) != 8*itemsCount { return dst, fmt.Errorf("unexpected block length for %d items; got %d bytes; want %d bytes", itemsCount, len(src), 8*itemsCount) } for len(src) > 0 { v := encoding.UnmarshalUint64(src) src = src[8:] dst = append(dst, v) } default: return dst, fmt.Errorf("unexpected uint64 block type: %d; want 0, 1, 2 or 3", blockType) } return dst, nil } const ( marshalBytesTypePlain = 0 marshalBytesTypeZSTD = 1 ) func marshalBytesBlock(dst, src []byte) []byte { if len(src) < 128 { // Marshal the block in plain without compression dst = append(dst, marshalBytesTypePlain) dst = append(dst, byte(len(src))) return append(dst, src...) } // Compress the block dst = append(dst, marshalBytesTypeZSTD) compressLevel := getCompressLevel(len(src)) bb := bbPool.Get() bb.B = encoding.CompressZSTDLevel(bb.B[:0], src, compressLevel) dst = encoding.MarshalVarUint64(dst, uint64(len(bb.B))) dst = append(dst, bb.B...) bbPool.Put(bb) return dst } func getCompressLevel(dataLen int) int { if dataLen <= 512 { return 1 } if dataLen <= 4*1024 { return 2 } return 3 } func unmarshalBytesBlock(dst, src []byte) ([]byte, []byte, error) { if len(src) < 1 { return dst, src, fmt.Errorf("cannot unmarshal block type from empty src") } blockType := src[0] src = src[1:] switch blockType { case marshalBytesTypePlain: // Plain block // Read block length if len(src) < 1 { return dst, src, fmt.Errorf("cannot unmarshal plain block size from empty src") } blockLen := int(src[0]) src = src[1:] if len(src) < blockLen { return dst, src, fmt.Errorf("cannot read plain block with the size %d bytes from %b bytes", blockLen, len(src)) } // Copy the block to dst dst = append(dst, src[:blockLen]...) src = src[blockLen:] return dst, src, nil case marshalBytesTypeZSTD: // Compressed block // Read block length blockLen, nSize := encoding.UnmarshalVarUint64(src) if nSize <= 0 { return dst, src, fmt.Errorf("cannot unmarshal compressed block size") } src = src[nSize:] if uint64(len(src)) < blockLen { return dst, src, fmt.Errorf("cannot read compressed block with the size %d bytes from %d bytes", blockLen, len(src)) } compressedBlock := src[:blockLen] src = src[blockLen:] // Decompress the block bb := bbPool.Get() var err error bb.B, err = encoding.DecompressZSTD(bb.B[:0], compressedBlock) if err != nil { return dst, src, fmt.Errorf("cannot decompress block: %w", err) } // Copy the decompressed block to dst. dst = append(dst, bb.B...) bbPool.Put(bb) return dst, src, nil default: return dst, src, fmt.Errorf("unexpected block type: %d; supported types: 0, 1", blockType) } } var bbPool bytesutil.ByteBufferPool // getStringsBlockUnmarshaler returns stringsBlockUnmarshaler from the pool. // // Return back the stringsBlockUnmarshaler to the pool by calling putStringsBlockUnmarshaler(). func getStringsBlockUnmarshaler() *stringsBlockUnmarshaler { v := sbuPool.Get() if v == nil { return &stringsBlockUnmarshaler{} } return v.(*stringsBlockUnmarshaler) } // putStringsBlockUnmarshaler returns back sbu to the pool. // // sbu mustn't be used after returning to the pool. func putStringsBlockUnmarshaler(sbu *stringsBlockUnmarshaler) { sbu.reset() sbuPool.Put(sbu) } var sbuPool sync.Pool