mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-11 14:53:49 +00:00
414 lines
12 KiB
Go
414 lines
12 KiB
Go
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package s2
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"math"
|
|
"math/bits"
|
|
"sync"
|
|
|
|
"github.com/klauspost/compress/internal/race"
|
|
)
|
|
|
|
// Encode returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func Encode(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if cap(dst) < n {
|
|
dst = make([]byte, n)
|
|
} else {
|
|
dst = dst[:n]
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
n := encodeBlock(dst[d:], src)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
var estblockPool [2]sync.Pool
|
|
|
|
// EstimateBlockSize will perform a very fast compression
|
|
// without outputting the result and return the compressed output size.
|
|
// The function returns -1 if no improvement could be achieved.
|
|
// Using actual compression will most often produce better compression than the estimate.
|
|
func EstimateBlockSize(src []byte) (d int) {
|
|
if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
|
|
return -1
|
|
}
|
|
if len(src) <= 1024 {
|
|
const sz, pool = 2048, 0
|
|
tmp, ok := estblockPool[pool].Get().(*[sz]byte)
|
|
if !ok {
|
|
tmp = &[sz]byte{}
|
|
}
|
|
race.WriteSlice(tmp[:])
|
|
defer estblockPool[pool].Put(tmp)
|
|
|
|
d = calcBlockSizeSmall(src, tmp)
|
|
} else {
|
|
const sz, pool = 32768, 1
|
|
tmp, ok := estblockPool[pool].Get().(*[sz]byte)
|
|
if !ok {
|
|
tmp = &[sz]byte{}
|
|
}
|
|
race.WriteSlice(tmp[:])
|
|
defer estblockPool[pool].Put(tmp)
|
|
|
|
d = calcBlockSize(src, tmp)
|
|
}
|
|
|
|
if d == 0 {
|
|
return -1
|
|
}
|
|
// Size of the varint encoded block size.
|
|
d += (bits.Len64(uint64(len(src))) + 7) / 7
|
|
|
|
if d >= len(src) {
|
|
return -1
|
|
}
|
|
return d
|
|
}
|
|
|
|
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// EncodeBetter compresses better than Encode but typically with a
|
|
// 10-40% speed decrease on both compression and decompression.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func EncodeBetter(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if len(dst) < n {
|
|
dst = make([]byte, n)
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
n := encodeBlockBetter(dst[d:], src)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// EncodeBest compresses as good as reasonably possible but with a
|
|
// big speed decrease.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func EncodeBest(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if len(dst) < n {
|
|
dst = make([]byte, n)
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
n := encodeBlockBest(dst[d:], src, nil)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// The output is Snappy compatible and will likely decompress faster.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func EncodeSnappy(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if cap(dst) < n {
|
|
dst = make([]byte, n)
|
|
} else {
|
|
dst = dst[:n]
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
n := encodeBlockSnappy(dst[d:], src)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// The output is Snappy compatible and will likely decompress faster.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func EncodeSnappyBetter(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if cap(dst) < n {
|
|
dst = make([]byte, n)
|
|
} else {
|
|
dst = dst[:n]
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
n := encodeBlockBetterSnappy(dst[d:], src)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
|
|
// slice of dst if dst was large enough to hold the entire encoded block.
|
|
// Otherwise, a newly allocated slice will be returned.
|
|
//
|
|
// The output is Snappy compatible and will likely decompress faster.
|
|
//
|
|
// The dst and src must not overlap. It is valid to pass a nil dst.
|
|
//
|
|
// The blocks will require the same amount of memory to decode as encoding,
|
|
// and does not make for concurrent decoding.
|
|
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
|
//
|
|
// If you need to encode larger amounts of data, consider using
|
|
// the streaming interface which gives all of these features.
|
|
func EncodeSnappyBest(dst, src []byte) []byte {
|
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
|
panic(ErrTooLarge)
|
|
} else if cap(dst) < n {
|
|
dst = make([]byte, n)
|
|
} else {
|
|
dst = dst[:n]
|
|
}
|
|
|
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
|
|
|
if len(src) == 0 {
|
|
return dst[:d]
|
|
}
|
|
if len(src) < minNonLiteralBlockSize {
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
n := encodeBlockBestSnappy(dst[d:], src)
|
|
if n > 0 {
|
|
d += n
|
|
return dst[:d]
|
|
}
|
|
// Not compressible
|
|
d += emitLiteral(dst[d:], src)
|
|
return dst[:d]
|
|
}
|
|
|
|
// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
|
|
// If the destination is nil or too small, a new will be allocated.
|
|
// The blocks are not validated, so garbage in = garbage out.
|
|
// dst may not overlap block data.
|
|
// Any data in dst is preserved as is, so it will not be considered a block.
|
|
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
|
|
totalSize := uint64(0)
|
|
compSize := 0
|
|
for _, b := range blocks {
|
|
l, hdr, err := decodedLen(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
totalSize += uint64(l)
|
|
compSize += len(b) - hdr
|
|
}
|
|
if totalSize == 0 {
|
|
dst = append(dst, 0)
|
|
return dst, nil
|
|
}
|
|
if totalSize > math.MaxUint32 {
|
|
return nil, ErrTooLarge
|
|
}
|
|
var tmp [binary.MaxVarintLen32]byte
|
|
hdrSize := binary.PutUvarint(tmp[:], totalSize)
|
|
wantSize := hdrSize + compSize
|
|
|
|
if cap(dst)-len(dst) < wantSize {
|
|
dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
|
|
}
|
|
dst = append(dst, tmp[:hdrSize]...)
|
|
for _, b := range blocks {
|
|
_, hdr, err := decodedLen(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
dst = append(dst, b[hdr:]...)
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// inputMargin is the minimum number of extra input bytes to keep, inside
|
|
// encodeBlock's inner loop. On some architectures, this margin lets us
|
|
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
|
|
// literals can be implemented as a single load to and store from a 16-byte
|
|
// register. That literal's actual length can be as short as 1 byte, so this
|
|
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
|
|
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
|
|
// that we don't overrun the dst and src buffers.
|
|
const inputMargin = 8
|
|
|
|
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
|
|
// will be accepted by the encoder.
|
|
const minNonLiteralBlockSize = 32
|
|
|
|
const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
|
|
|
|
// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
|
|
// Blocks this big are highly discouraged, though.
|
|
// Half the size on 32 bit systems.
|
|
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
|
|
|
|
// MaxEncodedLen returns the maximum length of a snappy block, given its
|
|
// uncompressed length.
|
|
//
|
|
// It will return a negative value if srcLen is too large to encode.
|
|
// 32 bit platforms will have lower thresholds for rejecting big content.
|
|
func MaxEncodedLen(srcLen int) int {
|
|
n := uint64(srcLen)
|
|
if intReduction == 1 {
|
|
// 32 bits
|
|
if n > math.MaxInt32 {
|
|
// Also includes negative.
|
|
return -1
|
|
}
|
|
} else if n > 0xffffffff {
|
|
// 64 bits
|
|
// Also includes negative.
|
|
return -1
|
|
}
|
|
// Size of the varint encoded block size.
|
|
n = n + uint64((bits.Len64(n)+7)/7)
|
|
|
|
// Add maximum size of encoding block as literals.
|
|
n += uint64(literalExtraSize(int64(srcLen)))
|
|
if intReduction == 1 {
|
|
// 32 bits
|
|
if n > math.MaxInt32 {
|
|
return -1
|
|
}
|
|
} else if n > 0xffffffff {
|
|
// 64 bits
|
|
// Also includes negative.
|
|
return -1
|
|
}
|
|
return int(n)
|
|
}
|