2019-07-23 16:26:39 +00:00
|
|
|
// Package zstd provides compression and decompression of zstandard files.
|
|
|
|
//
|
|
|
|
// For advanced usage and examples, go to the README: https://github.com/klauspost/compress/tree/master/zstd#zstd
|
|
|
|
package zstd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"log"
|
2020-02-26 18:45:19 +00:00
|
|
|
"math"
|
2019-07-23 16:26:39 +00:00
|
|
|
"math/bits"
|
|
|
|
)
|
|
|
|
|
2020-02-26 18:45:19 +00:00
|
|
|
// Compile-time switches for diagnostics, followed by a format constant.
const (
	// debug turns on debug printing.
	debug = false

	// debugAsserts turns on extra assertions. It is always on when debug is on.
	debugAsserts = debug || false

	// debugSequences turns on printing of sequence details.
	debugSequences = false

	// debugMatches turns on printing of detailed matching information.
	debugMatches = false

	// forcePreDef forces the encoder to use predefined tables.
	forcePreDef = false

	// zstdMinMatch is the minimum zstd match length.
	zstdMinMatch = 3
)
|
|
|
|
|
2020-02-26 18:45:19 +00:00
|
|
|
// bufferReset is the buffer offset at which the offset is reset; it sits MaxWindowSize below math.MaxInt32.
|
|
|
|
const bufferReset = math.MaxInt32 - MaxWindowSize
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
// Sentinel errors reported by this package.
var (
	// ErrReservedBlockType reports that a block using the reserved block
	// type was found. This usually means the input is wrong or corrupted.
	ErrReservedBlockType = errors.New("invalid input: reserved block type encountered")

	// ErrCompressedSizeTooBig reports a block that is larger than allowed.
	// This usually means the input is wrong or corrupted.
	ErrCompressedSizeTooBig = errors.New("invalid input: compressed size too big")

	// ErrBlockTooSmall reports a block that is too small to be decoded.
	// It is usually the result of invalid input.
	ErrBlockTooSmall = errors.New("block too small")

	// ErrMagicMismatch reports that a "magic" number did not have the
	// expected value. This usually means the input is wrong or corrupted.
	ErrMagicMismatch = errors.New("invalid input: magic number mismatch")

	// ErrWindowSizeExceeded reports a reference that goes beyond the valid
	// window size. This usually means the input is wrong or corrupted.
	ErrWindowSizeExceeded = errors.New("window size exceeded")

	// ErrWindowSizeTooSmall is returned when no window size is specified.
	// This usually means the input is wrong or corrupted.
	ErrWindowSizeTooSmall = errors.New("invalid input: window size was too small")

	// ErrDecoderSizeExceeded reports that the decompressed size is larger
	// than the configured limit.
	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")

	// ErrUnknownDictionary reports an unknown dictionary ID.
	// For the time being dictionaries are not supported.
	ErrUnknownDictionary = errors.New("unknown dictionary")

	// ErrFrameSizeExceeded reports that the size stated by the frame was
	// exceeded. It is only returned when SingleSegment is specified on the
	// frame.
	ErrFrameSizeExceeded = errors.New("frame size exceeded")

	// ErrCRCMismatch reports a CRC mismatch.
	ErrCRCMismatch = errors.New("CRC check failed")

	// ErrDecoderClosed is returned when the Decoder is used after Close has
	// been called.
	ErrDecoderClosed = errors.New("decoder used after Close")

	// ErrDecoderNilInput is returned when a nil Reader was provided and an
	// operation other than Reset/DecodeAll/Close was attempted.
	ErrDecoderNilInput = errors.New("nil input provided as reader")
)
|
|
|
|
|
|
|
|
func println(a ...interface{}) {
|
|
|
|
if debug {
|
|
|
|
log.Println(a...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func printf(format string, a ...interface{}) {
|
|
|
|
if debug {
|
|
|
|
log.Printf(format, a...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-12 15:32:07 +00:00
|
|
|
// matchLenFast does matching, but will not match the last up to 7 bytes.
|
|
|
|
func matchLenFast(a, b []byte) int {
|
|
|
|
endI := len(a) & (math.MaxInt32 - 7)
|
|
|
|
for i := 0; i < endI; i += 8 {
|
|
|
|
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
|
|
|
|
return i + bits.TrailingZeros64(diff)>>3
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return endI
|
|
|
|
}
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
// matchLen returns the maximum length.
|
|
|
|
// a must be the shortest of the two.
|
|
|
|
// The function also returns whether all bytes matched.
|
|
|
|
func matchLen(a, b []byte) int {
|
|
|
|
b = b[:len(a)]
|
|
|
|
for i := 0; i < len(a)-7; i += 8 {
|
|
|
|
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
|
|
|
|
return i + (bits.TrailingZeros64(diff) >> 3)
|
|
|
|
}
|
|
|
|
}
|
2020-03-12 15:32:07 +00:00
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
checked := (len(a) >> 3) << 3
|
|
|
|
a = a[checked:]
|
|
|
|
b = b[checked:]
|
|
|
|
for i := range a {
|
|
|
|
if a[i] != b[i] {
|
2020-03-12 15:32:07 +00:00
|
|
|
return i + checked
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return len(a) + checked
|
|
|
|
}
|
|
|
|
|
|
|
|
// load3232 reads the 4 bytes of b starting at offset i and returns them as a
// little-endian uint32.
func load3232(b []byte, i int32) uint32 {
	// Pinning the slice to exactly 4 bytes first is a hint to the compiler:
	// it can drop the bounds checks on the indexing below and do one read.
	word := b[i:][:4]
	return uint32(word[0]) |
		uint32(word[1])<<8 |
		uint32(word[2])<<16 |
		uint32(word[3])<<24
}
|
|
|
|
|
|
|
|
// load6432 reads the 8 bytes of b starting at offset i and returns them as a
// little-endian uint64.
func load6432(b []byte, i int32) uint64 {
	// Pinning the slice to exactly 8 bytes first is a hint to the compiler:
	// it can drop the bounds checks on the indexing below and do one read.
	word := b[i:][:8]
	return uint64(word[0]) |
		uint64(word[1])<<8 |
		uint64(word[2])<<16 |
		uint64(word[3])<<24 |
		uint64(word[4])<<32 |
		uint64(word[5])<<40 |
		uint64(word[6])<<48 |
		uint64(word[7])<<56
}
|
|
|
|
|
|
|
|
// load64 reads the 8 bytes of b starting at offset i and returns them as a
// little-endian uint64.
func load64(b []byte, i int) uint64 {
	// Pinning the slice to exactly 8 bytes first is a hint to the compiler:
	// it can drop the bounds checks on the indexing below and do one read.
	word := b[i:][:8]
	return uint64(word[0]) |
		uint64(word[1])<<8 |
		uint64(word[2])<<16 |
		uint64(word[3])<<24 |
		uint64(word[4])<<32 |
		uint64(word[5])<<40 |
		uint64(word[6])<<48 |
		uint64(word[7])<<56
}
|