2019-07-23 16:26:39 +00:00
|
|
|
// Copyright 2019+ Klaus Post. All rights reserved.
|
|
|
|
// License information can be found in the LICENSE file.
|
|
|
|
// Based on work by Yann Collet, released under BSD License.
|
|
|
|
|
|
|
|
package zstd
|
|
|
|
|
|
|
|
import (
|
2022-03-16 11:55:03 +00:00
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"encoding/binary"
|
2019-07-23 16:26:39 +00:00
|
|
|
"io"
|
|
|
|
"sync"
|
2022-03-16 11:55:03 +00:00
|
|
|
|
|
|
|
"github.com/klauspost/compress/zstd/internal/xxhash"
|
2019-07-23 16:26:39 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Decoder provides decoding of zstandard streams.
// The decoder has been designed to operate without allocations after a warmup.
// This means that you should store the decoder for best performance.
// To re-use a stream decoder, use the Reset(r io.Reader) error to switch to another stream.
// A decoder can safely be re-used even if the previous stream failed.
// To release the resources, you must call the Close() function on a decoder.
type Decoder struct {
	// o holds the options the decoder was configured with in NewReader.
	o decoderOptions

	// Unreferenced decoders, ready for use.
	// Block decoders are received from this channel while in use and
	// sent back to it afterwards.
	decoders chan *blockDec

	// Current read position used for Reader functionality.
	current decoderState

	// sync stream decoding
	//
	// decodedFrame counts the bytes decoded so far from the current frame,
	// br wraps the input reader of the synchronous stream,
	// enabled is set when a synchronous stream decoder has been started and
	// inFrame is true after a frame header was read until its last block
	// has been decoded.
	syncStream struct {
		decodedFrame uint64
		br           readerWrapper
		enabled      bool
		inFrame      bool
	}

	// frame is the frame decoder used for stream decoding.
	// It is created on the first Reset with a non-nil reader.
	frame *frameDec

	// Custom dictionaries.
	// Always uses copies.
	// Keyed by dictionary ID.
	dicts map[uint32]dict

	// streamWg is the waitgroup for all streams
	streamWg sync.WaitGroup
}
|
|
|
|
|
|
|
|
// decoderState is used for maintaining state when the decoder
// is used for streaming.
type decoderState struct {
	// current block being written to stream.
	// Embedded, so its d, b and err fields are accessed directly
	// on the state (for example d.current.err).
	decodeOutput

	// output in order to be written to stream.
	// Only created for asynchronous stream decoding.
	output chan decodeOutput

	// cancel remaining output.
	cancel context.CancelFunc

	// crc of current frame
	crc *xxhash.Digest

	// flushed is true when there is no stream output left to drain;
	// drainOutput returns early when it is set.
	flushed bool
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
// Check the interfaces we want to support.
|
|
|
|
_ = io.WriterTo(&Decoder{})
|
|
|
|
_ = io.Reader(&Decoder{})
|
|
|
|
)
|
|
|
|
|
|
|
|
// NewReader creates a new decoder.
|
|
|
|
// A nil Reader can be provided in which case Reset can be used to start a decode.
|
|
|
|
//
|
|
|
|
// A Decoder can be used in two modes:
|
|
|
|
//
|
|
|
|
// 1) As a stream, or
|
2020-03-12 15:32:07 +00:00
|
|
|
// 2) For stateless decoding using DecodeAll.
|
2019-07-23 16:26:39 +00:00
|
|
|
//
|
|
|
|
// Only a single stream can be decoded concurrently, but the same decoder
|
|
|
|
// can run multiple concurrent stateless decodes. It is even possible to
|
|
|
|
// use stateless decodes while a stream is being decoded.
|
|
|
|
//
|
|
|
|
// The Reset function can be used to initiate a new stream, which is will considerably
|
|
|
|
// reduce the allocations normally caused by NewReader.
|
|
|
|
func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
|
2019-09-05 21:45:26 +00:00
|
|
|
initPredefined()
|
2019-07-23 16:26:39 +00:00
|
|
|
var d Decoder
|
|
|
|
d.o.setDefault()
|
|
|
|
for _, o := range opts {
|
|
|
|
err := o(&d.o)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
d.current.crc = xxhash.New()
|
2019-07-23 16:26:39 +00:00
|
|
|
d.current.flushed = true
|
|
|
|
|
2021-01-07 21:55:02 +00:00
|
|
|
if r == nil {
|
|
|
|
d.current.err = ErrDecoderNilInput
|
|
|
|
}
|
|
|
|
|
2020-06-18 23:39:53 +00:00
|
|
|
// Transfer option dicts.
|
|
|
|
d.dicts = make(map[uint32]dict, len(d.o.dicts))
|
|
|
|
for _, dc := range d.o.dicts {
|
|
|
|
d.dicts[dc.id] = dc
|
|
|
|
}
|
|
|
|
d.o.dicts = nil
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
// Create decoders
|
|
|
|
d.decoders = make(chan *blockDec, d.o.concurrent)
|
|
|
|
for i := 0; i < d.o.concurrent; i++ {
|
2020-06-05 20:51:30 +00:00
|
|
|
dec := newBlockDec(d.o.lowMem)
|
|
|
|
dec.localFrame = newFrameDec(d.o)
|
|
|
|
d.decoders <- dec
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if r == nil {
|
|
|
|
return &d, nil
|
|
|
|
}
|
|
|
|
return &d, d.Reset(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read bytes from the decompressed stream into p.
|
|
|
|
// Returns the number of bytes written and any error that occurred.
|
|
|
|
// When the stream is done, io.EOF will be returned.
|
|
|
|
func (d *Decoder) Read(p []byte) (int, error) {
|
|
|
|
var n int
|
|
|
|
for {
|
|
|
|
if len(d.current.b) > 0 {
|
|
|
|
filled := copy(p, d.current.b)
|
|
|
|
p = p[filled:]
|
|
|
|
d.current.b = d.current.b[filled:]
|
|
|
|
n += filled
|
|
|
|
}
|
|
|
|
if len(p) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if len(d.current.b) == 0 {
|
|
|
|
// We have an error and no more data
|
|
|
|
if d.current.err != nil {
|
|
|
|
break
|
|
|
|
}
|
2019-10-13 20:17:40 +00:00
|
|
|
if !d.nextBlock(n == 0) {
|
2022-03-16 11:55:03 +00:00
|
|
|
return n, d.current.err
|
2019-10-13 20:17:40 +00:00
|
|
|
}
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(d.current.b) > 0 {
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2019-07-23 16:26:39 +00:00
|
|
|
println("returning", n, "still bytes left:", len(d.current.b))
|
|
|
|
}
|
|
|
|
// Only return error at end of block
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
if d.current.err != nil {
|
|
|
|
d.drainOutput()
|
|
|
|
}
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2019-07-23 16:26:39 +00:00
|
|
|
println("returning", n, d.current.err, len(d.decoders))
|
|
|
|
}
|
|
|
|
return n, d.current.err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reset will reset the decoder the supplied stream after the current has finished processing.
|
|
|
|
// Note that this functionality cannot be used after Close has been called.
|
2021-01-07 21:55:02 +00:00
|
|
|
// Reset can be called with a nil reader to release references to the previous reader.
|
|
|
|
// After being called with a nil reader, no other operations than Reset or DecodeAll or Close
|
|
|
|
// should be used.
|
2019-07-23 16:26:39 +00:00
|
|
|
func (d *Decoder) Reset(r io.Reader) error {
|
|
|
|
if d.current.err == ErrDecoderClosed {
|
|
|
|
return d.current.err
|
|
|
|
}
|
2021-01-07 21:55:02 +00:00
|
|
|
|
|
|
|
d.drainOutput()
|
|
|
|
|
2022-03-16 11:55:03 +00:00
|
|
|
d.syncStream.br.r = nil
|
2019-07-23 16:26:39 +00:00
|
|
|
if r == nil {
|
2021-01-07 21:55:02 +00:00
|
|
|
d.current.err = ErrDecoderNilInput
|
2021-06-08 12:42:57 +00:00
|
|
|
if len(d.current.b) > 0 {
|
|
|
|
d.current.b = d.current.b[:0]
|
|
|
|
}
|
2021-01-07 21:55:02 +00:00
|
|
|
d.current.flushed = true
|
|
|
|
return nil
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
|
2021-06-08 12:42:57 +00:00
|
|
|
// If bytes buffer and < 5MB, do sync decoding anyway.
|
|
|
|
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
|
2021-04-14 11:20:56 +00:00
|
|
|
bb2 := bb
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2019-07-23 16:26:39 +00:00
|
|
|
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
|
|
|
|
}
|
2021-02-01 17:39:00 +00:00
|
|
|
b := bb2.Bytes()
|
2020-04-10 15:39:15 +00:00
|
|
|
var dst []byte
|
|
|
|
if cap(d.current.b) > 0 {
|
|
|
|
dst = d.current.b
|
|
|
|
}
|
|
|
|
|
|
|
|
dst, err := d.DecodeAll(b, dst[:0])
|
2019-07-23 16:26:39 +00:00
|
|
|
if err == nil {
|
|
|
|
err = io.EOF
|
|
|
|
}
|
|
|
|
d.current.b = dst
|
|
|
|
d.current.err = err
|
|
|
|
d.current.flushed = true
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2020-09-08 12:19:47 +00:00
|
|
|
println("sync decode to", len(dst), "bytes, err:", err)
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
// Remove current block.
|
2022-03-16 11:55:03 +00:00
|
|
|
d.stashDecoder()
|
2019-07-23 16:26:39 +00:00
|
|
|
d.current.decodeOutput = decodeOutput{}
|
|
|
|
d.current.err = nil
|
|
|
|
d.current.flushed = false
|
|
|
|
d.current.d = nil
|
|
|
|
|
2022-03-16 11:55:03 +00:00
|
|
|
// Ensure no-one else is still running...
|
|
|
|
d.streamWg.Wait()
|
|
|
|
if d.frame == nil {
|
|
|
|
d.frame = newFrameDec(d.o)
|
|
|
|
}
|
|
|
|
|
|
|
|
if d.o.concurrent == 1 {
|
|
|
|
return d.startSyncDecoder(r)
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
|
|
|
|
d.current.output = make(chan decodeOutput, d.o.concurrent)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
d.current.cancel = cancel
|
|
|
|
d.streamWg.Add(1)
|
|
|
|
go d.startStreamDecoder(ctx, r, d.current.output)
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// drainOutput will drain the output until errEndOfStream is sent.
|
|
|
|
func (d *Decoder) drainOutput() {
|
|
|
|
if d.current.cancel != nil {
|
2022-03-16 11:55:03 +00:00
|
|
|
if debugDecoder {
|
|
|
|
println("cancelling current")
|
|
|
|
}
|
|
|
|
d.current.cancel()
|
2019-07-23 16:26:39 +00:00
|
|
|
d.current.cancel = nil
|
|
|
|
}
|
|
|
|
if d.current.d != nil {
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2019-07-23 16:26:39 +00:00
|
|
|
printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
|
|
|
|
}
|
|
|
|
d.decoders <- d.current.d
|
|
|
|
d.current.d = nil
|
|
|
|
d.current.b = nil
|
|
|
|
}
|
|
|
|
if d.current.output == nil || d.current.flushed {
|
|
|
|
println("current already flushed")
|
|
|
|
return
|
|
|
|
}
|
2021-04-14 11:20:56 +00:00
|
|
|
for v := range d.current.output {
|
|
|
|
if v.d != nil {
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2021-04-14 11:20:56 +00:00
|
|
|
printf("re-adding decoder %p", v.d)
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2021-04-14 11:20:56 +00:00
|
|
|
d.decoders <- v.d
|
|
|
|
}
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
d.current.output = nil
|
|
|
|
d.current.flushed = true
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// WriteTo writes data to w until there's no more data to write or when an error occurs.
|
|
|
|
// The return value n is the number of bytes written.
|
|
|
|
// Any error encountered during the write is also returned.
|
|
|
|
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
|
|
|
|
var n int64
|
|
|
|
for {
|
|
|
|
if len(d.current.b) > 0 {
|
|
|
|
n2, err2 := w.Write(d.current.b)
|
|
|
|
n += int64(n2)
|
2021-08-12 09:38:41 +00:00
|
|
|
if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) {
|
2019-07-23 16:26:39 +00:00
|
|
|
d.current.err = err2
|
2021-08-12 09:38:41 +00:00
|
|
|
} else if n2 != len(d.current.b) {
|
|
|
|
d.current.err = io.ErrShortWrite
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if d.current.err != nil {
|
|
|
|
break
|
|
|
|
}
|
2019-10-13 20:17:40 +00:00
|
|
|
d.nextBlock(true)
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
err := d.current.err
|
|
|
|
if err != nil {
|
|
|
|
d.drainOutput()
|
|
|
|
}
|
|
|
|
if err == io.EOF {
|
|
|
|
err = nil
|
|
|
|
}
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// DecodeAll allows stateless decoding of a blob of bytes.
|
|
|
|
// Output will be appended to dst, so if the destination size is known
|
|
|
|
// you can pre-allocate the destination slice to avoid allocations.
|
|
|
|
// DecodeAll can be used concurrently.
|
|
|
|
// The Decoder concurrency limits will be respected.
|
|
|
|
func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
2022-03-16 11:55:03 +00:00
|
|
|
if d.decoders == nil {
|
2019-07-23 16:26:39 +00:00
|
|
|
return dst, ErrDecoderClosed
|
|
|
|
}
|
|
|
|
|
|
|
|
// Grab a block decoder and frame decoder.
|
2020-06-05 20:51:30 +00:00
|
|
|
block := <-d.decoders
|
|
|
|
frame := block.localFrame
|
2019-07-23 16:26:39 +00:00
|
|
|
defer func() {
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2019-07-23 16:26:39 +00:00
|
|
|
printf("re-adding decoder: %p", block)
|
|
|
|
}
|
|
|
|
frame.rawInput = nil
|
2019-08-05 07:33:21 +00:00
|
|
|
frame.bBuf = nil
|
2022-03-16 11:55:03 +00:00
|
|
|
if frame.history.decoders.br != nil {
|
|
|
|
frame.history.decoders.br.in = nil
|
|
|
|
}
|
2020-06-05 20:51:30 +00:00
|
|
|
d.decoders <- block
|
2019-07-23 16:26:39 +00:00
|
|
|
}()
|
2019-08-05 07:33:21 +00:00
|
|
|
frame.bBuf = input
|
2019-07-23 16:26:39 +00:00
|
|
|
|
|
|
|
for {
|
2020-06-02 21:10:44 +00:00
|
|
|
frame.history.reset()
|
2019-08-05 07:33:21 +00:00
|
|
|
err := frame.reset(&frame.bBuf)
|
2022-03-16 11:55:03 +00:00
|
|
|
if err != nil {
|
|
|
|
if err == io.EOF {
|
|
|
|
if debugDecoder {
|
|
|
|
println("frame reset return EOF")
|
|
|
|
}
|
|
|
|
return dst, nil
|
2020-09-08 12:19:47 +00:00
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
return dst, err
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2020-06-02 21:10:44 +00:00
|
|
|
if frame.DictionaryID != nil {
|
|
|
|
dict, ok := d.dicts[*frame.DictionaryID]
|
|
|
|
if !ok {
|
|
|
|
return nil, ErrUnknownDictionary
|
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
if debugDecoder {
|
|
|
|
println("setting dict", frame.DictionaryID)
|
|
|
|
}
|
2020-06-02 21:10:44 +00:00
|
|
|
frame.history.setDict(&dict)
|
|
|
|
}
|
2022-05-02 13:00:32 +00:00
|
|
|
if frame.WindowSize > d.o.maxWindowSize {
|
|
|
|
return dst, ErrWindowSizeExceeded
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2022-05-02 13:00:32 +00:00
|
|
|
if frame.FrameContentSize != fcsUnknown {
|
|
|
|
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
|
|
|
return dst, ErrDecoderSizeExceeded
|
|
|
|
}
|
2020-11-16 18:53:10 +00:00
|
|
|
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
2022-05-02 13:00:32 +00:00
|
|
|
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
|
2019-07-23 16:26:39 +00:00
|
|
|
copy(dst2, dst)
|
|
|
|
dst = dst2
|
|
|
|
}
|
|
|
|
}
|
2022-05-02 13:00:32 +00:00
|
|
|
|
2019-09-05 21:45:26 +00:00
|
|
|
if cap(dst) == 0 {
|
2020-11-16 18:53:10 +00:00
|
|
|
// Allocate len(input) * 2 by default if nothing is provided
|
|
|
|
// and we didn't get frame content size.
|
|
|
|
size := len(input) * 2
|
2019-09-05 21:45:26 +00:00
|
|
|
// Cap to 1 MB.
|
|
|
|
if size > 1<<20 {
|
|
|
|
size = 1 << 20
|
|
|
|
}
|
2020-11-16 18:53:10 +00:00
|
|
|
if uint64(size) > d.o.maxDecodedSize {
|
|
|
|
size = int(d.o.maxDecodedSize)
|
|
|
|
}
|
2020-03-12 15:32:07 +00:00
|
|
|
dst = make([]byte, 0, size)
|
2019-09-05 21:45:26 +00:00
|
|
|
}
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
dst, err = frame.runDecoder(dst, block)
|
|
|
|
if err != nil {
|
|
|
|
return dst, err
|
|
|
|
}
|
2019-08-05 07:33:21 +00:00
|
|
|
if len(frame.bBuf) == 0 {
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2020-09-08 12:19:47 +00:00
|
|
|
println("frame dbuf empty")
|
|
|
|
}
|
2019-07-23 16:26:39 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return dst, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// nextBlock returns the next block.
|
|
|
|
// If an error occurs d.err will be set.
|
2019-10-13 20:17:40 +00:00
|
|
|
// Optionally the function can block for new output.
|
|
|
|
// If non-blocking mode is used the returned boolean will be false
|
|
|
|
// if no data was available without blocking.
|
|
|
|
func (d *Decoder) nextBlock(blocking bool) (ok bool) {
|
2019-07-23 16:26:39 +00:00
|
|
|
if d.current.err != nil {
|
|
|
|
// Keep error state.
|
2022-03-16 11:55:03 +00:00
|
|
|
return false
|
2019-10-13 20:17:40 +00:00
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
d.current.b = d.current.b[:0]
|
2019-10-13 20:17:40 +00:00
|
|
|
|
2022-03-16 11:55:03 +00:00
|
|
|
// SYNC:
|
|
|
|
if d.syncStream.enabled {
|
|
|
|
if !blocking {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
ok = d.nextBlockSync()
|
|
|
|
if !ok {
|
|
|
|
d.stashDecoder()
|
|
|
|
}
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
|
|
|
//ASYNC:
|
|
|
|
d.stashDecoder()
|
2019-10-13 20:17:40 +00:00
|
|
|
if blocking {
|
2022-03-16 11:55:03 +00:00
|
|
|
d.current.decodeOutput, ok = <-d.current.output
|
2019-10-13 20:17:40 +00:00
|
|
|
} else {
|
|
|
|
select {
|
2022-03-16 11:55:03 +00:00
|
|
|
case d.current.decodeOutput, ok = <-d.current.output:
|
2019-10-13 20:17:40 +00:00
|
|
|
default:
|
|
|
|
return false
|
|
|
|
}
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2022-03-16 11:55:03 +00:00
|
|
|
if !ok {
|
|
|
|
// This should not happen, so signal error state...
|
|
|
|
d.current.err = io.ErrUnexpectedEOF
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
next := d.current.decodeOutput
|
|
|
|
if next.d != nil && next.d.async.newHist != nil {
|
|
|
|
d.current.crc.Reset()
|
|
|
|
}
|
2021-06-08 12:42:57 +00:00
|
|
|
if debugDecoder {
|
2022-03-16 11:55:03 +00:00
|
|
|
var tmp [4]byte
|
|
|
|
binary.LittleEndian.PutUint32(tmp[:], uint32(xxhash.Sum64(next.b)))
|
|
|
|
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
|
|
|
|
}
|
|
|
|
|
2022-05-06 22:48:35 +00:00
|
|
|
if !d.o.ignoreChecksum && len(next.b) > 0 {
|
2022-03-16 11:55:03 +00:00
|
|
|
n, err := d.current.crc.Write(next.b)
|
|
|
|
if err == nil {
|
|
|
|
if n != len(next.b) {
|
|
|
|
d.current.err = io.ErrShortWrite
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
|
|
|
|
got := d.current.crc.Sum64()
|
|
|
|
var tmp [4]byte
|
|
|
|
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
|
2022-05-06 22:48:35 +00:00
|
|
|
if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) && !ignoreCRC {
|
2022-03-16 11:55:03 +00:00
|
|
|
if debugDecoder {
|
|
|
|
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
|
|
|
|
}
|
|
|
|
d.current.err = ErrCRCMismatch
|
|
|
|
} else {
|
|
|
|
if debugDecoder {
|
|
|
|
println("CRC ok", tmp[:])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *Decoder) nextBlockSync() (ok bool) {
|
|
|
|
if d.current.d == nil {
|
|
|
|
d.current.d = <-d.decoders
|
|
|
|
}
|
|
|
|
for len(d.current.b) == 0 {
|
|
|
|
if !d.syncStream.inFrame {
|
|
|
|
d.frame.history.reset()
|
|
|
|
d.current.err = d.frame.reset(&d.syncStream.br)
|
|
|
|
if d.current.err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if d.frame.DictionaryID != nil {
|
|
|
|
dict, ok := d.dicts[*d.frame.DictionaryID]
|
|
|
|
if !ok {
|
|
|
|
d.current.err = ErrUnknownDictionary
|
|
|
|
return false
|
|
|
|
} else {
|
|
|
|
d.frame.history.setDict(&dict)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize {
|
|
|
|
d.current.err = ErrDecoderSizeExceeded
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
d.syncStream.decodedFrame = 0
|
|
|
|
d.syncStream.inFrame = true
|
|
|
|
}
|
|
|
|
d.current.err = d.frame.next(d.current.d)
|
|
|
|
if d.current.err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
d.frame.history.ensureBlock()
|
|
|
|
if debugDecoder {
|
|
|
|
println("History trimmed:", len(d.frame.history.b), "decoded already:", d.syncStream.decodedFrame)
|
|
|
|
}
|
|
|
|
histBefore := len(d.frame.history.b)
|
|
|
|
d.current.err = d.current.d.decodeBuf(&d.frame.history)
|
|
|
|
|
|
|
|
if d.current.err != nil {
|
|
|
|
println("error after:", d.current.err)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
d.current.b = d.frame.history.b[histBefore:]
|
|
|
|
if debugDecoder {
|
|
|
|
println("history after:", len(d.frame.history.b))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check frame size (before CRC)
|
|
|
|
d.syncStream.decodedFrame += uint64(len(d.current.b))
|
|
|
|
if d.syncStream.decodedFrame > d.frame.FrameContentSize {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
|
|
|
}
|
|
|
|
d.current.err = ErrFrameSizeExceeded
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check FCS
|
|
|
|
if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
|
|
|
}
|
|
|
|
d.current.err = ErrFrameSizeMismatch
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update/Check CRC
|
2022-05-06 22:48:35 +00:00
|
|
|
if !d.o.ignoreChecksum && d.frame.HasCheckSum {
|
2022-03-16 11:55:03 +00:00
|
|
|
d.frame.crc.Write(d.current.b)
|
|
|
|
if d.current.d.Last {
|
|
|
|
d.current.err = d.frame.checkCRC()
|
|
|
|
if d.current.err != nil {
|
|
|
|
println("CRC error:", d.current.err)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
d.syncStream.inFrame = !d.current.d.Last
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
2019-10-13 20:17:40 +00:00
|
|
|
return true
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
|
2022-03-16 11:55:03 +00:00
|
|
|
func (d *Decoder) stashDecoder() {
|
|
|
|
if d.current.d != nil {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("re-adding current decoder %p", d.current.d)
|
|
|
|
}
|
|
|
|
d.decoders <- d.current.d
|
|
|
|
d.current.d = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
// Close will release all resources.
|
|
|
|
// It is NOT possible to reuse the decoder after this.
|
|
|
|
func (d *Decoder) Close() {
|
|
|
|
if d.current.err == ErrDecoderClosed {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
d.drainOutput()
|
2022-03-16 11:55:03 +00:00
|
|
|
if d.current.cancel != nil {
|
|
|
|
d.current.cancel()
|
2019-07-23 16:26:39 +00:00
|
|
|
d.streamWg.Wait()
|
2022-03-16 11:55:03 +00:00
|
|
|
d.current.cancel = nil
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
if d.decoders != nil {
|
|
|
|
close(d.decoders)
|
|
|
|
for dec := range d.decoders {
|
|
|
|
dec.Close()
|
|
|
|
}
|
|
|
|
d.decoders = nil
|
|
|
|
}
|
|
|
|
if d.current.d != nil {
|
|
|
|
d.current.d.Close()
|
|
|
|
d.current.d = nil
|
|
|
|
}
|
|
|
|
d.current.err = ErrDecoderClosed
|
|
|
|
}
|
|
|
|
|
2020-01-16 12:14:19 +00:00
|
|
|
// IOReadCloser returns the decoder as an io.ReadCloser for convenience.
|
|
|
|
// Any changes to the decoder will be reflected, so the returned ReadCloser
|
|
|
|
// can be reused along with the decoder.
|
|
|
|
// io.WriterTo is also supported by the returned ReadCloser.
|
|
|
|
func (d *Decoder) IOReadCloser() io.ReadCloser {
|
|
|
|
return closeWrapper{d: d}
|
|
|
|
}
|
|
|
|
|
|
|
|
// closeWrapper wraps a function call as a closer.
// Its methods forward to the wrapped Decoder.
type closeWrapper struct {
	// d is the decoder all calls are forwarded to.
	d *Decoder
}
|
|
|
|
|
|
|
|
// WriteTo forwards WriteTo calls to the decoder.
|
|
|
|
func (c closeWrapper) WriteTo(w io.Writer) (n int64, err error) {
|
|
|
|
return c.d.WriteTo(w)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read forwards read calls to the decoder.
|
|
|
|
func (c closeWrapper) Read(p []byte) (n int, err error) {
|
|
|
|
return c.d.Read(p)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close closes the decoder.
|
|
|
|
func (c closeWrapper) Close() error {
|
|
|
|
c.d.Close()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-07-23 16:26:39 +00:00
|
|
|
// decodeOutput is the result of decoding a single block.
type decodeOutput struct {
	// d is the block decoder that produced the output, if any.
	// It has to be sent back to the decoder pool once the output is consumed.
	d *blockDec
	// b is the decoded data that has not been handed out yet.
	b []byte
	// err is the error to report once b has been consumed.
	err error
}
|
|
|
|
|
2022-03-16 11:55:03 +00:00
|
|
|
func (d *Decoder) startSyncDecoder(r io.Reader) error {
|
|
|
|
d.frame.history.reset()
|
|
|
|
d.syncStream.br = readerWrapper{r: r}
|
|
|
|
d.syncStream.inFrame = false
|
|
|
|
d.syncStream.enabled = true
|
|
|
|
d.syncStream.decodedFrame = 0
|
|
|
|
return nil
|
2019-07-23 16:26:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// startStreamDecoder decodes the stream in r asynchronously and sends the
// decoded blocks, in order, to output. It signals d.streamWg when done.
//
// The work is split in 4 stages connected by channels; this function runs
// stage 0 itself and spawns a goroutine for each of the others:
// 0: Read frames and decode blocks.
// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree.
// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets.
// 3: Wait for stream history, execute sequences, send stream history.
//
// Once a stage has seen an error it forwards the remaining blocks without
// processing them. Cancelling ctx stops stage 0 from picking up new blocks.
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
	defer d.streamWg.Done()
	br := readerWrapper{r: r}

	// Channels connecting the stages. Each stage closes the channel it sends on
	// once the channel it receives from has been closed and drained.
	var seqPrepare = make(chan *blockDec, d.o.concurrent)
	var seqDecode = make(chan *blockDec, d.o.concurrent)
	var seqExecute = make(chan *blockDec, d.o.concurrent)

	// Async 1: Prepare blocks...
	go func() {
		var hist history
		var hasErr bool
		for block := range seqPrepare {
			if hasErr {
				// After an error blocks are only passed on.
				if block != nil {
					seqDecode <- block
				}
				continue
			}
			if block.async.newHist != nil {
				// First block of a frame: start with a clean history for this stage.
				if debugDecoder {
					println("Async 1: new history")
				}
				hist.reset()
				if block.async.newHist.dict != nil {
					hist.setDict(block.async.newHist.dict)
				}
			}
			// Only compressed blocks without errors have literals to decode.
			if block.err != nil || block.Type != blockTypeCompressed {
				hasErr = block.err != nil
				seqDecode <- block
				continue
			}

			remain, err := block.decodeLiterals(block.data, &hist)
			block.err = err
			hasErr = block.err != nil
			if err == nil {
				// Hand the literals and the remaining (sequence) data to the next stage.
				block.async.literals = hist.decoders.literals
				block.async.seqData = remain
			} else if debugDecoder {
				println("decodeLiterals error:", err)
			}
			seqDecode <- block
		}
		close(seqDecode)
	}()

	// Async 2: Decode sequences...
	go func() {
		var hist history
		var hasErr bool

		for block := range seqDecode {
			if hasErr {
				// After an error blocks are only passed on.
				if block != nil {
					seqExecute <- block
				}
				continue
			}
			if block.async.newHist != nil {
				// First block of a frame: take over the state sent along with it.
				if debugDecoder {
					println("Async 2: new history, recent:", block.async.newHist.recentOffsets)
				}
				hist.decoders = block.async.newHist.decoders
				hist.recentOffsets = block.async.newHist.recentOffsets
				hist.windowSize = block.async.newHist.windowSize
				if block.async.newHist.dict != nil {
					hist.setDict(block.async.newHist.dict)
				}
			}
			// Only compressed blocks without errors have sequences to decode.
			if block.err != nil || block.Type != blockTypeCompressed {
				hasErr = block.err != nil
				seqExecute <- block
				continue
			}

			hist.decoders.literals = block.async.literals
			block.err = block.prepareSequences(block.async.seqData, &hist)
			if debugDecoder && block.err != nil {
				println("prepareSequences returned:", block.err)
			}
			hasErr = block.err != nil
			if block.err == nil {
				block.err = block.decodeSequences(&hist)
				if debugDecoder && block.err != nil {
					println("decodeSequences returned:", block.err)
				}
				hasErr = block.err != nil
				//				block.async.sequence = hist.decoders.seq[:hist.decoders.nSeqs]
				block.async.seqSize = hist.decoders.seqSize
			}
			seqExecute <- block
		}
		close(seqExecute)
	}()

	// wg is used to wait for the last stage before its history buffer is read back.
	var wg sync.WaitGroup
	wg.Add(1)

	// Async 3: Execute sequences...
	// frameHistCache is the history buffer kept on d.frame; it is reused below
	// when large enough and written back when the stream ends.
	frameHistCache := d.frame.history.b
	go func() {
		var hist history
		// decodedFrame counts the output bytes of the current frame, fcs is its content size.
		var decodedFrame uint64
		var fcs uint64
		var hasErr bool
		for block := range seqExecute {
			out := decodeOutput{err: block.err, d: block}
			if block.err != nil || hasErr {
				// Pass the block on without executing it.
				hasErr = true
				output <- out
				continue
			}
			if block.async.newHist != nil {
				// First block of a frame: set up the history buffer and frame counters.
				if debugDecoder {
					println("Async 3: new history")
				}
				hist.windowSize = block.async.newHist.windowSize
				hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
				if block.async.newHist.dict != nil {
					hist.setDict(block.async.newHist.dict)
				}

				if cap(hist.b) < hist.allocFrameBuffer {
					// Prefer the cached buffer over a new allocation.
					if cap(frameHistCache) >= hist.allocFrameBuffer {
						hist.b = frameHistCache
					} else {
						hist.b = make([]byte, 0, hist.allocFrameBuffer)
						println("Alloc history sized", hist.allocFrameBuffer)
					}
				}
				hist.b = hist.b[:0]
				fcs = block.async.fcs
				decodedFrame = 0
			}
			do := decodeOutput{err: block.err, d: block}
			switch block.Type {
			case blockTypeRLE:
				if debugDecoder {
					println("add rle block length:", block.RLESize)
				}

				if cap(block.dst) < int(block.RLESize) {
					if block.lowMem {
						block.dst = make([]byte, block.RLESize)
					} else {
						block.dst = make([]byte, maxBlockSize)
					}
				}
				// Fill the destination with RLESize copies of the first data byte.
				block.dst = block.dst[:block.RLESize]
				v := block.data[0]
				for i := range block.dst {
					block.dst[i] = v
				}
				hist.append(block.dst)
				do.b = block.dst
			case blockTypeRaw:
				if debugDecoder {
					println("add raw block length:", len(block.data))
				}
				// Raw blocks are output as-is.
				hist.append(block.data)
				do.b = block.data
			case blockTypeCompressed:
				if debugDecoder {
					println("execute with history length:", len(hist.b), "window:", hist.windowSize)
				}
				hist.decoders.seqSize = block.async.seqSize
				hist.decoders.literals = block.async.literals
				do.err = block.executeSequences(&hist)
				hasErr = do.err != nil
				if debugDecoder && hasErr {
					println("executeSequences returned:", do.err)
				}
				do.b = block.dst
			}
			if !hasErr {
				// Check the decoded size against the frame content size.
				decodedFrame += uint64(len(do.b))
				if decodedFrame > fcs {
					println("fcs exceeded", block.Last, fcs, decodedFrame)
					do.err = ErrFrameSizeExceeded
					hasErr = true
				} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
					do.err = ErrFrameSizeMismatch
					hasErr = true
				} else {
					if debugDecoder {
						println("fcs ok", block.Last, fcs, decodedFrame)
					}
				}
			}
			output <- do
		}
		close(output)
		// Publish the history buffer before signalling; it is read after wg.Wait below.
		frameHistCache = hist.b
		wg.Done()
		if debugDecoder {
			println("decoder goroutines finished")
		}
	}()

	// Stage 0: read frame headers and blocks and feed them into the pipeline.
decodeStream:
	for {
		frame := d.frame
		if debugDecoder {
			println("New frame...")
		}
		// historySent tracks whether the frame history was attached to a block yet.
		var historySent bool
		frame.history.reset()
		err := frame.reset(&br)
		if debugDecoder && err != nil {
			println("Frame decoder returned", err)
		}
		if err == nil && frame.DictionaryID != nil {
			dict, ok := d.dicts[*frame.DictionaryID]
			if !ok {
				err = ErrUnknownDictionary
			} else {
				frame.history.setDict(&dict)
			}
		}
		if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
			err = ErrDecoderSizeExceeded
		}
		if err != nil {
			// Send the error through the pipeline on a block decoder, unless cancelled.
			select {
			case <-ctx.Done():
			case dec := <-d.decoders:
				dec.sendErr(err)
				seqPrepare <- dec
			}
			break decodeStream
		}

		// Go through all blocks of the frame.
		for {
			var dec *blockDec
			select {
			case <-ctx.Done():
				break decodeStream
			case dec = <-d.decoders:
				// Once we have a decoder, we MUST return it.
			}
			err := frame.next(dec)
			if !historySent {
				// Attach a copy of the frame history to the first block of the frame.
				h := frame.history
				if debugDecoder {
					println("Alloc History:", h.allocFrameBuffer)
				}
				dec.async.newHist = &h
				dec.async.fcs = frame.FrameContentSize
				historySent = true
			} else {
				dec.async.newHist = nil
			}
			if debugDecoder && err != nil {
				println("next block returned error:", err)
			}
			dec.err = err
			dec.checkCRC = nil
			if dec.Last && frame.HasCheckSum && err == nil {
				// Read the 4 checksum bytes that follow the last block of the frame.
				crc, err := frame.rawInput.readSmall(4)
				if err != nil {
					println("CRC missing?", err)
					dec.err = err
				}
				// Copy the bytes so the block carries its own checksum value.
				var tmp [4]byte
				copy(tmp[:], crc)
				dec.checkCRC = tmp[:]
				if debugDecoder {
					println("found crc to check:", dec.checkCRC)
				}
			}
			// Read what is needed from dec before sending; it must not be touched afterwards.
			err = dec.err
			last := dec.Last
			seqPrepare <- dec
			if err != nil {
				break decodeStream
			}
			if last {
				break
			}
		}
	}
	// Closing seqPrepare lets the stages finish one after another.
	close(seqPrepare)
	wg.Wait()
	// Keep the history buffer for reuse by the next stream.
	d.frame.history.b = frameHistCache
}
|