From 356845d7164f7d573a677dacfabf0c8b1d69f98d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 5 Jun 2020 23:51:30 +0300 Subject: [PATCH] vendor: update github.com/klauspost/compress from v1.10.7 to v1.10.8 --- go.mod | 2 +- go.sum | 4 +- .../klauspost/compress/fse/bitreader.go | 27 +- .../klauspost/compress/fse/bytereader.go | 13 +- .../klauspost/compress/huff0/README.md | 6 +- .../klauspost/compress/huff0/bitreader.go | 256 +++++- .../klauspost/compress/huff0/decompress.go | 784 ++++++++++++++++-- .../klauspost/compress/zstd/bitreader.go | 25 +- .../klauspost/compress/zstd/blockdec.go | 4 + .../klauspost/compress/zstd/bytereader.go | 25 +- .../klauspost/compress/zstd/decoder.go | 15 +- .../klauspost/compress/zstd/fse_decoder.go | 19 +- vendor/modules.txt | 2 +- 13 files changed, 1034 insertions(+), 148 deletions(-) diff --git a/go.mod b/go.mod index 2b72b280e..c0bc753b7 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/cespare/xxhash/v2 v2.1.1 github.com/golang/protobuf v1.4.2 // indirect github.com/golang/snappy v0.0.1 - github.com/klauspost/compress v1.10.7 + github.com/klauspost/compress v1.10.8 github.com/lithammer/go-jump-consistent-hash v1.0.1 github.com/valyala/fastjson v1.5.1 github.com/valyala/fastrand v1.0.0 diff --git a/go.sum b/go.sum index c992bc81a..c39619762 100644 --- a/go.sum +++ b/go.sum @@ -132,8 +132,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc= github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.10.7 h1:7rix8v8GpI3ZBb0nSozFRgbtXKv+hOe+qfEpZqybrAg= -github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.10.8 h1:eLeJ3dr/Y9+XRfJT4l+8ZjmtB5RPJhucH2HeCV5+IZY= +github.com/klauspost/compress v1.10.8/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go index b9db204f5..f65eb3909 100644 --- a/vendor/github.com/klauspost/compress/fse/bitreader.go +++ b/vendor/github.com/klauspost/compress/fse/bitreader.go @@ -6,6 +6,7 @@ package fse import ( + "encoding/binary" "errors" "io" ) @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,8 +68,9 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -77,7 +83,8 @@ func (b *bitReader) fill() { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -91,9 +98,17 @@ func (b *bitReader) fill() { } } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return b.bitsRead >= 64 && b.off == 0 } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go index f228a46cd..abade2d60 100644 --- a/vendor/github.com/klauspost/compress/fse/bytereader.go +++ b/vendor/github.com/klauspost/compress/fse/bytereader.go @@ -25,19 +25,10 @@ func (b *byteReader) advance(n uint) { b.off += int(n) } -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] - v3 := int32(b2[3]) - v2 := int32(b2[2]) - v1 := int32(b2[1]) - v0 := int32(b2[0]) - return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) -} - // Uint32 returns a little endian uint32 starting at current offset. func (b byteReader) Uint32() uint32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := uint32(b2[3]) v2 := uint32(b2[2]) v1 := uint32(b2[1]) diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md index 0a8448ce9..e12da4db2 100644 --- a/vendor/github.com/klauspost/compress/huff0/README.md +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -12,8 +12,6 @@ but it can be used as a secondary step to compressors (like Snappy) that does no * [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) -THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE. - ## News * Mar 2018: First implementation released. Consider this beta software for now. @@ -75,6 +73,8 @@ which can be given to the decompressor. Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. +For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size. + You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back your input was likely corrupted. @@ -84,4 +84,4 @@ There are no integrity checks, so relying on errors from the decompressor does n # Contributing Contributions are always welcome. Be aware that adding public functions will require good justification and breaking -changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file +changes will likely not be accepted. If in doubt open an issue before writing the PR. diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go index 7d0903c70..a4979e886 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -6,6 +6,7 @@ package huff0 import ( + "encoding/binary" "errors" "io" ) @@ -34,29 +35,16 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBit32(uint32(v))) return nil } -// getBits will return n bits. n can be 0. -func (b *bitReader) getBits(n uint8) uint16 { - if n == 0 || b.bitsRead >= 64 { - return 0 - } - return b.getBitsFast(n) -} - -// getBitsFast requires that at least one bit is requested every time. -// There are no checks if the buffer is filled. -func (b *bitReader) getBitsFast(n uint8) uint16 { - const regMask = 64 - 1 - v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) - b.bitsRead += n - return v -} - // peekBitsFast requires that at least one bit is requested every time. // There are no checks if the buffer is filled. func (b *bitReader) peekBitsFast(n uint8) uint16 { @@ -71,21 +59,36 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. + + // 2 bounds checks. v := b.in[b.off-4 : b.off] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +func (b *bitReader) advance(n uint8) { + b.bitsRead += n +} + +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off > 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 @@ -113,3 +116,214 @@ func (b *bitReader) close() error { } return nil } + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderBytes struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderBytes) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderBytes) peekByteFast() uint8 { + got := uint8(b.value >> 56) + return got +} + +func (b *bitReaderBytes) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderBytes) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read. +func (b *bitReaderBytes) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReaderBytes) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << (b.bitsRead - 32) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderBytes) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderBytes) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} + +// bitReaderShifted reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReaderShifted struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReaderShifted) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } + b.advance(8 - uint8(highBit32(uint32(v)))) + return nil +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 { + return uint16(b.value >> ((64 - n) & 63)) +} + +func (b *bitReaderShifted) advance(n uint8) { + b.bitsRead += n + b.value <<= n & 63 +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReaderShifted) fillFast() { + if b.bitsRead < 32 { + return + } + + // 2 bounds checks. + v := b.in[b.off-4 : b.off] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 +} + +// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read. +func (b *bitReaderShifted) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReaderShifted) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value |= uint64(low) << ((b.bitsRead - 32) & 63) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReaderShifted) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReaderShifted) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index fb42a398b..a03b2634a 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -25,6 +25,9 @@ type dEntryDouble struct { len uint8 } +// Uses special code for all tables that are < 8 bits. +const use8BitTables = true + // ReadTable will read a table from the input. // The size of the input may be larger than the table definition. // Any content remaining after the table definition will be returned. @@ -83,6 +86,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { } v2 := v & 15 rankStats[v2]++ + // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0. weightTotal += (1 << v2) >> 1 } if weightTotal == 0 { @@ -142,12 +146,14 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { d := dEntrySingle{ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), } - single := s.dt.single[rankStats[w] : rankStats[w]+length] + rank := &rankStats[w] + single := s.dt.single[*rank : *rank+length] for i := range single { single[i] = d } - rankStats[w] += length + *rank += length } + return s, in, nil } @@ -208,7 +214,10 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { if len(d.dt.single) == 0 { return nil, errors.New("no table loaded") } - var br bitReader + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress1X8Bit(dst, src) + } + var br bitReaderShifted err := br.init(src) if err != nil { return dst, err @@ -216,17 +225,6 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { maxDecodedSize := cap(dst) dst = dst[:0] - decode := func() byte { - val := br.peekBitsFast(d.actualTableLog) /* note : actualTableLog >= 1 */ - v := d.dt.single[val] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) - } - hasDec := func(v dEntrySingle) byte { - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) - } - // Avoid bounds check by always having full sized table. const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 @@ -238,11 +236,25 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { for br.off >= 8 { br.fillFast() - buf[off+0] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) - buf[off+1] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) + v := dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + // Refill br.fillFast() - buf[off+2] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) - buf[off+3] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekBitsFast(d.actualTableLog)&tlMask] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + off += 4 if off == 0 { if len(dst)+256 > maxDecodedSize { @@ -259,13 +271,196 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { } dst = append(dst, buf[:off]...) - for !br.finished() { + // br < 8, so uint8 is fine + bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead + for bitsLeft > 0 { br.fill() + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } if len(dst) >= maxDecodedSize { br.close() return nil, ErrMaxDecodedSizeExceeded } - dst = append(dst, decode()) + v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= nBits + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress1X8BitExactly(dst, src) + } + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + shift := (8 - d.actualTableLog) & 7 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) + } + return dst, br.close() +} + +// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8. +// The cap of the output buffer will be the maximum decompressed size. +// The length of the supplied input must match the end of a block exactly. +func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) { + var br bitReaderBytes + err := br.init(src) + if err != nil { + return dst, err + } + maxDecodedSize := cap(dst) + dst = dst[:0] + + // Avoid bounds check by always having full sized table. + dt := d.dt.single[:256] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + + const shift = 0 + + //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog) + for br.off >= 4 { + br.fillFast() + v := dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+0] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+1] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+2] = uint8(v.entry >> 8) + + v = dt[br.peekByteFast()>>shift] + br.advance(uint8(v.entry)) + buf[off+3] = uint8(v.entry >> 8) + + off += 4 + if off == 0 { + if len(dst)+256 > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:]...) + } + } + + if len(dst)+int(off) > maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + dst = append(dst, buf[:off]...) + + // br < 4, so uint8 is fine + bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead)) + for bitsLeft > 0 { + if br.bitsRead >= 64-8 { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + if len(dst) >= maxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + v := dt[br.peekByteFast()>>shift] + nBits := uint8(v.entry) + br.advance(nBits) + bitsLeft -= int8(nBits) + dst = append(dst, uint8(v.entry>>8)) } return dst, br.close() } @@ -274,15 +469,18 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { // The length of the supplied input must match the end of a block exactly. // The *capacity* of the dst slice must match the destination size of // the uncompressed data exactly. -func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(s.dt.single) == 0 { +func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { + if len(d.dt.single) == 0 { return nil, errors.New("no table loaded") } if len(src) < 6+(4*1) { return nil, errors.New("input too small") } + if use8BitTables && d.actualTableLog <= 8 { + return d.decompress4X8bit(dst, src) + } - var br [4]bitReader + var br [4]bitReaderShifted start := 6 for i := 0; i < 3; i++ { length := int(src[i*2]) | (int(src[i*2+1]) << 8) @@ -308,14 +506,7 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { const tlSize = 1 << tableLogMax const tlMask = tlSize - 1 - single := s.dt.single[:tlSize] - - decode := func(br *bitReader) byte { - val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ - v := single[val&tlMask] - br.bitsRead += uint8(v.entry) - return uint8(v.entry >> 8) - } + single := d.dt.single[:tlSize] // Use temp table to avoid bound checks/append penalty. var buf [256]byte @@ -324,66 +515,63 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { // Decode 2 values from each decoder/loop. const bufoff = 256 / 4 -bigloop: for { - for i := range br { - br := &br[i] - if br.off < 4 { - break bigloop - } - br.fillFast() + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break } { const stream = 0 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) - - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + br[stream].advance(uint8(v.entry)) buf[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) - } - { - const stream = 1 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) - - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) } { const stream = 2 - val := br[stream].peekBitsFast(s.actualTableLog) + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + val := br[stream].peekBitsFast(d.actualTableLog) v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) - - val2 := br[stream].peekBitsFast(s.actualTableLog) - v2 := single[val2&tlMask] - buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) + br[stream].advance(uint8(v.entry)) buf[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) - } - { - const stream = 3 - val := br[stream].peekBitsFast(s.actualTableLog) - v := single[val&tlMask] - br[stream].bitsRead += uint8(v.entry) - - val2 := br[stream].peekBitsFast(s.actualTableLog) + val2 := br[stream2].peekBitsFast(d.actualTableLog) v2 := single[val2&tlMask] - buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) - buf[off+bufoff*stream] = uint8(v.entry >> 8) - br[stream].bitsRead += uint8(v2.entry) + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2] = uint8(v2.entry >> 8) + + val = br[stream].peekBitsFast(d.actualTableLog) + v = single[val&tlMask] + br[stream].advance(uint8(v.entry)) + buf[off+bufoff*stream+1] = uint8(v.entry >> 8) + + val2 = br[stream2].peekBitsFast(d.actualTableLog) + v2 = single[val2&tlMask] + br[stream2].advance(uint8(v2.entry)) + buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8) } off += 2 @@ -422,12 +610,456 @@ bigloop: for i := range br { offset := dstEvery * i br := &br[i] - for !br.finished() { + bitsLeft := br.off*8 + uint(64-br.bitsRead) + for bitsLeft > 0 { br.fill() + if false && br.bitsRead >= 32 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value = (br.value << 32) | uint64(low) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value = (br.value << 8) | uint64(br.in[br.off-1]) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... if offset >= len(out) { return nil, errors.New("corruption detected: stream overrun 4") } - out[offset] = decode(br) + + // Read value and increment offset. + val := br.peekBitsFast(d.actualTableLog) + v := single[val&tlMask].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= uint(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { + if d.actualTableLog == 8 { + return d.decompress4X8bitExactly(dst, src) + } + + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + shift := (8 - d.actualTableLog) & 7 + + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + // Interleave 2 decodes. + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return dst, nil +} + +// Decompress4X will decompress a 4X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// The *capacity* of the dst slice must match the destination size of +// the uncompressed data exactly. +func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { + var br [4]bitReaderBytes + start := 6 + for i := 0; i < 3; i++ { + length := int(src[i*2]) | (int(src[i*2+1]) << 8) + if start+length >= len(src) { + return nil, errors.New("truncated input (or invalid offset)") + } + err := br[i].init(src[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err := br[3].init(src[start:]) + if err != nil { + return nil, err + } + + // destination, offset to match first output + dstSize := cap(dst) + dst = dst[:dstSize] + out := dst + dstEvery := (dstSize + 3) / 4 + + const shift = 0 + const tlSize = 1 << 8 + const tlMask = tlSize - 1 + single := d.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var buf [256]byte + var off uint8 + var decoded int + + // Decode 4 values from each decoder/loop. + const bufoff = 256 / 4 + for { + if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { + break + } + + { + // Interleave 2 decodes. + const stream = 0 + const stream2 = 1 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + { + const stream = 2 + const stream2 = 3 + br[stream].fillFast() + br[stream2].fillFast() + + v := single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 := single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+1] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+2] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + + v = single[br[stream].peekByteFast()>>shift].entry + buf[off+bufoff*stream+3] = uint8(v >> 8) + br[stream].advance(uint8(v)) + + v2 = single[br[stream2].peekByteFast()>>shift].entry + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) + br[stream2].advance(uint8(v2)) + } + + off += 4 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(out, buf[:bufoff]) + copy(out[dstEvery:], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4]) + off = 0 + out = out[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(out) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(out) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(out, buf[:off]) + copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2]) + copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3]) + copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + out = out[off:] + } + + // Decode remaining. + for i := range br { + offset := dstEvery * i + br := &br[i] + bitsLeft := int(br.off*8) + int(64-br.bitsRead) + for bitsLeft > 0 { + if br.finished() { + return nil, io.ErrUnexpectedEOF + } + if br.bitsRead >= 56 { + if br.off >= 4 { + v := br.in[br.off-4:] + v = v[:4] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + br.value |= uint64(low) << (br.bitsRead - 32) + br.bitsRead -= 32 + br.off -= 4 + } else { + for br.off > 0 { + br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8) + br.bitsRead -= 8 + br.off-- + } + } + } + // end inline... + if offset >= len(out) { + return nil, errors.New("corruption detected: stream overrun 4") + } + + // Read value and increment offset. + v := single[br.peekByteFast()>>shift].entry + nBits := uint8(v) + br.advance(nBits) + bitsLeft -= int(nBits) + out[offset] = uint8(v >> 8) offset++ } decoded += offset - dstEvery*i diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 15d79d439..854458537 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -5,6 +5,7 @@ package zstd import ( + "encoding/binary" "errors" "io" "math/bits" @@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error { } b.bitsRead = 64 b.value = 0 - b.fill() - b.fill() + if len(in) >= 8 { + b.fillFastStart() + } else { + b.fill() + b.fill() + } b.bitsRead += 8 - uint8(highBits(uint32(v))) return nil } @@ -63,21 +68,31 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // Do single re-slice to avoid bounds checks. - v := b.in[b.off-4 : b.off] + // 2 bounds checks. + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 b.off -= 4 } +// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. +func (b *bitReader) fillFastStart() { + // Do single re-slice to avoid bounds checks. + b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + b.bitsRead = 0 + b.off -= 8 +} + // fill() will make sure at least 32 bits are available. func (b *bitReader) fill() { if b.bitsRead < 32 { return } if b.off >= 4 { - v := b.in[b.off-4 : b.off] + v := b.in[b.off-4:] + v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 4a14242c7..c8ec6e331 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -83,6 +83,10 @@ type blockDec struct { err error decWG sync.WaitGroup + // Frame to use for singlethreaded decoding. + // Should not be used by the decoder itself since parent may be another frame. + localFrame *frameDec + // Block is RLE, this is the size. RLESize uint32 tmp [4]byte diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go index f708df1c4..2c4fca17f 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytereader.go +++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -4,8 +4,6 @@ package zstd -import "encoding/binary" - // byteReader provides a byte reader that reads // little endian values from a byte stream. // The input stream is manually advanced. @@ -33,7 +31,8 @@ func (b *byteReader) overread() bool { // Int32 returns a little endian int32 starting at current offset. func (b byteReader) Int32() int32 { - b2 := b.b[b.off : b.off+4 : b.off+4] + b2 := b.b[b.off:] + b2 = b2[:4] v3 := int32(b2[3]) v2 := int32(b2[2]) v1 := int32(b2[1]) @@ -57,7 +56,25 @@ func (b byteReader) Uint32() uint32 { } return v } - return binary.LittleEndian.Uint32(b.b[b.off : b.off+4]) + b2 := b.b[b.off:] + b2 = b2[:4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint32NC returns a little endian uint32 starting at current offset. +// The caller must be sure if there are at least 4 bytes left. +func (b byteReader) Uint32NC() uint32 { + b2 := b.b[b.off:] + b2 = b2[:4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) } // unread returns the unread portion of the input. diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 8e34479ff..75bf05bc9 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -23,9 +23,6 @@ type Decoder struct { // Unreferenced decoders, ready for use. decoders chan *blockDec - // Unreferenced decoders, ready for use. - frames chan *frameDec - // Streams ready to be decoded. stream chan decodeStream @@ -90,10 +87,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { // Create decoders d.decoders = make(chan *blockDec, d.o.concurrent) - d.frames = make(chan *frameDec, d.o.concurrent) for i := 0; i < d.o.concurrent; i++ { - d.frames <- newFrameDec(d.o) - d.decoders <- newBlockDec(d.o.lowMem) + dec := newBlockDec(d.o.lowMem) + dec.localFrame = newFrameDec(d.o) + d.decoders <- dec } if r == nil { @@ -283,15 +280,15 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } // Grab a block decoder and frame decoder. - block, frame := <-d.decoders, <-d.frames + block := <-d.decoders + frame := block.localFrame defer func() { if debug { printf("re-adding decoder: %p", block) } - d.decoders <- block frame.rawInput = nil frame.bBuf = nil - d.frames <- frame + d.decoders <- block }() frame.bBuf = input diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index 957cfeb79..e6d3d49b3 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -55,7 +55,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { if b.remain() < 4 { return errors.New("input too small") } - bitStream := b.Uint32() + bitStream := b.Uint32NC() nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog if nbBits > tablelogAbsoluteMax { println("Invalid tablelog:", nbBits) @@ -79,7 +79,8 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { n0 += 24 if r := b.remain(); r > 5 { b.advance(2) - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { // end of bit stream bitStream >>= 16 @@ -104,10 +105,11 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { charnum++ } - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 - bitStream = b.Uint32() >> bitCount + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> bitCount } else { bitStream >>= 2 } @@ -148,17 +150,16 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { threshold >>= 1 } - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain()) - if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 { b.advance(bitCount >> 3) bitCount &= 7 + // The check above should make sure we can read 32 bits + bitStream = b.Uint32NC() >> (bitCount & 31) } else { bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) b.off = len(b.b) - 4 - //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend) + bitStream = b.Uint32() >> (bitCount & 31) } - bitStream = b.Uint32() >> (bitCount & 31) - //printf("bitstream is now: 0b%b", bitStream) } s.symbolLen = charnum if s.symbolLen <= 1 { diff --git a/vendor/modules.txt b/vendor/modules.txt index d7c0ca055..e631d714a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -89,7 +89,7 @@ github.com/jmespath/go-jmespath github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/parser -# github.com/klauspost/compress v1.10.7 +# github.com/klauspost/compress v1.10.8 github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/gzip