vendor: update github.com/klauspost/compress from v1.10.7 to v1.10.8

This commit is contained in:
Aliaksandr Valialkin 2020-06-05 23:51:30 +03:00
parent 69a647b0d2
commit 2382053d32
13 changed files with 1034 additions and 148 deletions

2
go.mod
View file

@ -13,7 +13,7 @@ require (
github.com/cespare/xxhash/v2 v2.1.1 github.com/cespare/xxhash/v2 v2.1.1
github.com/golang/protobuf v1.4.2 // indirect github.com/golang/protobuf v1.4.2 // indirect
github.com/golang/snappy v0.0.1 github.com/golang/snappy v0.0.1
github.com/klauspost/compress v1.10.7 github.com/klauspost/compress v1.10.8
github.com/valyala/fastjson v1.5.1 github.com/valyala/fastjson v1.5.1
github.com/valyala/fastrand v1.0.0 github.com/valyala/fastrand v1.0.0
github.com/valyala/gozstd v1.7.0 github.com/valyala/gozstd v1.7.0

4
go.sum
View file

@ -132,8 +132,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc= github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc=
github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.7 h1:7rix8v8GpI3ZBb0nSozFRgbtXKv+hOe+qfEpZqybrAg= github.com/klauspost/compress v1.10.8 h1:eLeJ3dr/Y9+XRfJT4l+8ZjmtB5RPJhucH2HeCV5+IZY=
github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.8/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=

View file

@ -6,6 +6,7 @@
package fse package fse
import ( import (
"encoding/binary"
"errors" "errors"
"io" "io"
) )
@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error {
} }
b.bitsRead = 64 b.bitsRead = 64
b.value = 0 b.value = 0
b.fill() if len(in) >= 8 {
b.fill() b.fillFastStart()
} else {
b.fill()
b.fill()
}
b.bitsRead += 8 - uint8(highBits(uint32(v))) b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil return nil
} }
@ -63,8 +68,9 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 { if b.bitsRead < 32 {
return return
} }
// Do single re-slice to avoid bounds checks. // 2 bounds checks.
v := b.in[b.off-4 : b.off] v := b.in[b.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32
@ -77,7 +83,8 @@ func (b *bitReader) fill() {
return return
} }
if b.off > 4 { if b.off > 4 {
v := b.in[b.off-4 : b.off] v := b.in[b.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32
@ -91,9 +98,17 @@ func (b *bitReader) fill() {
} }
} }
// fillFastStart primes the reader with the 8 bytes that end at b.off, read as
// one little-endian word. It assumes the bitreader is empty and that at least
// 8 bytes are available to read.
func (b *bitReader) fillFastStart() {
	start := b.off - 8
	// A single open-ended re-slice keeps the bounds checks to a minimum.
	b.value = binary.LittleEndian.Uint64(b.in[start:])
	b.off = start
	b.bitsRead = 0
}
// finished returns true if all bits have been read from the bit stream. // finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool { func (b *bitReader) finished() bool {
return b.off == 0 && b.bitsRead >= 64 return b.bitsRead >= 64 && b.off == 0
} }
// close the bitstream and returns an error if out-of-buffer reads occurred. // close the bitstream and returns an error if out-of-buffer reads occurred.

View file

@ -25,19 +25,10 @@ func (b *byteReader) advance(n uint) {
b.off += int(n) b.off += int(n)
} }
// Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 {
b2 := b.b[b.off : b.off+4 : b.off+4]
v3 := int32(b2[3])
v2 := int32(b2[2])
v1 := int32(b2[1])
v0 := int32(b2[0])
return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
}
// Uint32 returns a little endian uint32 starting at current offset. // Uint32 returns a little endian uint32 starting at current offset.
func (b byteReader) Uint32() uint32 { func (b byteReader) Uint32() uint32 {
b2 := b.b[b.off : b.off+4 : b.off+4] b2 := b.b[b.off:]
b2 = b2[:4]
v3 := uint32(b2[3]) v3 := uint32(b2[3])
v2 := uint32(b2[2]) v2 := uint32(b2[2])
v1 := uint32(b2[1]) v1 := uint32(b2[1])

View file

@ -12,8 +12,6 @@ but it can be used as a secondary step to compressors (like Snappy) that does no
* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) * [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE.
## News ## News
* Mar 2018: First implementation released. Consider this beta software for now. * Mar 2018: First implementation released. Consider this beta software for now.
@ -75,6 +73,8 @@ which can be given to the decompressor.
Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
To decompress content concurrently with a fixed table, a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested; it will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.
You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
your input was likely corrupted. your input was likely corrupted.
@ -84,4 +84,4 @@ There are no integrity checks, so relying on errors from the decompressor does n
# Contributing # Contributing
Contributions are always welcome. Be aware that adding public functions will require good justification and breaking Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
changes will likely not be accepted. If in doubt open an issue before writing the PR. changes will likely not be accepted. If in doubt open an issue before writing the PR.

View file

@ -6,6 +6,7 @@
package huff0 package huff0
import ( import (
"encoding/binary"
"errors" "errors"
"io" "io"
) )
@ -34,29 +35,16 @@ func (b *bitReader) init(in []byte) error {
} }
b.bitsRead = 64 b.bitsRead = 64
b.value = 0 b.value = 0
b.fill() if len(in) >= 8 {
b.fill() b.fillFastStart()
} else {
b.fill()
b.fill()
}
b.bitsRead += 8 - uint8(highBit32(uint32(v))) b.bitsRead += 8 - uint8(highBit32(uint32(v)))
return nil return nil
} }
// getBits will return n bits. n can be 0.
func (b *bitReader) getBits(n uint8) uint16 {
if n == 0 || b.bitsRead >= 64 {
return 0
}
return b.getBitsFast(n)
}
// getBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) getBitsFast(n uint8) uint16 {
const regMask = 64 - 1
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return v
}
// peekBitsFast requires that at least one bit is requested every time. // peekBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled. // There are no checks if the buffer is filled.
func (b *bitReader) peekBitsFast(n uint8) uint16 { func (b *bitReader) peekBitsFast(n uint8) uint16 {
@ -71,21 +59,36 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 { if b.bitsRead < 32 {
return return
} }
// Do single re-slice to avoid bounds checks.
// 2 bounds checks.
v := b.in[b.off-4 : b.off] v := b.in[b.off-4 : b.off]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32
b.off -= 4 b.off -= 4
} }
// advance records that n more bits have been consumed from the stream.
// Only the read counter moves; value itself is left untouched.
func (b *bitReader) advance(n uint8) {
	b.bitsRead = b.bitsRead + n
}
// fillFastStart primes the reader with the 8 bytes that end at b.off, read as
// one little-endian word. It assumes the bitreader is empty and that at least
// 8 bytes are available to read.
func (b *bitReader) fillFastStart() {
	start := b.off - 8
	// A single open-ended re-slice keeps the bounds checks to a minimum.
	b.value = binary.LittleEndian.Uint64(b.in[start:])
	b.off = start
	b.bitsRead = 0
}
// fill() will make sure at least 32 bits are available. // fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() { func (b *bitReader) fill() {
if b.bitsRead < 32 { if b.bitsRead < 32 {
return return
} }
if b.off > 4 { if b.off > 4 {
v := b.in[b.off-4 : b.off] v := b.in[b.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32
@ -113,3 +116,214 @@ func (b *bitReader) close() error {
} }
return nil return nil
} }
// bitReaderBytes reads a bitstream in reverse.
// The last set bit indicates the start of the stream and is used
// for aligning the input.
//
// Unread bits are kept left-aligned in value: advance shifts consumed bits
// out of the top, and the fill methods OR fresh input in below the bits that
// are still pending.
type bitReaderBytes struct {
	in       []byte
	off      uint // next byte to read is at in[off - 1]
	value    uint64
	bitsRead uint8
}

// init initializes and resets the bit reader.
// It returns an error when in is empty or when its last byte is zero, since
// that byte must contain the end-of-stream marker bit.
func (b *bitReaderBytes) init(in []byte) error {
	n := len(in)
	if n < 1 {
		return errors.New("corrupt stream: too short")
	}
	b.in, b.off = in, uint(n)

	// The highest set bit of the final byte marks where the stream starts.
	last := in[n-1]
	if last == 0 {
		return errors.New("corrupt stream, did not find end of stream")
	}

	b.value, b.bitsRead = 0, 64
	if n < 8 {
		// Too short for the 8-byte fast start; two regular fills load
		// whatever is there.
		b.fill()
		b.fill()
	} else {
		b.fillFastStart()
	}

	// Skip the padding bits and the marker bit itself.
	b.advance(8 - uint8(highBit32(uint32(last))))
	return nil
}

// peekByteFast returns the next 8 unread bits without consuming them.
// There are no checks if the buffer is filled.
func (b *bitReaderBytes) peekByteFast() uint8 {
	return uint8(b.value >> 56)
}

// advance consumes n bits: they are counted in bitsRead and shifted out of
// the top of value.
func (b *bitReaderBytes) advance(n uint8) {
	b.value <<= n & 63
	b.bitsRead += n
}

// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReaderBytes) fillFast() {
	if b.bitsRead < 32 {
		return
	}
	word := binary.LittleEndian.Uint32(b.in[b.off-4 : b.off])
	b.value |= uint64(word) << (b.bitsRead - 32)
	b.bitsRead -= 32
	b.off -= 4
}

// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read.
func (b *bitReaderBytes) fillFastStart() {
	start := b.off - 8
	// A single open-ended re-slice keeps the bounds checks to a minimum.
	b.value = binary.LittleEndian.Uint64(b.in[start:])
	b.off = start
	b.bitsRead = 0
}

// fill() will make sure at least 32 bits are available.
// With more than 4 input bytes left it loads one 32-bit word; otherwise it
// drains the remaining bytes one at a time.
func (b *bitReaderBytes) fill() {
	if b.bitsRead < 32 {
		return
	}
	if b.off > 4 {
		word := binary.LittleEndian.Uint32(b.in[b.off-4:][:4])
		b.value |= uint64(word) << (b.bitsRead - 32)
		b.bitsRead -= 32
		b.off -= 4
		return
	}
	for ; b.off > 0; b.off-- {
		b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
		b.bitsRead -= 8
	}
}

// finished returns true if all bits have been read from the bit stream.
func (b *bitReaderBytes) finished() bool {
	return b.bitsRead >= 64 && b.off == 0
}

// close the bitstream and returns an error if out-of-buffer reads occurred.
func (b *bitReaderBytes) close() error {
	// Drop the reference to the input slice.
	b.in = nil
	if b.bitsRead <= 64 {
		return nil
	}
	return io.ErrUnexpectedEOF
}
// bitReaderShifted reads a bitstream in reverse.
// The last set bit indicates the start of the stream and is used
// for aligning the input.
//
// Unread bits are kept left-aligned in value: advance shifts consumed bits
// out of the top, and the fill methods OR fresh input in below the bits that
// are still pending. All shift counts are masked to the 0..63 range.
type bitReaderShifted struct {
	in       []byte
	off      uint // next byte to read is at in[off - 1]
	value    uint64
	bitsRead uint8
}

// init initializes and resets the bit reader.
// It returns an error when in is empty or when its last byte is zero, since
// that byte must contain the end-of-stream marker bit.
func (b *bitReaderShifted) init(in []byte) error {
	n := len(in)
	if n < 1 {
		return errors.New("corrupt stream: too short")
	}
	b.in, b.off = in, uint(n)

	// The highest set bit of the final byte marks where the stream starts.
	last := in[n-1]
	if last == 0 {
		return errors.New("corrupt stream, did not find end of stream")
	}

	b.value, b.bitsRead = 0, 64
	if n < 8 {
		// Too short for the 8-byte fast start; two regular fills load
		// whatever is there.
		b.fill()
		b.fill()
	} else {
		b.fillFastStart()
	}

	// Skip the padding bits and the marker bit itself.
	b.advance(8 - uint8(highBit32(uint32(last))))
	return nil
}

// peekBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
// It returns the next n unread bits without consuming them.
func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
	drop := (64 - n) & 63
	return uint16(b.value >> drop)
}

// advance consumes n bits: they are counted in bitsRead and shifted out of
// the top of value.
func (b *bitReaderShifted) advance(n uint8) {
	b.value <<= n & 63
	b.bitsRead += n
}

// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReaderShifted) fillFast() {
	if b.bitsRead < 32 {
		return
	}
	word := binary.LittleEndian.Uint32(b.in[b.off-4 : b.off])
	b.value |= uint64(word) << ((b.bitsRead - 32) & 63)
	b.bitsRead -= 32
	b.off -= 4
}

// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read.
func (b *bitReaderShifted) fillFastStart() {
	start := b.off - 8
	// A single open-ended re-slice keeps the bounds checks to a minimum.
	b.value = binary.LittleEndian.Uint64(b.in[start:])
	b.off = start
	b.bitsRead = 0
}

// fill() will make sure at least 32 bits are available.
// With more than 4 input bytes left it loads one 32-bit word; otherwise it
// drains the remaining bytes one at a time.
func (b *bitReaderShifted) fill() {
	if b.bitsRead < 32 {
		return
	}
	if b.off > 4 {
		word := binary.LittleEndian.Uint32(b.in[b.off-4:][:4])
		b.value |= uint64(word) << ((b.bitsRead - 32) & 63)
		b.bitsRead -= 32
		b.off -= 4
		return
	}
	for ; b.off > 0; b.off-- {
		b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
		b.bitsRead -= 8
	}
}

// finished returns true if all bits have been read from the bit stream.
func (b *bitReaderShifted) finished() bool {
	return b.bitsRead >= 64 && b.off == 0
}

// close the bitstream and returns an error if out-of-buffer reads occurred.
func (b *bitReaderShifted) close() error {
	// Drop the reference to the input slice.
	b.in = nil
	if b.bitsRead <= 64 {
		return nil
	}
	return io.ErrUnexpectedEOF
}

View file

@ -25,6 +25,9 @@ type dEntryDouble struct {
len uint8 len uint8
} }
// use8BitTables enables the specialized decoders for tables with a table log of at most 8 bits.
const use8BitTables = true
// ReadTable will read a table from the input. // ReadTable will read a table from the input.
// The size of the input may be larger than the table definition. // The size of the input may be larger than the table definition.
// Any content remaining after the table definition will be returned. // Any content remaining after the table definition will be returned.
@ -83,6 +86,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
} }
v2 := v & 15 v2 := v & 15
rankStats[v2]++ rankStats[v2]++
// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
weightTotal += (1 << v2) >> 1 weightTotal += (1 << v2) >> 1
} }
if weightTotal == 0 { if weightTotal == 0 {
@ -142,12 +146,14 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
d := dEntrySingle{ d := dEntrySingle{
entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
} }
single := s.dt.single[rankStats[w] : rankStats[w]+length] rank := &rankStats[w]
single := s.dt.single[*rank : *rank+length]
for i := range single { for i := range single {
single[i] = d single[i] = d
} }
rankStats[w] += length *rank += length
} }
return s, in, nil return s, in, nil
} }
@ -208,7 +214,10 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
if len(d.dt.single) == 0 { if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded") return nil, errors.New("no table loaded")
} }
var br bitReader if use8BitTables && d.actualTableLog <= 8 {
return d.decompress1X8Bit(dst, src)
}
var br bitReaderShifted
err := br.init(src) err := br.init(src)
if err != nil { if err != nil {
return dst, err return dst, err
@ -216,17 +225,6 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
maxDecodedSize := cap(dst) maxDecodedSize := cap(dst)
dst = dst[:0] dst = dst[:0]
decode := func() byte {
val := br.peekBitsFast(d.actualTableLog) /* note : actualTableLog >= 1 */
v := d.dt.single[val]
br.bitsRead += uint8(v.entry)
return uint8(v.entry >> 8)
}
hasDec := func(v dEntrySingle) byte {
br.bitsRead += uint8(v.entry)
return uint8(v.entry >> 8)
}
// Avoid bounds check by always having full sized table. // Avoid bounds check by always having full sized table.
const tlSize = 1 << tableLogMax const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1 const tlMask = tlSize - 1
@ -238,11 +236,25 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
for br.off >= 8 { for br.off >= 8 {
br.fillFast() br.fillFast()
buf[off+0] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
buf[off+1] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
// Refill
br.fillFast() br.fillFast()
buf[off+2] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask])
buf[off+3] = hasDec(dt[br.peekBitsFast(d.actualTableLog)&tlMask]) v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4 off += 4
if off == 0 { if off == 0 {
if len(dst)+256 > maxDecodedSize { if len(dst)+256 > maxDecodedSize {
@ -259,13 +271,196 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
} }
dst = append(dst, buf[:off]...) dst = append(dst, buf[:off]...)
for !br.finished() { // br < 8, so uint8 is fine
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
for bitsLeft > 0 {
br.fill() br.fill()
if false && br.bitsRead >= 32 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value = (br.value << 32) | uint64(low)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value = (br.value << 8) | uint64(br.in[br.off-1])
br.bitsRead -= 8
br.off--
}
}
}
if len(dst) >= maxDecodedSize { if len(dst) >= maxDecodedSize {
br.close() br.close()
return nil, ErrMaxDecodedSizeExceeded return nil, ErrMaxDecodedSizeExceeded
} }
dst = append(dst, decode()) v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= nBits
dst = append(dst, uint8(v.entry>>8))
}
return dst, br.close()
}
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
//
// A table log of exactly 8 is handed off to decompress1X8BitExactly.
// On success the decoded bytes are returned in dst, which is reset to length 0
// before decoding. If the output would grow past cap(dst), nil and
// ErrMaxDecodedSizeExceeded are returned. If the bit reader cannot be
// initialised from src, the incoming dst is returned together with that error.
func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
if d.actualTableLog == 8 {
return d.decompress1X8BitExactly(dst, src)
}
var br bitReaderBytes
err := br.init(src)
if err != nil {
return dst, err
}
// The capacity of dst is the hard upper bound on the decoded size.
maxDecodedSize := cap(dst)
dst = dst[:0]

// Avoid bounds check by always having full sized table.
dt := d.dt.single[:256]

// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8

// Only the top actualTableLog bits of each peeked byte index the table,
// so the unused low bits are shifted away before the lookup.
shift := (8 - d.actualTableLog) & 7

//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
// Fast path: one refill followed by four symbol decodes per iteration.
// NOTE(review): this presumably relies on every code being at most
// actualTableLog (< 8) bits, so that four decodes fit in the 32 bits
// fillFast guarantees; confirm against the table builder.
for br.off >= 4 {
br.fillFast()
// For each entry, the low byte is the number of bits to consume and the
// high byte is the decoded symbol.
v := dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)

v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)

v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)

v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)

off += 4
// off is a uint8, so it wraps back to 0 once all 256 staging bytes have
// been written; that is the signal to flush the whole buffer.
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}

// Flush whatever is still staged from the last partial round.
if len(dst)+int(off) > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)

// br.off < 4 here, so the uint8 arithmetic is fine.
// bitsLeft counts the bits still to decode: the unread input bytes plus
// the bits not yet consumed from br.value.
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
for bitsLeft > 0 {
// At most 8 unread bits remain in br.value: pull in every remaining
// input byte (there are fewer than 4).
if br.bitsRead >= 64-8 {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
// Decode one symbol and account for the bits it used.
v := dt[br.peekByteFast()>>shift]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
// close reports an error if more bits were consumed than the stream held.
return dst, br.close()
}
// decompress1X8BitExactly will decompress a 1X encoded stream with tablelog == 8.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
var br bitReaderBytes
err := br.init(src)
if err != nil {
return dst, err
}
maxDecodedSize := cap(dst)
dst = dst[:0]
// Avoid bounds check by always having full sized table.
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
const shift = 0
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
v := dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekByteFast()>>shift]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
if len(dst)+int(off) > maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)
// br < 4, so uint8 is fine
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
for bitsLeft > 0 {
if br.bitsRead >= 64-8 {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
if len(dst) >= maxDecodedSize {
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
} }
return dst, br.close() return dst, br.close()
} }
@ -274,15 +469,18 @@ func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
// The length of the supplied input must match the end of a block exactly. // The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of // The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly. // the uncompressed data exactly.
func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
if len(s.dt.single) == 0 { if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded") return nil, errors.New("no table loaded")
} }
if len(src) < 6+(4*1) { if len(src) < 6+(4*1) {
return nil, errors.New("input too small") return nil, errors.New("input too small")
} }
if use8BitTables && d.actualTableLog <= 8 {
return d.decompress4X8bit(dst, src)
}
var br [4]bitReader var br [4]bitReaderShifted
start := 6 start := 6
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8) length := int(src[i*2]) | (int(src[i*2+1]) << 8)
@ -308,14 +506,7 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
const tlSize = 1 << tableLogMax const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1 const tlMask = tlSize - 1
single := s.dt.single[:tlSize] single := d.dt.single[:tlSize]
decode := func(br *bitReader) byte {
val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
v := single[val&tlMask]
br.bitsRead += uint8(v.entry)
return uint8(v.entry >> 8)
}
// Use temp table to avoid bound checks/append penalty. // Use temp table to avoid bound checks/append penalty.
var buf [256]byte var buf [256]byte
@ -324,66 +515,63 @@ func (s *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
// Decode 2 values from each decoder/loop. // Decode 2 values from each decoder/loop.
const bufoff = 256 / 4 const bufoff = 256 / 4
bigloop:
for { for {
for i := range br { if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
br := &br[i] break
if br.off < 4 {
break bigloop
}
br.fillFast()
} }
{ {
const stream = 0 const stream = 0
val := br[stream].peekBitsFast(s.actualTableLog) const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask] v := single[val&tlMask]
br[stream].bitsRead += uint8(v.entry) br[stream].advance(uint8(v.entry))
val2 := br[stream].peekBitsFast(s.actualTableLog)
v2 := single[val2&tlMask]
buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
buf[off+bufoff*stream] = uint8(v.entry >> 8) buf[off+bufoff*stream] = uint8(v.entry >> 8)
br[stream].bitsRead += uint8(v2.entry)
}
{ val2 := br[stream2].peekBitsFast(d.actualTableLog)
const stream = 1
val := br[stream].peekBitsFast(s.actualTableLog)
v := single[val&tlMask]
br[stream].bitsRead += uint8(v.entry)
val2 := br[stream].peekBitsFast(s.actualTableLog)
v2 := single[val2&tlMask] v2 := single[val2&tlMask]
buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8) buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
br[stream].bitsRead += uint8(v2.entry)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v2 = single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
} }
{ {
const stream = 2 const stream = 2
val := br[stream].peekBitsFast(s.actualTableLog) const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask] v := single[val&tlMask]
br[stream].bitsRead += uint8(v.entry) br[stream].advance(uint8(v.entry))
val2 := br[stream].peekBitsFast(s.actualTableLog)
v2 := single[val2&tlMask]
buf[off+bufoff*stream+1] = uint8(v2.entry >> 8)
buf[off+bufoff*stream] = uint8(v.entry >> 8) buf[off+bufoff*stream] = uint8(v.entry >> 8)
br[stream].bitsRead += uint8(v2.entry)
}
{ val2 := br[stream2].peekBitsFast(d.actualTableLog)
const stream = 3
val := br[stream].peekBitsFast(s.actualTableLog)
v := single[val&tlMask]
br[stream].bitsRead += uint8(v.entry)
val2 := br[stream].peekBitsFast(s.actualTableLog)
v2 := single[val2&tlMask] v2 := single[val2&tlMask]
buf[off+bufoff*stream+1] = uint8(v2.entry >> 8) br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8) buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
br[stream].bitsRead += uint8(v2.entry)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v2 = single[val2&tlMask]
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
} }
off += 2 off += 2
@ -422,12 +610,456 @@ bigloop:
for i := range br { for i := range br {
offset := dstEvery * i offset := dstEvery * i
br := &br[i] br := &br[i]
for !br.finished() { bitsLeft := br.off*8 + uint(64-br.bitsRead)
for bitsLeft > 0 {
br.fill() br.fill()
if false && br.bitsRead >= 32 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value = (br.value << 32) | uint64(low)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value = (br.value << 8) | uint64(br.in[br.off-1])
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) { if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4") return nil, errors.New("corruption detected: stream overrun 4")
} }
out[offset] = decode(br)
// Read value and increment offset.
val := br.peekBitsFast(d.actualTableLog)
v := single[val&tlMask].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= uint(nBits)
out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
return dst, nil
}
// decompress4X8bit will decompress a 4X encoded stream with tablelog <= 8.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
if d.actualTableLog == 8 {
return d.decompress4X8bitExactly(dst, src)
}
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
err := br[3].init(src[start:])
if err != nil {
return nil, err
}
// destination, offset to match first output
dstSize := cap(dst)
dst = dst[:dstSize]
out := dst
dstEvery := (dstSize + 3) / 4
shift := (8 - d.actualTableLog) & 7
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
const bufoff = 256 / 4
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
}
{
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[:bufoff])
copy(out[dstEvery:], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(out, buf[:off])
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value |= uint64(low) << (br.bitsRead - 32)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
return nil, err
}
}
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
return dst, nil
}
// decompress4X8bitExactly will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
var br [4]bitReaderBytes
start := 6
for i := 0; i < 3; i++ {
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
err := br[3].init(src[start:])
if err != nil {
return nil, err
}
// destination, offset to match first output
dstSize := cap(dst)
dst = dst[:dstSize]
out := dst
dstEvery := (dstSize + 3) / 4
const shift = 0
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
var buf [256]byte
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
const bufoff = 256 / 4
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
}
{
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
if off == bufoff {
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[:bufoff])
copy(out[dstEvery:], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
copy(out, buf[:off])
copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
bitsLeft := int(br.off*8) + int(64-br.bitsRead)
for bitsLeft > 0 {
if br.finished() {
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value |= uint64(low) << (br.bitsRead - 32)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
br.bitsRead -= 8
br.off--
}
}
}
// end inline...
if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
out[offset] = uint8(v >> 8)
offset++ offset++
} }
decoded += offset - dstEvery*i decoded += offset - dstEvery*i

View file

@ -5,6 +5,7 @@
package zstd package zstd
import ( import (
"encoding/binary"
"errors" "errors"
"io" "io"
"math/bits" "math/bits"
@ -34,8 +35,12 @@ func (b *bitReader) init(in []byte) error {
} }
b.bitsRead = 64 b.bitsRead = 64
b.value = 0 b.value = 0
b.fill() if len(in) >= 8 {
b.fill() b.fillFastStart()
} else {
b.fill()
b.fill()
}
b.bitsRead += 8 - uint8(highBits(uint32(v))) b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil return nil
} }
@ -63,21 +68,31 @@ func (b *bitReader) fillFast() {
if b.bitsRead < 32 { if b.bitsRead < 32 {
return return
} }
// Do single re-slice to avoid bounds checks. // 2 bounds checks.
v := b.in[b.off-4 : b.off] v := b.in[b.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32
b.off -= 4 b.off -= 4
} }
// fillFastStart performs the initial fill of the bit reader with a single 64-bit read.
// It assumes the bit reader is empty and that there are at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
	// Re-slice once at the new offset; Uint64 consumes the 8 bytes found there.
	next := b.off - 8
	b.value = binary.LittleEndian.Uint64(b.in[next:])
	b.bitsRead = 0
	b.off = next
}
// fill() will make sure at least 32 bits are available. // fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() { func (b *bitReader) fill() {
if b.bitsRead < 32 { if b.bitsRead < 32 {
return return
} }
if b.off >= 4 { if b.off >= 4 {
v := b.in[b.off-4 : b.off] v := b.in[b.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low) b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32 b.bitsRead -= 32

View file

@ -83,6 +83,10 @@ type blockDec struct {
err error err error
decWG sync.WaitGroup decWG sync.WaitGroup
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
localFrame *frameDec
// Block is RLE, this is the size. // Block is RLE, this is the size.
RLESize uint32 RLESize uint32
tmp [4]byte tmp [4]byte

View file

@ -4,8 +4,6 @@
package zstd package zstd
import "encoding/binary"
// byteReader provides a byte reader that reads // byteReader provides a byte reader that reads
// little endian values from a byte stream. // little endian values from a byte stream.
// The input stream is manually advanced. // The input stream is manually advanced.
@ -33,7 +31,8 @@ func (b *byteReader) overread() bool {
// Int32 returns a little endian int32 starting at current offset. // Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 { func (b byteReader) Int32() int32 {
b2 := b.b[b.off : b.off+4 : b.off+4] b2 := b.b[b.off:]
b2 = b2[:4]
v3 := int32(b2[3]) v3 := int32(b2[3])
v2 := int32(b2[2]) v2 := int32(b2[2])
v1 := int32(b2[1]) v1 := int32(b2[1])
@ -57,7 +56,25 @@ func (b byteReader) Uint32() uint32 {
} }
return v return v
} }
return binary.LittleEndian.Uint32(b.b[b.off : b.off+4]) b2 := b.b[b.off:]
b2 = b2[:4]
v3 := uint32(b2[3])
v2 := uint32(b2[2])
v1 := uint32(b2[1])
v0 := uint32(b2[0])
return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
}
// Uint32NC returns a little endian uint32 starting at current offset.
// The caller must ensure that there are at least 4 bytes left.
func (b byteReader) Uint32NC() uint32 {
b2 := b.b[b.off:]
b2 = b2[:4]
v3 := uint32(b2[3])
v2 := uint32(b2[2])
v1 := uint32(b2[1])
v0 := uint32(b2[0])
return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
} }
// unread returns the unread portion of the input. // unread returns the unread portion of the input.

View file

@ -23,9 +23,6 @@ type Decoder struct {
// Unreferenced decoders, ready for use. // Unreferenced decoders, ready for use.
decoders chan *blockDec decoders chan *blockDec
// Unreferenced decoders, ready for use.
frames chan *frameDec
// Streams ready to be decoded. // Streams ready to be decoded.
stream chan decodeStream stream chan decodeStream
@ -90,10 +87,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// Create decoders // Create decoders
d.decoders = make(chan *blockDec, d.o.concurrent) d.decoders = make(chan *blockDec, d.o.concurrent)
d.frames = make(chan *frameDec, d.o.concurrent)
for i := 0; i < d.o.concurrent; i++ { for i := 0; i < d.o.concurrent; i++ {
d.frames <- newFrameDec(d.o) dec := newBlockDec(d.o.lowMem)
d.decoders <- newBlockDec(d.o.lowMem) dec.localFrame = newFrameDec(d.o)
d.decoders <- dec
} }
if r == nil { if r == nil {
@ -283,15 +280,15 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
} }
// Grab a block decoder and frame decoder. // Grab a block decoder and frame decoder.
block, frame := <-d.decoders, <-d.frames block := <-d.decoders
frame := block.localFrame
defer func() { defer func() {
if debug { if debug {
printf("re-adding decoder: %p", block) printf("re-adding decoder: %p", block)
} }
d.decoders <- block
frame.rawInput = nil frame.rawInput = nil
frame.bBuf = nil frame.bBuf = nil
d.frames <- frame d.decoders <- block
}() }()
frame.bBuf = input frame.bBuf = input

View file

@ -55,7 +55,7 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
if b.remain() < 4 { if b.remain() < 4 {
return errors.New("input too small") return errors.New("input too small")
} }
bitStream := b.Uint32() bitStream := b.Uint32NC()
nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
if nbBits > tablelogAbsoluteMax { if nbBits > tablelogAbsoluteMax {
println("Invalid tablelog:", nbBits) println("Invalid tablelog:", nbBits)
@ -79,7 +79,8 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
n0 += 24 n0 += 24
if r := b.remain(); r > 5 { if r := b.remain(); r > 5 {
b.advance(2) b.advance(2)
bitStream = b.Uint32() >> bitCount // The check above should make sure we can read 32 bits
bitStream = b.Uint32NC() >> bitCount
} else { } else {
// end of bit stream // end of bit stream
bitStream >>= 16 bitStream >>= 16
@ -104,10 +105,11 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
charnum++ charnum++
} }
if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3) b.advance(bitCount >> 3)
bitCount &= 7 bitCount &= 7
bitStream = b.Uint32() >> bitCount // The check above should make sure we can read 32 bits
bitStream = b.Uint32NC() >> bitCount
} else { } else {
bitStream >>= 2 bitStream >>= 2
} }
@ -148,17 +150,16 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
threshold >>= 1 threshold >>= 1
} }
//println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain()) if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3) b.advance(bitCount >> 3)
bitCount &= 7 bitCount &= 7
// The check above should make sure we can read 32 bits
bitStream = b.Uint32NC() >> (bitCount & 31)
} else { } else {
bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
b.off = len(b.b) - 4 b.off = len(b.b) - 4
//println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend) bitStream = b.Uint32() >> (bitCount & 31)
} }
bitStream = b.Uint32() >> (bitCount & 31)
//printf("bitstream is now: 0b%b", bitStream)
} }
s.symbolLen = charnum s.symbolLen = charnum
if s.symbolLen <= 1 { if s.symbolLen <= 1 {

2
vendor/modules.txt vendored
View file

@ -89,7 +89,7 @@ github.com/jmespath/go-jmespath
github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report
github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/formatter
github.com/jstemmer/go-junit-report/parser github.com/jstemmer/go-junit-report/parser
# github.com/klauspost/compress v1.10.7 # github.com/klauspost/compress v1.10.8
github.com/klauspost/compress/flate github.com/klauspost/compress/flate
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/gzip github.com/klauspost/compress/gzip