From 3935038e20f0fcca992b13736a633071e56541d7 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 12 Mar 2020 17:32:07 +0200 Subject: [PATCH] vendor: update github.com/klauspost/compress from v1.10.1 to v1.10.3 --- go.mod | 2 +- go.sum | 4 +- .../klauspost/compress/flate/deflate.go | 4 +- .../klauspost/compress/flate/fast_encoder.go | 7 +- .../klauspost/compress/flate/gen_inflate.go | 274 ++++++ .../compress/flate/huffman_bit_writer.go | 13 + .../klauspost/compress/flate/huffman_code.go | 4 +- .../klauspost/compress/flate/inflate.go | 113 ++- .../klauspost/compress/flate/inflate_gen.go | 922 ++++++++++++++++++ .../klauspost/compress/flate/level1.go | 22 +- .../klauspost/compress/flate/level2.go | 28 +- .../klauspost/compress/flate/level3.go | 54 +- .../klauspost/compress/flate/level4.go | 32 +- .../klauspost/compress/flate/level5.go | 46 +- .../klauspost/compress/flate/level6.go | 36 +- .../klauspost/compress/flate/token.go | 4 +- .../klauspost/compress/zstd/decoder.go | 4 +- .../klauspost/compress/zstd/enc_better.go | 521 ++++++++++ .../klauspost/compress/zstd/enc_dfast.go | 51 +- .../klauspost/compress/zstd/enc_fast.go | 140 ++- .../klauspost/compress/zstd/encoder.go | 15 +- .../compress/zstd/encoder_options.go | 26 +- .../zstd/internal/xxhash/xxhash_amd64.s | 8 +- .../klauspost/compress/zstd/zstd.go | 30 +- vendor/modules.txt | 2 +- 25 files changed, 2091 insertions(+), 271 deletions(-) create mode 100644 vendor/github.com/klauspost/compress/flate/gen_inflate.go create mode 100644 vendor/github.com/klauspost/compress/flate/inflate_gen.go create mode 100644 vendor/github.com/klauspost/compress/zstd/enc_better.go diff --git a/go.mod b/go.mod index fa1b8276f..55d4afe79 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/aws/aws-sdk-go v1.29.10 github.com/cespare/xxhash/v2 v2.1.1 github.com/golang/snappy v0.0.1 - github.com/klauspost/compress v1.10.1 + github.com/klauspost/compress v1.10.3 github.com/valyala/fasthttp v1.9.0 github.com/valyala/fastjson v1.5.0 github.com/valyala/fastrand v1.0.0 diff --git a/go.sum b/go.sum index 8a4548032..d0d83cdf5 100644 --- a/go.sum +++ b/go.sum @@ -105,8 +105,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ= -github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8= +github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 3f4d76b6a..2b101d26b 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -48,6 +48,8 @@ const ( maxHashOffset = 1 << 24 skipNever = math.MaxInt32 + + debugDeflate = false ) type compressionLevel struct { @@ -365,7 +367,7 @@ func (d *compressor) deflateLazy() { // Sanity enables additional runtime tests. // It's intended to be used during development // to supplement the currently ad-hoc unit tests. - const sanity = false + const sanity = debugDeflate if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { return diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 3d2fdcd77..6d4c1e98b 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -35,16 +35,16 @@ func newFastEnc(level int) fastEnc { } const ( - tableBits = 16 // Bits used in the table + tableBits = 15 // Bits used in the table tableSize = 1 << tableBits // Size of the table tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. baseMatchOffset = 1 // The smallest match offset baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 maxMatchOffset = 1 << 15 // The largest match offset - bTableBits = 18 // Bits used in the big tables + bTableBits = 17 // Bits used in the big tables bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history. + allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. ) @@ -92,7 +92,6 @@ func hash(u uint32) uint32 { } type tableEntry struct { - val uint32 offset int32 } diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go new file mode 100644 index 000000000..c74a95fe7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/gen_inflate.go @@ -0,0 +1,274 @@ +// +build generate + +//go:generate go run $GOFILE && gofmt -w inflate_gen.go + +package main + +import ( + "os" + "strings" +) + +func main() { + f, err := os.Create("inflate_gen.go") + if err != nil { + panic(err) + } + defer f.Close() + types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"} + names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"} + imports := []string{"bytes", "bufio", "io", "strings", "math/bits"} + f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( +`) + + for _, imp := range imports { + f.WriteString("\t\"" + imp + "\"\n") + } + f.WriteString(")\n\n") + + template := ` + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) $FUNCNAME$() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.($TYPE$) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +` + for i, t := range types { + s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) + s = strings.Replace(s, "$TYPE$", t, -1) + f.WriteString(s) + } + f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n") + f.WriteString("\tswitch f.r.(type) {\n") + for i, t := range types { + f.WriteString("\t\tcase " + t + ":\n") + f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n") + } + f.WriteString("\t\tdefault:\n") + f.WriteString("\t\t\treturn f.huffmanBlockGeneric") + f.WriteString("\t}\n}\n") +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go index 56ee6dc8b..53fe1d06e 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -484,6 +484,9 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n } } +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { if w.err != nil { return @@ -493,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + var flag int32 if isEof { flag = 1 diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go index 9d8e81ad6..4c39a3018 100644 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -109,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder { return h } -var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() -var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() +var fixedLiteralEncoding = generateFixedLiteralEncoding() +var fixedOffsetEncoding = generateFixedOffsetEncoding() func (h *huffmanEncoder) bitLength(freq []uint16) int { var total int diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 6dc5b5d06..7f175a4ec 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -106,7 +106,7 @@ const ( ) type huffmanDecoder struct { - min int // the minimum code length + maxRead int // the maximum number of bits we can read and not overread chunks *[huffmanNumChunks]uint16 // chunks as described above links [][]uint16 // overflow links linkMask uint32 // mask the width of the link table @@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool { if h.chunks == nil { h.chunks = &[huffmanNumChunks]uint16{} } - if h.min != 0 { + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} } // Count number of codes of each length, - // compute min and max length. + // compute maxRead and max length. var count [maxCodeLen]int var min, max int for _, n := range lengths { @@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return false } - h.min = min + h.maxRead = min chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -342,7 +342,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlock() + f.huffmanBlockDecoder()() case 2: // compressed, dynamic Huffman tables if f.err = f.readHuffman(); f.err != nil { @@ -350,7 +350,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlock() + f.huffmanBlockDecoder()() default: // 3 is reserved. if debugDecode { @@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error { return CorruptInputError(f.roffset) } - // As an optimization, we can initialize the min bits to read at a time + // As an optimization, we can initialize the maxRead bits to read at a time // for the HLIT tree to the length of the EOB marker since we know that // every block must terminate with one. This preserves the property that // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.min < f.bits[endBlockMarker] { - f.h1.min = f.bits[endBlockMarker] + if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 } return nil @@ -558,7 +564,7 @@ func (f *decompressor) readHuffman() error { // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBlock() { +func (f *decompressor) huffmanBlockGeneric() { const ( stateInit = iota // Zero value must be stateInit stateDict @@ -574,19 +580,64 @@ func (f *decompressor) huffmanBlock() { readLiteral: // Read literal and/or (length, distance) according to RFC section 3.2.3. { - v, err := f.huffSym(f.hl) - if err != nil { - f.err = err - return + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } } + var n uint // number of bits extra var length int + var err error switch { case v < 256: f.dict.writeByte(byte(v)) if f.dict.availWrite() == 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock + f.step = (*decompressor).huffmanBlockGeneric f.stepState = stateInit return } @@ -714,7 +765,7 @@ copyHistory: if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).huffmanBlock // We need to continue this work + f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work f.stepState = stateDict return } @@ -726,21 +777,33 @@ copyHistory: func (f *decompressor) dataBlock() { // Uncompressed. // Discard current half-byte. - f.nb = 0 - f.b = 0 + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[0:4]) + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) f.roffset += int64(nr) if err != nil { f.err = noEOF(err) return } - n := int(f.buf[0]) | int(f.buf[1])<<8 - nn := int(f.buf[2]) | int(f.buf[3])<<8 - if uint16(nn) != uint16(^n) { + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { if debugDecode { - fmt.Println("uint16(nn) != uint16(^n)", nn, ^n) + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) } f.err = CorruptInputError(f.roffset) return @@ -752,7 +815,7 @@ func (f *decompressor) dataBlock() { return } - f.copyLen = n + f.copyLen = int(n) f.copyData() } @@ -816,7 +879,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { // with single element, huffSym must error on these two edge cases. In both // cases, the chunks slice will be 0 for the invalid sequence, leading it // satisfy the n == 0 check below. - n := uint(h.min) + n := uint(h.maxRead) // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go new file mode 100644 index 000000000..397dc1b1a --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -0,0 +1,922 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBufioReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bufio.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanStringsReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*strings.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +func (f *decompressor) huffmanBlockDecoder() func() { + switch f.r.(type) { + case *bytes.Buffer: + return f.huffmanBytesBuffer + case *bytes.Reader: + return f.huffmanBytesReader + case *bufio.Reader: + return f.huffmanBufioReader + case *strings.Reader: + return f.huffmanStringsReader + default: + return f.huffmanBlockGeneric + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go index 102fc74c7..1e5eea396 100644 --- a/vendor/github.com/klauspost/compress/flate/level1.go +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -16,7 +16,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -81,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { } now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash(uint32(now)) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -96,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } cv = uint32(now) @@ -139,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash(cv)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -153,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) { x := load6432(src, s-2) o := e.cur + s - 2 prevHash := hash(uint32(x)) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHash] = tableEntry{offset: o} x >>= 16 currHash := hash(uint32(x)) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x) != candidate.val { + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 8) s++ break diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go index dc6b1d314..5b986a194 100644 --- a/vendor/github.com/klauspost/compress/flate/level2.go +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -18,7 +18,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -83,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { } candidate = e.table[nextHash] now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} nextHash = hash4u(uint32(now), bTableBits) offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { - e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)} + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} break } @@ -98,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { nextS++ candidate = e.table[nextHash] now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv} + e.table[nextHash] = tableEntry{offset: s + e.cur} offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && cv == candidate.val { + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { break } cv = uint32(now) @@ -148,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+l+4) < len(src) { cv := load3232(src, s) - e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv} + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -157,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { for i := s - l + 2; i < s-5; i += 7 { x := load6432(src, int32(i)) nextHash := hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} // Skip one x >>= 16 nextHash = hash4u(uint32(x), bTableBits) - e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)} + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} } // We could immediately start working at s now, but to improve @@ -178,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) { o := e.cur + s - 2 prevHash := hash4u(uint32(x), bTableBits) prevHash2 := hash4u(uint32(x>>8), bTableBits) - e.table[prevHash] = tableEntry{offset: o, val: uint32(x)} - e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)} + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} currHash := hash4u(uint32(x>>16), bTableBits) candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)} + e.table[currHash] = tableEntry{offset: o + 2} offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x>>16) != candidate.val { + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { cv = uint32(x >> 24) s++ break diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go index 1a3ff9b6b..c22b4244a 100644 --- a/vendor/github.com/klauspost/compress/flate/level3.go +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -15,7 +15,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -81,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { } candidates := e.table[nextHash] now := load3232(src, nextS) - e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}} + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} // Check both candidates candidate = candidates.Cur - offset := s - (candidate.offset - e.cur) - if cv == candidate.val { - if offset > maxMatchOffset { - cv = now - // Previous will also be invalid, we have nothing. - continue - } - o2 := s - (candidates.Prev.offset - e.cur) - if cv != candidates.Prev.val || o2 > maxMatchOffset { + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { break } // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) if l2 > l1 { candidate = candidates.Prev @@ -106,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { // We only check if value mismatches. // Offset will always be invalid in other cases. candidate = candidates.Prev - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - break - } + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break } } cv = now @@ -158,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { nextHash := hash(cv) e.table[nextHash] = tableEntryPrev{ Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + t, val: cv}, + Cur: tableEntry{offset: e.cur + t}, } } goto emitRemainder @@ -170,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { prevHash := hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 3}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 2}, } x >>= 8 prevHash = hash(uint32(x)) e.table[prevHash] = tableEntryPrev{ Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)}, + Cur: tableEntry{offset: e.cur + s - 1}, } x >>= 8 currHash := hash(uint32(x)) @@ -192,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) { cv = uint32(x) e.table[currHash] = tableEntryPrev{ Prev: candidates.Cur, - Cur: tableEntry{offset: s + e.cur, val: cv}, + Cur: tableEntry{offset: s + e.cur}, } // Check both candidates candidate = candidates.Cur - if cv == candidate.val { - offset := s - (candidate.offset - e.cur) - if offset <= maxMatchOffset { - continue - } - } else { + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) { // We only check if value mismatches. // Offset will always be invalid in other cases. candidate = candidates.Prev - if cv == candidate.val { + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { offset := s - (candidate.offset - e.cur) if offset <= maxMatchOffset { continue diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go index f3ecc9c4d..e62f0c02b 100644 --- a/vendor/github.com/klauspost/compress/flate/level4.go +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -13,7 +13,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } // Protect against e.cur wraparound. @@ -92,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry e.bTable[nextHashL] = entry t = lCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { // We got a long match. Use that. break } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... lCandidate = e.bTable[hash7(next, tableBits)] // If the next long is a candidate, check if we should use that instead... lOff := nextS - (lCandidate.offset - e.cur) - if lOff < maxMatchOffset && lCandidate.val == uint32(next) { + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) if l2 > l1 { s = nextS @@ -137,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic("s-t") } @@ -160,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { // Index first pair after match end. if int(s+8) < len(src) { cv := load6432(src, s) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} - e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} } goto emitRemainder } @@ -171,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { i := nextS if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 i += 3 for ; i < s-1; i += 3 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} e.bTable[hash7(cv, tableBits)] = t e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -195,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} - e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} cv = x >> 8 } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 4e3916825..d513f1ffd 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -13,7 +13,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -100,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -110,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -129,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { break } t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // If the next long is a candidate, use that... t2 := lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -163,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -197,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { if nextEmit < s { emitLiteral(dst, src[nextEmit:s]) } - if false { + if debugDeflate { if t >= s { panic(fmt.Sprintln("s-t", s, t)) } @@ -226,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { i := s - l + 1 if i < s-1 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} + t := tableEntry{offset: i + e.cur} e.table[hash4x64(cv, tableBits)] = t eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // Do an long at i+1 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} eLong = &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur // We only have enough bits for a short entry at i+2 cv >>= 8 - t = tableEntry{offset: t.offset + 1, val: uint32(cv)} + t = tableEntry{offset: t.offset + 1} e.table[hash4x64(cv, tableBits)] = t // Skip one - otherwise we risk hitting 's' i += 4 for ; i < s-1; i += hashEvery { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hash4u(t2.val, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 } } } @@ -261,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) { o := e.cur + s - 1 prevHashS := hash4x64(x, tableBits) prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} + e.table[prevHashS] = tableEntry{offset: o} eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur cv = x >> 8 } diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go index 00a311977..a52c80ea4 100644 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -13,7 +13,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin ) - if debugDecode && e.cur < 0 { + if debugDeflate && e.cur < 0 { panic(fmt.Sprint("e.cur < 0: ", e.cur)) } @@ -101,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { sCandidate := e.table[nextHashS] lCandidate := e.bTable[nextHashL] next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + entry := tableEntry{offset: s + e.cur} e.table[nextHashS] = entry eLong := &e.bTable[nextHashL] eLong.Cur, eLong.Prev = entry, eLong.Cur @@ -112,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { t = lCandidate.Cur.offset - e.cur if s-t < maxMatchOffset { - if uint32(cv) == lCandidate.Cur.val { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { // Long candidate matches at least 4 bytes. // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check the previous long candidate as well. t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { l = e.matchlen(s+4, t+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4 if ml1 > l { @@ -135,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // Current value did not match, but check if previous long value does. t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur break } } t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { // Found a 4 match... l = e.matchlen(s+4, t+4, src) + 4 @@ -153,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { lCandidate = e.bTable[nextHashL] // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur // Check repeat at s + repOff const repOff = 1 @@ -174,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // If the next long is a candidate, use that... t2 = lCandidate.Cur.offset - e.cur if nextS-t2 < maxMatchOffset { - if lCandidate.Cur.val == uint32(next) { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -185,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { } // If the previous long is a candidate, use that... t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { ml := e.matchlen(nextS+4, t2+4, src) + 4 if ml > l { t = t2 @@ -244,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { // Index after match end. for i := nextS + 1; i < int32(len(src))-8; i += 2 { cv := load6432(src, i) - e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)} + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur } goto emitRemainder } @@ -255,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) { if true { for i := nextS + 1; i < s-1; i += 2 { cv := load6432(src, i) - t := tableEntry{offset: i + e.cur, val: uint32(cv)} - t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} eLong := &e.bTable[hash7(cv, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)] e.table[hash4x64(cv, tableBits)] = t diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go index 099c0ddbc..f9abf606d 100644 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -262,7 +262,7 @@ func (t *tokens) EstimatedBits() int { // AddMatch adds a match to the tokens. // This function is very sensitive to inlining and right on the border. func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xlength >= maxMatchLength+baseMatchLength { panic(fmt.Errorf("invalid length: %v", xlength)) } @@ -281,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { // AddMatchLong adds a match to the tokens, potentially longer than max match length. // Length should NOT have the base subtracted, only offset should. func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { - if debugDecode { + if debugDeflate { if xoffset >= maxMatchOffset+baseMatchOffset { panic(fmt.Errorf("invalid offset: %v", xoffset)) } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 35a3cda91..86553c2c3 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -66,7 +66,7 @@ var ( // A Decoder can be used in two modes: // // 1) As a stream, or -// 2) For stateless decoding using DecodeAll or DecodeBuffer. +// 2) For stateless decoding using DecodeAll. // // Only a single stream can be decoded concurrently, but the same decoder // can run multiple concurrent stateless decodes. It is even possible to @@ -315,7 +315,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { if size > 1<<20 { size = 1 << 20 } - dst = make([]byte, 0, frame.WindowSize) + dst = make([]byte, 0, size) } dst, err = frame.runDecoder(dst, block) diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go new file mode 100644 index 000000000..4375e08b4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -0,0 +1,521 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + betterLongTableBits = 19 // Bits used in the long match table + betterLongTableSize = 1 << betterLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + betterShortTableBits = 13 // Bits used in the short match table + betterShortTableSize = 1 << betterShortTableBits // Size of the table +) + +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + stepSize := int32(e.o.targetLength) + if stepSize == 0 { + stepSize++ + } + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.Encode(blk, src) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 0ffea7655..d640e6a9f 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -172,55 +172,6 @@ encodeLoop: cv = load6432(src, s) continue } - const repOff2 = 1 - // We deviate from the reference encoder and also check offset 2. - // Slower and not consistently better, so disabled. - // repIndex = s - offset2 + repOff2 - if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) { - // Consider history as well. - var seq seq - lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src) - - seq.matchLen = uint32(lenght - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := s + repOff2 - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) - - // rep 2 - seq.offset = 2 - if debugSequences { - println("repeat sequence 2", seq, "next s:", s) - } - blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 - nextEmit = s - if s >= sLimit { - if debug { - println("repeat ended", s, lenght) - - } - break encodeLoop - } - cv = load6432(src, s) - // Swap offsets - offset1, offset2 = offset2, offset1 - continue - } } // Find the offsets of our two matches. coffsetL := s - (candidateL.offset - e.cur) @@ -372,7 +323,7 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashS := hash5(cv, dFastShortTableBits) nextHashL := hash8(cv, dFastLongTableBits) // We have at least 4 byte match. diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 28134b158..1387b8082 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -6,6 +6,7 @@ package zstd import ( "fmt" + "math" "math/bits" "github.com/klauspost/compress/zstd/internal/xxhash" @@ -23,7 +24,7 @@ type tableEntry struct { offset int32 } -type fastEncoder struct { +type fastBase struct { o encParams // cur is the offset at the start of hist cur int32 @@ -31,18 +32,22 @@ type fastEncoder struct { maxMatchOff int32 hist []byte crc *xxhash.Digest - table [tableSize]tableEntry tmp [8]byte blk *blockEnc } +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + // CRC returns the underlying CRC writer. -func (e *fastEncoder) CRC() *xxhash.Digest { +func (e *fastBase) CRC() *xxhash.Digest { return e.crc } // AppendCRC will append the CRC to the destination slice and return it. -func (e *fastEncoder) AppendCRC(dst []byte) []byte { +func (e *fastBase) AppendCRC(dst []byte) []byte { crc := e.crc.Sum(e.tmp[:0]) dst = append(dst, crc[7], crc[6], crc[5], crc[4]) return dst @@ -50,7 +55,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastEncoder) WindowSize(size int) int32 { +func (e *fastBase) WindowSize(size int) int32 { if size > 0 && size < int(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. @@ -63,7 +68,7 @@ func (e *fastEncoder) WindowSize(size int) int32 { } // Block returns the current block. -func (e *fastEncoder) Block() *blockEnc { +func (e *fastBase) Block() *blockEnc { return e.blk } @@ -169,9 +174,22 @@ encodeLoop: if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+6, repIndex+4, src) + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -197,11 +215,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -257,7 +275,20 @@ encodeLoop: } // Extend the 4-byte match as long as possible. - l := e.matchlen(s+4, t+4, src) + 4 + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -294,7 +325,20 @@ encodeLoop: if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { // We have at least 4 byte match. // No need to check backwards. We come straight from a match - l := 4 + e.matchlen(s+4, o2+4, src) + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -412,10 +456,23 @@ encodeLoop: if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { // Consider history as well. var seq seq - // lenght := 4 + e.matchlen(s+6, repIndex+4, src) - lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -441,11 +498,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + 2 + s += length + 2 nextEmit = s if s >= sLimit { if debug { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -498,7 +555,20 @@ encodeLoop: // Extend the 4-byte match as long as possible. //l := e.matchlenNoHist(s+4, t+4, src) + 4 - l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Extend backwards tMin := s - e.maxMatchOff @@ -536,7 +606,20 @@ encodeLoop: // We have at least 4 byte match. // No need to check backwards. We come straight from a match //l := 4 + e.matchlenNoHist(s+4, o2+4, src) - l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } // Store this, since we have it. nextHash := hash6(cv, hashLog) @@ -571,7 +654,7 @@ encodeLoop: } } -func (e *fastEncoder) addBlock(src []byte) int32 { +func (e *fastBase) addBlock(src []byte) int32 { if debugAsserts && e.cur > bufferReset { panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) } @@ -602,17 +685,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 { // useBlock will replace the block with the provided one, // but transfer recent offsets from the previous. -func (e *fastEncoder) UseBlock(enc *blockEnc) { +func (e *fastBase) UseBlock(enc *blockEnc) { enc.reset(e.blk) e.blk = enc } -func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { // Extend the match to be as long as possible. return int32(matchLen(src[s:], src[t:])) } -func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { if debugAsserts { if s < 0 { err := fmt.Sprintf("s (%d) < 0", s) @@ -626,18 +709,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) panic(err) } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } } // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. -func (e *fastEncoder) Reset() { +func (e *fastBase) Reset() { if e.blk == nil { e.blk = &blockEnc{} e.blk.init() diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4032fb9fc..67d45efb9 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -71,15 +71,14 @@ func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { } if w != nil { e.Reset(w) - } else { - e.init.Do(func() { - e.initialize() - }) } return &e, nil } func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } e.encoders = make(chan encoder, e.o.concurrent) for i := 0; i < e.o.concurrent; i++ { e.encoders <- e.o.encoder() @@ -89,9 +88,6 @@ func (e *Encoder) initialize() { // Reset will re-initialize the writer and new writes will encode to the supplied writer // as a new, independent stream. func (e *Encoder) Reset(w io.Writer) { - e.init.Do(func() { - e.initialize() - }) s := &e.state s.wg.Wait() s.wWg.Wait() @@ -422,10 +418,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(func() { - e.o.setDefault() - e.initialize() - }) + e.init.Do(e.initialize) enc := <-e.encoders defer func() { // Release encoder reference to last block. diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 40eb45733..0ff970dac 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -39,9 +39,11 @@ func (o *encoderOptions) setDefault() { func (o encoderOptions) encoder() encoder { switch o.level { case SpeedDefault: - return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + case SpeedBetterCompression: + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} case SpeedFastest: - return &fastEncoder{maxMatchOff: int32(o.windowSize)} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} } panic("unknown compression level") } @@ -67,7 +69,7 @@ func WithEncoderConcurrency(n int) EOption { } // WithWindowSize will set the maximum allowed back-reference distance. -// The value must be a power of two between WindowSizeMin and WindowSizeMax. +// The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. // The default value is determined by the compression level. @@ -130,18 +132,18 @@ const ( // This is roughly equivalent to the default Zstandard mode (level 3). SpeedDefault + // SpeedBetterCompression will yield better compression than the default. + // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. + // By using this, notice that CPU usage may go up in the future. + SpeedBetterCompression + // speedLast should be kept as the last actual compression option. // The is not for external usage, but is used to keep track of the valid options. speedLast - // SpeedBetterCompression will (in the future) yield better compression than the default, - // but at approximately 4x the CPU usage of the default. - // For now this is not implemented. - SpeedBetterCompression = SpeedDefault - // SpeedBestCompression will choose the best available compression option. // For now this is not implemented. - SpeedBestCompression = SpeedDefault + SpeedBestCompression = SpeedBetterCompression ) // EncoderLevelFromString will convert a string representation of an encoding level back @@ -163,8 +165,10 @@ func EncoderLevelFromZstd(level int) EncoderLevel { switch { case level < 3: return SpeedFastest - case level >= 3: + case level >= 3 && level < 6: return SpeedDefault + case level > 5: + return SpeedBetterCompression } return SpeedDefault } @@ -176,6 +180,8 @@ func (e EncoderLevel) String() string { return "fastest" case SpeedDefault: return "default" + case SpeedBetterCompression: + return "better" default: return "invalid" } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index d580e32ae..2c9c5357a 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -179,13 +179,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40 MOVQ ·prime2v(SB), R14 // Load slice. - MOVQ b_base+8(FP), CX - MOVQ b_len+16(FP), DX + MOVQ arg1_base+8(FP), CX + MOVQ arg1_len+16(FP), DX LEAQ (CX)(DX*1), BX SUBQ $32, BX // Load vN from d. - MOVQ d+0(FP), AX + MOVQ arg+0(FP), AX MOVQ 0(AX), R8 // v1 MOVQ 8(AX), R9 // v2 MOVQ 16(AX), R10 // v3 @@ -209,7 +209,7 @@ blockLoop: MOVQ R11, 24(AX) // The number of bytes written is CX minus the old base pointer. - SUBQ b_base+8(FP), CX + SUBQ arg1_base+8(FP), CX MOVQ CX, ret+32(FP) RET diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 5e0b64ccc..0807719c8 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -87,6 +87,17 @@ func printf(format string, a ...interface{}) { } } +// matchLenFast does matching, but will not match the last up to 7 bytes. +func matchLenFast(a, b []byte) int { + endI := len(a) & (math.MaxInt32 - 7) + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + bits.TrailingZeros64(diff)>>3 + } + } + return endI +} + // matchLen returns the maximum length. // a must be the shortest of the two. // The function also returns whether all bytes matched. @@ -97,33 +108,18 @@ func matchLen(a, b []byte) int { return i + (bits.TrailingZeros64(diff) >> 3) } } + checked := (len(a) >> 3) << 3 a = a[checked:] b = b[checked:] - // TODO: We could do a 4 check. for i := range a { if a[i] != b[i] { - return int(i) + checked + return i + checked } } return len(a) + checked } -// matchLen returns a match length in src between index s and t -func matchLenIn(src []byte, s, t int32) int32 { - s1 := len(src) - b := src[t:] - a := src[s:s1] - b = b[:len(a)] - // Extend the match to be as long as possible. - for i := range a { - if a[i] != b[i] { - return int32(i) - } - } - return int32(len(a)) -} - func load3232(b []byte, i int32) uint32 { // Help the compiler eliminate bounds checks on the read so it can be done in a single read. b = b[i:] diff --git a/vendor/modules.txt b/vendor/modules.txt index 7f068e37b..f0913e13e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -85,7 +85,7 @@ github.com/jmespath/go-jmespath github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/parser -# github.com/klauspost/compress v1.10.1 +# github.com/klauspost/compress v1.10.3 github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/gzip