vendor: update github.com/klauspost/compress from v1.10.1 to v1.10.3

Aliaksandr Valialkin 2020-03-12 17:32:07 +02:00
parent c8dc1cd218
commit 3935038e20
25 changed files with 2091 additions and 271 deletions
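For reference, a bump like this is produced with the standard Go module tooling rather than by editing vendor/ by hand; a typical sequence (illustrative commands, assuming a vendored module layout):

    go get github.com/klauspost/compress@v1.10.3
    go mod tidy
    go mod vendor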

go.mod

@@ -7,7 +7,7 @@ require (
 	github.com/aws/aws-sdk-go v1.29.10
 	github.com/cespare/xxhash/v2 v2.1.1
 	github.com/golang/snappy v0.0.1
-	github.com/klauspost/compress v1.10.1
+	github.com/klauspost/compress v1.10.3
 	github.com/valyala/fasthttp v1.9.0
 	github.com/valyala/fastjson v1.5.0
 	github.com/valyala/fastrand v1.0.0

go.sum

@@ -105,8 +105,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
 github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
-github.com/klauspost/compress v1.10.1 h1:a/QY0o9S6wCi0XhxaMX/QmusicNUqCqFugR6WKPOSoQ=
-github.com/klauspost/compress v1.10.1/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
+github.com/klauspost/compress v1.10.3 h1:OP96hzwJVBIHYU52pVTI6CczrxPvrGfgqF9N5eTO0Q8=
+github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=

vendor/github.com/klauspost/compress/flate/deflate.go

@@ -48,6 +48,8 @@ const (
 	maxHashOffset = 1 << 24

 	skipNever = math.MaxInt32
+
+	debugDeflate = false
 )

 type compressionLevel struct {
@@ -365,7 +367,7 @@ func (d *compressor) deflateLazy() {
 	// Sanity enables additional runtime tests.
 	// It's intended to be used during development
 	// to supplement the currently ad-hoc unit tests.
-	const sanity = false
+	const sanity = debugDeflate
 	if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
 		return
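The new debugDeflate constant gives the encoder a single compile-time debug switch: deflateLazy's local sanity flag now aliases it, and the per-level "e.cur < 0" checks further down in this commit are gated on it as well. A minimal sketch of the pattern, with illustrative names (debugChecks and insert are not from the package):

    package main

    import "fmt"

    // debugChecks mirrors debugDeflate: a compile-time constant gating
    // extra sanity checks. When it is false, the compiler drops the
    // guarded branches entirely, so release builds pay nothing for them.
    const debugChecks = false

    func insert(history []byte, pos int, b byte) {
        if debugChecks {
            if pos < 0 || pos >= len(history) {
                panic(fmt.Sprint("pos out of range: ", pos))
            }
        }
        history[pos] = b
    }

    func main() {
        h := make([]byte, 4)
        insert(h, 2, 'x')
        fmt.Println(h)
    }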

vendor/github.com/klauspost/compress/flate/fast_encoder.go

@@ -35,16 +35,16 @@ func newFastEnc(level int) fastEnc {
 }

 const (
-	tableBits       = 16 // Bits used in the table
+	tableBits       = 15 // Bits used in the table
 	tableSize       = 1 << tableBits // Size of the table
 	tableShift      = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
 	baseMatchOffset = 1 // The smallest match offset
 	baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
 	maxMatchOffset  = 1 << 15 // The largest match offset

-	bTableBits   = 18 // Bits used in the big tables
+	bTableBits   = 17 // Bits used in the big tables
 	bTableSize   = 1 << bTableBits // Size of the table
-	allocHistory = maxStoreBlockSize * 20 // Size to preallocate for history.
+	allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history.
 	bufferReset  = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
 )
@@ -92,7 +92,6 @@ func hash(u uint32) uint32 {
 }

 type tableEntry struct {
-	val    uint32
 	offset int32
 }
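Dropping the val field halves tableEntry from eight bytes to four; together with the smaller tableBits/bTableBits and the halved allocHistory above, this roughly halves the fast encoders' memory footprint. Candidates are instead validated by re-reading the source, as the level1-level5 hunks below show. A self-contained sketch of the new check (load3232 here is a stand-in written with encoding/binary, consistent with the package's little-endian helper of the same name):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    type tableEntry struct{ offset int32 }

    // load3232 reads 4 bytes at offset i as a little-endian uint32. With
    // val gone from tableEntry, a candidate is checked by re-reading the
    // source bytes instead of comparing against a cached copy.
    func load3232(src []byte, i int32) uint32 {
        return binary.LittleEndian.Uint32(src[i : i+4])
    }

    func main() {
        src := []byte("abcdabcd")
        cur := int32(0) // e.cur in the real encoder; zero here for simplicity
        candidate := tableEntry{offset: 0 + cur}
        s := int32(4)
        cv := load3232(src, s)
        // Old check: cv == candidate.val. New check re-reads src:
        fmt.Println(cv == load3232(src, candidate.offset-cur)) // true
    }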

vendor/github.com/klauspost/compress/flate/gen_inflate.go

@@ -0,0 +1,274 @@
// +build generate

//go:generate go run $GOFILE && gofmt -w inflate_gen.go

package main
import (
"os"
"strings"
)
func main() {
f, err := os.Create("inflate_gen.go")
if err != nil {
panic(err)
}
defer f.Close()
types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"}
names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"}
imports := []string{"bytes", "bufio", "io", "strings", "math/bits"}
f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT.
package flate
import (
`)
for _, imp := range imports {
f.WriteString("\t\"" + imp + "\"\n")
}
f.WriteString(")\n\n")
template := `
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) $FUNCNAME$() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.($TYPE$)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).$FUNCNAME$
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
`
for i, t := range types {
s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1)
s = strings.Replace(s, "$TYPE$", t, -1)
f.WriteString(s)
}
f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
f.WriteString("\tswitch f.r.(type) {\n")
for i, t := range types {
f.WriteString("\t\tcase " + t + ":\n")
f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
}
f.WriteString("\t\tdefault:\n")
f.WriteString("\t\t\treturn f.huffmanBlockGeneric")
f.WriteString("\t}\n}\n")
}
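This generator is new in v1.10.3: it stamps out one copy of the Huffman block decoder per concrete reader type so that the hot ReadByte call is a direct method call the compiler can inline, rather than an interface call. A minimal sketch of the same specialize-by-concrete-type dispatch (decode and its return strings are illustrative):

    package main

    import (
        "bufio"
        "bytes"
        "fmt"
        "io"
        "strings"
    )

    // decode picks a reader-specific fast path, like the generated
    // huffmanBlockDecoder: asserting to a concrete type lets the compiler
    // devirtualize and inline ReadByte in the inner loop.
    func decode(r io.Reader) string {
        switch r.(type) {
        case *bytes.Buffer:
            return "bytes.Buffer fast path"
        case *bytes.Reader:
            return "bytes.Reader fast path"
        case *bufio.Reader:
            return "bufio.Reader fast path"
        case *strings.Reader:
            return "strings.Reader fast path"
        default:
            return "generic path"
        }
    }

    func main() {
        fmt.Println(decode(bytes.NewReader(nil)))  // bytes.Reader fast path
        fmt.Println(decode(strings.NewReader(""))) // strings.Reader fast path
    }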

vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go

@@ -484,6 +484,9 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
 	}
 }

+// writeStoredHeader will write a stored header.
+// If the stored block is only used for EOF,
+// it is replaced with a fixed huffman block.
 func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
 	if w.err != nil {
 		return
@@ -493,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
 		w.writeCode(w.literalEncoding.codes[endBlockMarker])
 		w.lastHeader = 0
 	}
+
+	// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
+	if length == 0 && isEof {
+		w.writeFixedHeader(isEof)
+		// EOB: 7 bits, value: 0
+		w.writeBits(0, 7)
+		w.flush()
+		return
+	}
+
 	var flag int32
 	if isEof {
 		flag = 1
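The arithmetic behind the comment: a stored block used only to signal EOF costs the 3-bit block header, padding to the next byte boundary, and four bytes of LEN/NLEN, about 5 bytes in total, while a fixed-Huffman final block needs only 3 header bits plus the 7-bit end-of-block code. A quick comparison under that assumption, using the standard library against this package (expected sizes noted in comments):

    package main

    import (
        "bytes"
        stdflate "compress/flate"
        "fmt"

        kpflate "github.com/klauspost/compress/flate"
    )

    func main() {
        var std, kp bytes.Buffer

        w1, _ := stdflate.NewWriter(&std, 1)
        w1.Close()

        w2, _ := kpflate.NewWriter(&kp, 1)
        w2.Close()

        // The stdlib ends an empty stream with a stored final block
        // (expected: 5 bytes, 01 00 00 ff ff); with this change the
        // package emits a fixed-Huffman final block instead
        // (expected: 2 bytes, 03 00).
        fmt.Printf("stdlib:    %d bytes (% x)\n", std.Len(), std.Bytes())
        fmt.Printf("klauspost: %d bytes (% x)\n", kp.Len(), kp.Bytes())
    }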

vendor/github.com/klauspost/compress/flate/huffman_code.go

@@ -109,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
 	return h
 }

-var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
-var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
+var fixedLiteralEncoding = generateFixedLiteralEncoding()
+var fixedOffsetEncoding = generateFixedOffsetEncoding()

 func (h *huffmanEncoder) bitLength(freq []uint16) int {
 	var total int

vendor/github.com/klauspost/compress/flate/inflate.go

@@ -106,7 +106,7 @@ const (
 )

 type huffmanDecoder struct {
-	min      int                       // the minimum code length
+	maxRead  int                       // the maximum number of bits we can read and not overread
 	chunks   *[huffmanNumChunks]uint16 // chunks as described above
 	links    [][]uint16                // overflow links
 	linkMask uint32                    // mask the width of the link table
@@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	if h.chunks == nil {
 		h.chunks = &[huffmanNumChunks]uint16{}
 	}
-	if h.min != 0 {
+	if h.maxRead != 0 {
 		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
 	}

 	// Count number of codes of each length,
-	// compute min and max length.
+	// compute maxRead and max length.
 	var count [maxCodeLen]int
 	var min, max int
 	for _, n := range lengths {
@@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 		return false
 	}

-	h.min = min
+	h.maxRead = min
 	chunks := h.chunks[:]
 	for i := range chunks {
 		chunks[i] = 0
@@ -342,7 +342,7 @@ func (f *decompressor) nextBlock() {
 		// compressed, fixed Huffman tables
 		f.hl = &fixedHuffmanDecoder
 		f.hd = nil
-		f.huffmanBlock()
+		f.huffmanBlockDecoder()()
 	case 2:
 		// compressed, dynamic Huffman tables
 		if f.err = f.readHuffman(); f.err != nil {
@@ -350,7 +350,7 @@ func (f *decompressor) nextBlock() {
 		}
 		f.hl = &f.h1
 		f.hd = &f.h2
-		f.huffmanBlock()
+		f.huffmanBlockDecoder()()
 	default:
 		// 3 is reserved.
 		if debugDecode {
@@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error {
 		return CorruptInputError(f.roffset)
 	}

-	// As an optimization, we can initialize the min bits to read at a time
+	// As an optimization, we can initialize the maxRead bits to read at a time
 	// for the HLIT tree to the length of the EOB marker since we know that
 	// every block must terminate with one. This preserves the property that
 	// we never read any extra bytes after the end of the DEFLATE stream.
-	if f.h1.min < f.bits[endBlockMarker] {
-		f.h1.min = f.bits[endBlockMarker]
+	if f.h1.maxRead < f.bits[endBlockMarker] {
+		f.h1.maxRead = f.bits[endBlockMarker]
+	}
+	if !f.final {
+		// If not the final block, the smallest block possible is
+		// a predefined table, BTYPE=01, with a single EOB marker.
+		// This will take up 3 + 7 bits.
+		f.h1.maxRead += 10
 	}

 	return nil
@@ -558,7 +564,7 @@ func (f *decompressor) readHuffman() error {
 // hl and hd are the Huffman states for the lit/length values
 // and the distance values, respectively. If hd == nil, using the
 // fixed distance encoding associated with fixed Huffman blocks.
-func (f *decompressor) huffmanBlock() {
+func (f *decompressor) huffmanBlockGeneric() {
 	const (
 		stateInit = iota // Zero value must be stateInit
 		stateDict
@@ -574,19 +580,64 @@ func (f *decompressor) huffmanBlockGeneric() {
 readLiteral:
 	// Read literal and/or (length, distance) according to RFC section 3.2.3.
 	{
-		v, err := f.huffSym(f.hl)
-		if err != nil {
-			f.err = err
-			return
+		var v int
+		{
+			// Inlined v, err := f.huffSym(f.hl)
+			// Since a huffmanDecoder can be empty or be composed of a degenerate tree
+			// with single element, huffSym must error on these two edge cases. In both
+			// cases, the chunks slice will be 0 for the invalid sequence, leading it
+			// satisfy the n == 0 check below.
+			n := uint(f.hl.maxRead)
+			// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
+			// but is smart enough to keep local variables in registers, so use nb and b,
+			// inline call to moreBits and reassign b,nb back to f on return.
+			nb, b := f.nb, f.b
+			for {
+				for nb < n {
+					c, err := f.r.ReadByte()
+					if err != nil {
+						f.b = b
+						f.nb = nb
+						f.err = noEOF(err)
+						return
+					}
+					f.roffset++
+					b |= uint32(c) << (nb & 31)
+					nb += 8
+				}
+				chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
+				n = uint(chunk & huffmanCountMask)
+				if n > huffmanChunkBits {
+					chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
+					n = uint(chunk & huffmanCountMask)
+				}
+				if n <= nb {
+					if n == 0 {
+						f.b = b
+						f.nb = nb
+						if debugDecode {
+							fmt.Println("huffsym: n==0")
+						}
+						f.err = CorruptInputError(f.roffset)
+						return
+					}
+					f.b = b >> (n & 31)
+					f.nb = nb - n
+					v = int(chunk >> huffmanValueShift)
+					break
+				}
+			}
 		}
+
 		var n uint // number of bits extra
 		var length int
+		var err error
 		switch {
 		case v < 256:
 			f.dict.writeByte(byte(v))
 			if f.dict.availWrite() == 0 {
 				f.toRead = f.dict.readFlush()
-				f.step = (*decompressor).huffmanBlock
+				f.step = (*decompressor).huffmanBlockGeneric
 				f.stepState = stateInit
 				return
 			}
@@ -714,7 +765,7 @@ copyHistory:
 		if f.dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = f.dict.readFlush()
-			f.step = (*decompressor).huffmanBlock // We need to continue this work
+			f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work
 			f.stepState = stateDict
 			return
 		}
@@ -726,21 +777,33 @@
 func (f *decompressor) dataBlock() {
 	// Uncompressed.
 	// Discard current half-byte.
-	f.nb = 0
-	f.b = 0
+	left := (f.nb) & 7
+	f.nb -= left
+	f.b >>= left
+
+	offBytes := f.nb >> 3
+	// Unfilled values will be overwritten.
+	f.buf[0] = uint8(f.b)
+	f.buf[1] = uint8(f.b >> 8)
+	f.buf[2] = uint8(f.b >> 16)
+	f.buf[3] = uint8(f.b >> 24)
+
+	f.roffset += int64(offBytes)
+	f.nb, f.b = 0, 0

 	// Length then ones-complement of length.
-	nr, err := io.ReadFull(f.r, f.buf[0:4])
+	nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
 	f.roffset += int64(nr)
 	if err != nil {
 		f.err = noEOF(err)
 		return
 	}
-	n := int(f.buf[0]) | int(f.buf[1])<<8
-	nn := int(f.buf[2]) | int(f.buf[3])<<8
-	if uint16(nn) != uint16(^n) {
+	n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
+	nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
+	if nn != ^n {
 		if debugDecode {
-			fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
+			ncomp := ^n
+			fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
 		}
 		f.err = CorruptInputError(f.roffset)
 		return
@@ -752,7 +815,7 @@ func (f *decompressor) dataBlock() {
 		return
 	}

-	f.copyLen = n
+	f.copyLen = int(n)
 	f.copyData()
 }
@@ -816,7 +879,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
 	// with single element, huffSym must error on these two edge cases. In both
 	// cases, the chunks slice will be 0 for the invalid sequence, leading it
 	// satisfy the n == 0 check below.
-	n := uint(h.min)
+	n := uint(h.maxRead)
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
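Besides the renames, note the dataBlock rewrite above: zeroing f.nb and f.b used to throw away whole bytes that had already been read ahead into the bit buffer, so stored blocks could lose data; now only the partial byte is discarded and up to four buffered bytes are recovered into f.buf before LEN/NLEN are read. A standalone sketch of that recovery step (drain is an illustrative name; the buffer fills LSB-first, so the oldest bits sit lowest):

    package main

    import "fmt"

    // drain drops the remainder of the current byte from an LSB-first bit
    // buffer and returns the whole bytes still queued in it, mirroring the
    // new dataBlock prologue (left := nb & 7; b >>= left; ...).
    func drain(b uint32, nb uint) (recovered []byte) {
        left := nb & 7 // bits left in the partially consumed byte
        nb -= left
        b >>= left
        for ; nb >= 8; nb -= 8 {
            recovered = append(recovered, uint8(b))
            b >>= 8
        }
        return recovered
    }

    func main() {
        // 12 bits buffered: 4 leftover bits plus one whole byte (0xAB).
        b := uint32(0xAB)<<4 | 0x5
        fmt.Printf("% x\n", drain(b, 12)) // ab
    }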

vendor/github.com/klauspost/compress/flate/inflate_gen.go

@@ -0,0 +1,922 @@
// Code generated by go generate gen_inflate.go. DO NOT EDIT.
package flate
import (
"bufio"
"bytes"
"fmt"
"math/bits"
"strings"
)
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBytesBuffer() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bytes.Buffer)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBytesReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bytes.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBytesReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBufioReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*bufio.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanBufioReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanStringsReader() {
const (
stateInit = iota // Zero value must be stateInit
stateDict
)
fr := f.r.(*strings.Reader)
moreBits := func() error {
c, err := fr.ReadByte()
if err != nil {
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
switch f.stepState {
case stateInit:
goto readLiteral
case stateDict:
goto copyHistory
}
readLiteral:
// Read literal and/or (length, distance) according to RFC section 3.2.3.
{
var v int
{
// Inlined v, err := f.huffSym(f.hl)
// Since a huffmanDecoder can be empty or be composed of a degenerate tree
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(f.hl.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for nb < n {
c, err := fr.ReadByte()
if err != nil {
f.b = b
f.nb = nb
f.err = noEOF(err)
return
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := f.hl.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
if debugDecode {
fmt.Println("huffsym: n==0")
}
f.err = CorruptInputError(f.roffset)
return
}
f.b = b >> (n & 31)
f.nb = nb - n
v = int(chunk >> huffmanValueShift)
break
}
}
}
var n uint // number of bits extra
var length int
var err error
switch {
case v < 256:
f.dict.writeByte(byte(v))
if f.dict.availWrite() == 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanStringsReader
f.stepState = stateInit
return
}
goto readLiteral
case v == 256:
f.finishBlock()
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
case v < maxNumLit:
length = 258
n = 0
default:
if debugDecode {
fmt.Println(v, ">= maxNumLit")
}
f.err = CorruptInputError(f.roffset)
return
}
if n > 0 {
for f.nb < n {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits n>0:", err)
}
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<5:", err)
}
f.err = err
return
}
}
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
if debugDecode {
fmt.Println("huffsym:", err)
}
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist < maxNumDist:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = moreBits(); err != nil {
if debugDecode {
fmt.Println("morebits f.nb<nb:", err)
}
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
default:
if debugDecode {
fmt.Println("dist too big:", dist, maxNumDist)
}
f.err = CorruptInputError(f.roffset)
return
}
// No check on length; encoding can be prescient.
if dist > f.dict.histSize() {
if debugDecode {
fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
}
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
goto copyHistory
}
copyHistory:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
if cnt == 0 {
cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
}
f.copyLen -= cnt
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).huffmanStringsReader // We need to continue this work
f.stepState = stateDict
return
}
goto readLiteral
}
}
func (f *decompressor) huffmanBlockDecoder() func() {
switch f.r.(type) {
case *bytes.Buffer:
return f.huffmanBytesBuffer
case *bytes.Reader:
return f.huffmanBytesReader
case *bufio.Reader:
return f.huffmanBufioReader
case *strings.Reader:
return f.huffmanStringsReader
default:
return f.huffmanBlockGeneric
}
}

vendor/github.com/klauspost/compress/flate/level1.go

@@ -16,7 +16,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
@@ -81,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			}

 			now := load6432(src, nextS)
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 			nextHash = hash(uint32(now))

 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
@@ -96,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			nextS++
 			candidate = e.table[nextHash]
 			now >>= 8
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}

 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 			cv = uint32(now)
@@ -139,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			// Index first pair after match end.
 			if int(s+l+4) < len(src) {
 				cv := load3232(src, s)
-				e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
+				e.table[hash(cv)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -153,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		x := load6432(src, s-2)
 		o := e.cur + s - 2
 		prevHash := hash(uint32(x))
-		e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
+		e.table[prevHash] = tableEntry{offset: o}
 		x >>= 16
 		currHash := hash(uint32(x))
 		candidate = e.table[currHash]
-		e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
+		e.table[currHash] = tableEntry{offset: o + 2}

 		offset := s - (candidate.offset - e.cur)
-		if offset > maxMatchOffset || uint32(x) != candidate.val {
+		if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
 			cv = uint32(x >> 8)
 			s++
 			break

vendor/github.com/klauspost/compress/flate/level2.go

@@ -18,7 +18,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
@@ -83,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			}
 			candidate = e.table[nextHash]
 			now := load6432(src, nextS)
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}
 			nextHash = hash4u(uint32(now), bTableBits)

 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
-				e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
@@ -98,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			nextS++
 			candidate = e.table[nextHash]
 			now >>= 8
-			e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
+			e.table[nextHash] = tableEntry{offset: s + e.cur}

 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == candidate.val {
+			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
 				break
 			}
 			cv = uint32(now)
@@ -148,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			// Index first pair after match end.
 			if int(s+l+4) < len(src) {
 				cv := load3232(src, s)
-				e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
+				e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -157,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		for i := s - l + 2; i < s-5; i += 7 {
 			x := load6432(src, int32(i))
 			nextHash := hash4u(uint32(x), bTableBits)
-			e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
+			e.table[nextHash] = tableEntry{offset: e.cur + i}
 			// Skip one
 			x >>= 16
 			nextHash = hash4u(uint32(x), bTableBits)
-			e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
+			e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
 			// Skip one
 			x >>= 16
 			nextHash = hash4u(uint32(x), bTableBits)
-			e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
+			e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
 		}

 		// We could immediately start working at s now, but to improve
@@ -178,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		o := e.cur + s - 2
 		prevHash := hash4u(uint32(x), bTableBits)
 		prevHash2 := hash4u(uint32(x>>8), bTableBits)
-		e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
-		e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
+		e.table[prevHash] = tableEntry{offset: o}
+		e.table[prevHash2] = tableEntry{offset: o + 1}
 		currHash := hash4u(uint32(x>>16), bTableBits)
 		candidate = e.table[currHash]
-		e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
+		e.table[currHash] = tableEntry{offset: o + 2}

 		offset := s - (candidate.offset - e.cur)
-		if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
+		if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
 			cv = uint32(x >> 24)
 			s++
 			break

vendor/github.com/klauspost/compress/flate/level3.go

@@ -15,7 +15,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
@@ -81,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			}
 			candidates := e.table[nextHash]
 			now := load3232(src, nextS)
-			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}
+
+			// Safe offset distance until s + 4...
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+			e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}

 			// Check both candidates
 			candidate = candidates.Cur
-			offset := s - (candidate.offset - e.cur)
-			if cv == candidate.val {
-				if offset > maxMatchOffset {
-					cv = now
-					// Previous will also be invalid, we have nothing.
-					continue
-				}
-				o2 := s - (candidates.Prev.offset - e.cur)
-				if cv != candidates.Prev.val || o2 > maxMatchOffset {
+			if candidate.offset < minOffset {
+				cv = now
+				// Previous will also be invalid, we have nothing.
+				continue
+			}
+
+			if cv == load3232(src, candidate.offset-e.cur) {
+				if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
 					break
 				}
 				// Both match and are valid, pick longest.
+				offset := s - (candidate.offset - e.cur)
+				o2 := s - (candidates.Prev.offset - e.cur)
 				l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
 				if l2 > l1 {
 					candidate = candidates.Prev
@@ -106,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				// We only check if value mismatches.
 				// Offset will always be invalid in other cases.
 				candidate = candidates.Prev
-				if cv == candidate.val {
-					offset := s - (candidate.offset - e.cur)
-					if offset <= maxMatchOffset {
-						break
-					}
+				if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+					break
 				}
 			}
 			cv = now
@@ -158,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				nextHash := hash(cv)
 				e.table[nextHash] = tableEntryPrev{
 					Prev: e.table[nextHash].Cur,
-					Cur:  tableEntry{offset: e.cur + t, val: cv},
+					Cur:  tableEntry{offset: e.cur + t},
 				}
 			}
 			goto emitRemainder
@@ -170,21 +171,21 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			prevHash := hash(uint32(x))
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 3, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 3},
 			}
 			x >>= 8
 			prevHash = hash(uint32(x))
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 2, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 2},
 			}
 			x >>= 8
 			prevHash = hash(uint32(x))
 			e.table[prevHash] = tableEntryPrev{
 				Prev: e.table[prevHash].Cur,
-				Cur:  tableEntry{offset: e.cur + s - 1, val: uint32(x)},
+				Cur:  tableEntry{offset: e.cur + s - 1},
 			}
 			x >>= 8
 			currHash := hash(uint32(x))
@@ -192,21 +193,18 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 			cv = uint32(x)
 			e.table[currHash] = tableEntryPrev{
 				Prev: candidates.Cur,
-				Cur:  tableEntry{offset: s + e.cur, val: cv},
+				Cur:  tableEntry{offset: s + e.cur},
 			}

 			// Check both candidates
 			candidate = candidates.Cur
-			if cv == candidate.val {
-				offset := s - (candidate.offset - e.cur)
-				if offset <= maxMatchOffset {
-					continue
-				}
-			} else {
+			minOffset := e.cur + s - (maxMatchOffset - 4)
+
+			if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
 				// We only check if value mismatches.
 				// Offset will always be invalid in other cases.
 				candidate = candidates.Prev
-				if cv == candidate.val {
+				if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
 					offset := s - (candidate.offset - e.cur)
 					if offset <= maxMatchOffset {
 						continue
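The level-3 rewrite replaces the per-candidate offset/val comparison with a precomputed minOffset threshold: any candidate whose absolute offset is older than maxMatchOffset (less 4 bytes of headroom for the value load, the "Safe offset distance until s + 4" in the hunk above) is rejected before src is touched. A small sketch of the threshold form (usable is an illustrative name):

    package main

    import "fmt"

    const maxMatchOffset = 1 << 15 // from fast_encoder.go

    // usable reports whether a stored candidate is close enough to the
    // current position s (both as absolute offsets including cur). It is
    // the precomputed-threshold form of the old
    // s-(offset-cur) <= maxMatchOffset check, with 4 bytes of headroom
    // for the value load.
    func usable(candOffset, cur, s int32) bool {
        minOffset := cur + s - (maxMatchOffset - 4)
        return candOffset >= minOffset
    }

    func main() {
        fmt.Println(usable(100, 0, 200))                // recent: true
        fmt.Println(usable(100, 0, maxMatchOffset+200)) // stale: false
    }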

vendor/github.com/klauspost/compress/flate/level4.go

@@ -13,7 +13,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
 	// Protect against e.cur wraparound.
@@ -92,24 +92,24 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			sCandidate := e.table[nextHashS]
 			lCandidate := e.bTable[nextHashL]
 			next := load6432(src, nextS)
-			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			entry := tableEntry{offset: s + e.cur}
 			e.table[nextHashS] = entry
 			e.bTable[nextHashL] = entry

 			t = lCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == lCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) {
 				// We got a long match. Use that.
 				break
 			}

 			t = sCandidate.offset - e.cur
-			if s-t < maxMatchOffset && uint32(cv) == sCandidate.val {
+			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
 				// Found a 4 match...
 				lCandidate = e.bTable[hash7(next, tableBits)]

 				// If the next long is a candidate, check if we should use that instead...
 				lOff := nextS - (lCandidate.offset - e.cur)
-				if lOff < maxMatchOffset && lCandidate.val == uint32(next) {
+				if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) {
 					l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:])
 					if l2 > l1 {
 						s = nextS
@@ -137,7 +137,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		if nextEmit < s {
 			emitLiteral(dst, src[nextEmit:s])
 		}
-		if false {
+		if debugDeflate {
 			if t >= s {
 				panic("s-t")
 			}
@@ -160,8 +160,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			// Index first pair after match end.
 			if int(s+8) < len(src) {
 				cv := load6432(src, s)
-				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
-				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
+				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -171,20 +171,20 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			i := nextS
 			if i < s-1 {
 				cv := load6432(src, i)
-				t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-				t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+				t := tableEntry{offset: i + e.cur}
+				t2 := tableEntry{offset: t.offset + 1}
 				e.bTable[hash7(cv, tableBits)] = t
 				e.bTable[hash7(cv>>8, tableBits)] = t2
-				e.table[hash4u(t2.val, tableBits)] = t2
+				e.table[hash4u(uint32(cv>>8), tableBits)] = t2

 				i += 3
 				for ; i < s-1; i += 3 {
 					cv := load6432(src, i)
-					t := tableEntry{offset: i + e.cur, val: uint32(cv)}
-					t2 := tableEntry{val: uint32(cv >> 8), offset: t.offset + 1}
+					t := tableEntry{offset: i + e.cur}
+					t2 := tableEntry{offset: t.offset + 1}
 					e.bTable[hash7(cv, tableBits)] = t
 					e.bTable[hash7(cv>>8, tableBits)] = t2
-					e.table[hash4u(t2.val, tableBits)] = t2
+					e.table[hash4u(uint32(cv>>8), tableBits)] = t2
 				}
 			}
 		}
@@ -195,8 +195,8 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		o := e.cur + s - 1
 		prevHashS := hash4x64(x, tableBits)
 		prevHashL := hash7(x, tableBits)
-		e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)}
-		e.bTable[prevHashL] = tableEntry{offset: o, val: uint32(x)}
+		e.table[prevHashS] = tableEntry{offset: o}
+		e.bTable[prevHashL] = tableEntry{offset: o}

 		cv = x >> 8
 	}

vendor/github.com/klauspost/compress/flate/level5.go

@@ -13,7 +13,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debugDecode && e.cur < 0 {
+	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
 	}
@@ -100,7 +100,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			sCandidate := e.table[nextHashS]
 			lCandidate := e.bTable[nextHashL]
 			next := load6432(src, nextS)
-			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			entry := tableEntry{offset: s + e.cur}
 			e.table[nextHashS] = entry
 			eLong := &e.bTable[nextHashL]
 			eLong.Cur, eLong.Prev = entry, eLong.Cur
@@ -110,14 +110,14 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			t = lCandidate.Cur.offset - e.cur
 			if s-t < maxMatchOffset {
-				if uint32(cv) == lCandidate.Cur.val {
+				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
 					// Store the next match
-					e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)}
+					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
 					eLong := &e.bTable[nextHashL]
-					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur
+					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur

 					t2 := lCandidate.Prev.offset - e.cur
-					if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val {
+					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
 						l = e.matchlen(s+4, t+4, src) + 4
 						ml1 := e.matchlen(s+4, t2+4, src) + 4
 						if ml1 > l {
@ -129,30 +129,30 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
break break
} }
t = lCandidate.Prev.offset - e.cur t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match // Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break break
} }
} }
t = sCandidate.offset - e.cur t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4-byte match...                                // Found a 4-byte match...
l = e.matchlen(s+4, t+4, src) + 4 l = e.matchlen(s+4, t+4, src) + 4
lCandidate = e.bTable[nextHashL] lCandidate = e.bTable[nextHashL]
// Store the next match // Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// If the next long is a candidate, use that... // If the next long is a candidate, use that...
t2 := lCandidate.Cur.offset - e.cur t2 := lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset { if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) { if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4 ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l { if ml > l {
t = t2 t = t2
@ -163,7 +163,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
} }
// If the previous long is a candidate, use that... // If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4 ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l { if ml > l {
t = t2 t = t2
@ -197,7 +197,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
if nextEmit < s { if nextEmit < s {
emitLiteral(dst, src[nextEmit:s]) emitLiteral(dst, src[nextEmit:s])
} }
if false { if debugDeflate {
if t >= s { if t >= s {
panic(fmt.Sprintln("s-t", s, t)) panic(fmt.Sprintln("s-t", s, t))
} }
@ -226,31 +226,31 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
i := s - l + 1 i := s - l + 1
if i < s-1 { if i < s-1 {
cv := load6432(src, i) cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)} t := tableEntry{offset: i + e.cur}
e.table[hash4x64(cv, tableBits)] = t e.table[hash4x64(cv, tableBits)] = t
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
// Do a long at i+1                                // Do a long at i+1
cv >>= 8 cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)} t = tableEntry{offset: t.offset + 1}
eLong = &e.bTable[hash7(cv, tableBits)] eLong = &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
// We only have enough bits for a short entry at i+2 // We only have enough bits for a short entry at i+2
cv >>= 8 cv >>= 8
t = tableEntry{offset: t.offset + 1, val: uint32(cv)} t = tableEntry{offset: t.offset + 1}
e.table[hash4x64(cv, tableBits)] = t e.table[hash4x64(cv, tableBits)] = t
// Skip one - otherwise we risk hitting 's' // Skip one - otherwise we risk hitting 's'
i += 4 i += 4
for ; i < s-1; i += hashEvery { for ; i < s-1; i += hashEvery {
cv := load6432(src, i) cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)} t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur eLong.Cur, eLong.Prev = t, eLong.Cur
e.table[hash4u(t2.val, tableBits)] = t2 e.table[hash4u(uint32(cv>>8), tableBits)] = t2
} }
} }
} }
@ -261,9 +261,9 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
o := e.cur + s - 1 o := e.cur + s - 1
prevHashS := hash4x64(x, tableBits) prevHashS := hash4x64(x, tableBits)
prevHashL := hash7(x, tableBits) prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o, val: uint32(x)} e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL] eLong := &e.bTable[prevHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: o, val: uint32(x)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
cv = x >> 8 cv = x >> 8
} }

View file

@ -13,7 +13,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1 inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin minNonLiteralBlockSize = 1 + 1 + inputMargin
) )
if debugDecode && e.cur < 0 { if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur)) panic(fmt.Sprint("e.cur < 0: ", e.cur))
} }
@ -101,7 +101,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
sCandidate := e.table[nextHashS] sCandidate := e.table[nextHashS]
lCandidate := e.bTable[nextHashL] lCandidate := e.bTable[nextHashL]
next := load6432(src, nextS) next := load6432(src, nextS)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)} entry := tableEntry{offset: s + e.cur}
e.table[nextHashS] = entry e.table[nextHashS] = entry
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur eLong.Cur, eLong.Prev = entry, eLong.Cur
@ -112,17 +112,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t = lCandidate.Cur.offset - e.cur t = lCandidate.Cur.offset - e.cur
if s-t < maxMatchOffset { if s-t < maxMatchOffset {
if uint32(cv) == lCandidate.Cur.val { if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
// Long candidate matches at least 4 bytes. // Long candidate matches at least 4 bytes.
// Store the next match // Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check the previous long candidate as well. // Check the previous long candidate as well.
t2 := lCandidate.Prev.offset - e.cur t2 := lCandidate.Prev.offset - e.cur
if s-t2 < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
l = e.matchlen(s+4, t+4, src) + 4 l = e.matchlen(s+4, t+4, src) + 4
ml1 := e.matchlen(s+4, t2+4, src) + 4 ml1 := e.matchlen(s+4, t2+4, src) + 4
if ml1 > l { if ml1 > l {
@ -135,17 +135,17 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
} }
// Current value did not match, but check if previous long value does. // Current value did not match, but check if previous long value does.
t = lCandidate.Prev.offset - e.cur t = lCandidate.Prev.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == lCandidate.Prev.val { if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
// Store the next match // Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
break break
} }
} }
t = sCandidate.offset - e.cur t = sCandidate.offset - e.cur
if s-t < maxMatchOffset && uint32(cv) == sCandidate.val { if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
// Found a 4-byte match...                                // Found a 4-byte match...
l = e.matchlen(s+4, t+4, src) + 4 l = e.matchlen(s+4, t+4, src) + 4
@ -153,9 +153,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
lCandidate = e.bTable[nextHashL] lCandidate = e.bTable[nextHashL]
// Store the next match // Store the next match
e.table[nextHashS] = tableEntry{offset: nextS + e.cur, val: uint32(next)} e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
eLong := &e.bTable[nextHashL] eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur, val: uint32(next)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
// Check repeat at s + repOff // Check repeat at s + repOff
const repOff = 1 const repOff = 1
@ -174,7 +174,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// If the next long is a candidate, use that... // If the next long is a candidate, use that...
t2 = lCandidate.Cur.offset - e.cur t2 = lCandidate.Cur.offset - e.cur
if nextS-t2 < maxMatchOffset { if nextS-t2 < maxMatchOffset {
if lCandidate.Cur.val == uint32(next) { if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4 ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l { if ml > l {
t = t2 t = t2
@ -185,7 +185,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
} }
// If the previous long is a candidate, use that... // If the previous long is a candidate, use that...
t2 = lCandidate.Prev.offset - e.cur t2 = lCandidate.Prev.offset - e.cur
if nextS-t2 < maxMatchOffset && lCandidate.Prev.val == uint32(next) { if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
ml := e.matchlen(nextS+4, t2+4, src) + 4 ml := e.matchlen(nextS+4, t2+4, src) + 4
if ml > l { if ml > l {
t = t2 t = t2
@ -244,9 +244,9 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Index after match end. // Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 { for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i) cv := load6432(src, i)
e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur, val: uint32(cv)} e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur, val: uint32(cv)}, eLong.Cur eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
} }
goto emitRemainder goto emitRemainder
} }
@ -255,8 +255,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
if true { if true {
for i := nextS + 1; i < s-1; i += 2 { for i := nextS + 1; i < s-1; i += 2 {
cv := load6432(src, i) cv := load6432(src, i)
t := tableEntry{offset: i + e.cur, val: uint32(cv)} t := tableEntry{offset: i + e.cur}
t2 := tableEntry{offset: t.offset + 1, val: uint32(cv >> 8)} t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)] eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)] eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
e.table[hash4x64(cv, tableBits)] = t e.table[hash4x64(cv, tableBits)] = t

View file

@ -262,7 +262,7 @@ func (t *tokens) EstimatedBits() int {
// AddMatch adds a match to the tokens. // AddMatch adds a match to the tokens.
// This function is very sensitive to inlining and right on the border. // This function is very sensitive to inlining and right on the border.
func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
if debugDecode { if debugDeflate {
if xlength >= maxMatchLength+baseMatchLength { if xlength >= maxMatchLength+baseMatchLength {
panic(fmt.Errorf("invalid length: %v", xlength)) panic(fmt.Errorf("invalid length: %v", xlength))
} }
@ -281,7 +281,7 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
// AddMatchLong adds a match to the tokens, potentially longer than max match length. // AddMatchLong adds a match to the tokens, potentially longer than max match length.
// Length should NOT have the base subtracted, only offset should. // Length should NOT have the base subtracted, only offset should.
func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
if debugDecode { if debugDeflate {
if xoffset >= maxMatchOffset+baseMatchOffset { if xoffset >= maxMatchOffset+baseMatchOffset {
panic(fmt.Errorf("invalid offset: %v", xoffset)) panic(fmt.Errorf("invalid offset: %v", xoffset))
} }
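These token hunks swap the assertion guard from debugDecode to the new debugDeflate constant introduced in deflate.go, so encoder-side sanity checks no longer ride along with decoder debugging. Since the constant is false, the guarded block compiles away entirely; a minimal sketch of the pattern, using the assertion shown above:

	// debugDeflate is a package-level const (false in normal builds),
	// so the compiler eliminates this branch entirely.
	if debugDeflate {
		if xlength >= maxMatchLength+baseMatchLength {
			panic(fmt.Errorf("invalid length: %v", xlength))
		}
	}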

View file

@ -66,7 +66,7 @@ var (
// A Decoder can be used in two modes: // A Decoder can be used in two modes:
// //
// 1) As a stream, or // 1) As a stream, or
// 2) For stateless decoding using DecodeAll or DecodeBuffer. // 2) For stateless decoding using DecodeAll.
// //
// Only a single stream can be decoded concurrently, but the same decoder // Only a single stream can be decoded concurrently, but the same decoder
// can run multiple concurrent stateless decodes. It is even possible to // can run multiple concurrent stateless decodes. It is even possible to
@ -315,7 +315,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if size > 1<<20 { if size > 1<<20 {
size = 1 << 20 size = 1 << 20
} }
dst = make([]byte, 0, frame.WindowSize) dst = make([]byte, 0, size)
} }
dst, err = frame.runDecoder(dst, block) dst, err = frame.runDecoder(dst, block)
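The decoder.go change fixes the initial allocation in DecodeAll: capacity now comes from the capped content size rather than frame.WindowSize, so decoding a small frame that declares a large window no longer pre-allocates the whole window. The resulting logic, as a sketch of the hunk above:

	// size is the frame's decoded content size (when known); cap the
	// up-front allocation at 1 MiB and let append grow dst as needed.
	if size > 1<<20 {
		size = 1 << 20
	}
	dst = make([]byte, 0, size)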

521
vendor/github.com/klauspost/compress/zstd/enc_better.go generated vendored Normal file
View file

@ -0,0 +1,521 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
package zstd
import "fmt"
const (
betterLongTableBits = 19 // Bits used in the long match table
betterLongTableSize = 1 << betterLongTableBits // Size of the table
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
)
type prevEntry struct {
offset int32
prev int32
}
// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
fastBase
table [betterShortTableSize]tableEntry
longTable [betterLongTableSize]prevEntry
}
// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
const (
// Input margin is the number of bytes we read (8)
// and the maximum we will read ahead (2)
inputMargin = 8 + 2
minNonLiteralBlockSize = 16
)
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.longTable[:] {
e.longTable[i] = prevEntry{}
}
e.cur = e.maxMatchOff
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
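// Entries older than minOff are beyond the match window and are zeroed;
// live entries are rebased so e.cur can restart at maxMatchOff.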
for i := range e.table[:] {
v := e.table[i].offset
if v < minOff {
v = 0
} else {
v = v - e.cur + e.maxMatchOff
}
e.table[i].offset = v
}
for i := range e.longTable[:] {
v := e.longTable[i].offset
v2 := e.longTable[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.longTable[i] = prevEntry{
offset: v,
prev: v2,
}
}
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 1.
stepSize := int32(e.o.targetLength)
if stepSize == 0 {
stepSize++
}
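// kSearchStrength tunes the skip rate: every 1<<(kSearchStrength-1) = 256
// bytes scanned without a match adds one byte to the search step below.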
const kSearchStrength = 9
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
const repOff = 1
repIndex := s - offset1 + repOff
off := s + e.cur
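// The long table keeps a two-deep chain (the old head slides into prev);
// the short table holds a single entry and is simply overwritten.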
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = 1
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
// Index skipped...
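// Long hashes go in at index0 and short hashes at index0+1, stepping
// by 2 to bound the cost of indexing the bytes covered by the match.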
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
continue
}
const repOff2 = 1
// We deviate from the reference encoder and also check offset 2.
// Still slower and not much better, so disabled.
// repIndex = s - offset2 + repOff2
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff2
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 2
seq.offset = 2
if debugSequences {
println("repeat sequence 2", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
// Index skipped...
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
}
}
// Find the offsets of our two matches.
coffsetL := candidateL.offset - e.cur
coffsetLP := candidateL.prev - e.cur
// Check if we have a long match.
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matched = e.matchlen(s+8, coffsetL+8, src) + 8
t = coffsetL
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
// Found a long match, at least 8 bytes.
prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
if prevMatch > matched {
matched = prevMatch
t = coffsetLP
}
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
}
break
}
// Check if we have a long match on prev.
if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
// Found a long match, at least 8 bytes.
matched = e.matchlen(s+8, coffsetLP+8, src) + 8
t = coffsetLP
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
break
}
coffsetS := candidateS.offset - e.cur
// Check if we have a short match.
if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
// found a regular match
matched = e.matchlen(s+4, coffsetS+4, src) + 4
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
// We can store it, since we have at least a 4 byte match.
e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
if matchedNext > matched {
t = coffsetL
s += checkAt
matched = matchedNext
if debugMatches {
println("long match (after short)")
}
break
}
}
// Check prev long...
coffsetL = candidateL.prev - e.cur
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
if matchedNext > matched {
t = coffsetL
s += checkAt
matched = matchedNext
if debugMatches {
println("prev long match (after short)")
}
break
}
}
t = coffsetS
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic("t<0")
}
if debugMatches {
println("short match")
}
break
}
// No match found, move forward in input.
s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
offset2 = offset1
offset1 = s - t
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the n-byte match as long as possible.
l := matched
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
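// Offset values 1-3 are reserved for repeat codes, so literal offsets
// carry a +3 bias.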
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
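// There is no cheaper no-history fast path for this encoder yet, so it
// simply reuses Encode.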
e.Encode(blk, src)
}

View file

@ -172,55 +172,6 @@ encodeLoop:
cv = load6432(src, s) cv = load6432(src, s)
continue continue
} }
const repOff2 = 1
// We deviate from the reference encoder and also check offset 2.
// Slower and not consistently better, so disabled.
// repIndex = s - offset2 + repOff2
if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff2
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 2
seq.offset = 2
if debugSequences {
println("repeat sequence 2", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
cv = load6432(src, s)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
}
} }
// Find the offsets of our two matches. // Find the offsets of our two matches.
coffsetL := s - (candidateL.offset - e.cur) coffsetL := s - (candidateL.offset - e.cur)
@ -372,7 +323,7 @@ encodeLoop:
} }
// Store this, since we have it. // Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits) nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits) nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match. // We have at least 4 byte match.

View file

@ -6,6 +6,7 @@ package zstd
import ( import (
"fmt" "fmt"
"math"
"math/bits" "math/bits"
"github.com/klauspost/compress/zstd/internal/xxhash" "github.com/klauspost/compress/zstd/internal/xxhash"
@ -23,7 +24,7 @@ type tableEntry struct {
offset int32 offset int32
} }
type fastEncoder struct { type fastBase struct {
o encParams o encParams
// cur is the offset at the start of hist // cur is the offset at the start of hist
cur int32 cur int32
@ -31,18 +32,22 @@ type fastEncoder struct {
maxMatchOff int32 maxMatchOff int32
hist []byte hist []byte
crc *xxhash.Digest crc *xxhash.Digest
table [tableSize]tableEntry
tmp [8]byte tmp [8]byte
blk *blockEnc blk *blockEnc
} }
type fastEncoder struct {
fastBase
table [tableSize]tableEntry
}
// CRC returns the underlying CRC writer. // CRC returns the underlying CRC writer.
func (e *fastEncoder) CRC() *xxhash.Digest { func (e *fastBase) CRC() *xxhash.Digest {
return e.crc return e.crc
} }
// AppendCRC will append the CRC to the destination slice and return it. // AppendCRC will append the CRC to the destination slice and return it.
func (e *fastEncoder) AppendCRC(dst []byte) []byte { func (e *fastBase) AppendCRC(dst []byte) []byte {
crc := e.crc.Sum(e.tmp[:0]) crc := e.crc.Sum(e.tmp[:0])
dst = append(dst, crc[7], crc[6], crc[5], crc[4]) dst = append(dst, crc[7], crc[6], crc[5], crc[4])
return dst return dst
@ -50,7 +55,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte {
// WindowSize returns the window size of the encoder, // WindowSize returns the window size of the encoder,
// or a window size small enough to contain the input size, if > 0. // or a window size small enough to contain the input size, if > 0.
func (e *fastEncoder) WindowSize(size int) int32 { func (e *fastBase) WindowSize(size int) int32 {
if size > 0 && size < int(e.maxMatchOff) { if size > 0 && size < int(e.maxMatchOff) {
b := int32(1) << uint(bits.Len(uint(size))) b := int32(1) << uint(bits.Len(uint(size)))
// Keep minimum window. // Keep minimum window.
@ -63,7 +68,7 @@ func (e *fastEncoder) WindowSize(size int) int32 {
} }
// Block returns the current block. // Block returns the current block.
func (e *fastEncoder) Block() *blockEnc { func (e *fastBase) Block() *blockEnc {
return e.blk return e.blk
} }
@ -169,9 +174,22 @@ encodeLoop:
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well. // Consider history as well.
var seq seq var seq seq
lenght := 4 + e.matchlen(s+6, repIndex+4, src) var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
seq.matchLen = uint32(lenght - zstdMinMatch) seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards. // We might be able to match backwards.
// Extend as long as we can. // Extend as long as we can.
@ -197,11 +215,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s) println("repeat sequence", seq, "next s:", s)
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
s += lenght + 2 s += length + 2
nextEmit = s nextEmit = s
if s >= sLimit { if s >= sLimit {
if debug { if debug {
println("repeat ended", s, lenght) println("repeat ended", s, length)
} }
break encodeLoop break encodeLoop
@ -257,7 +275,20 @@ encodeLoop:
} }
// Extend the 4-byte match as long as possible. // Extend the 4-byte match as long as possible.
l := e.matchlen(s+4, t+4, src) + 4 //l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Extend backwards // Extend backwards
tMin := s - e.maxMatchOff tMin := s - e.maxMatchOff
@ -294,7 +325,20 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match. // We have at least 4 byte match.
// No need to check backwards. We come straight from a match // No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src) //l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Store this, since we have it. // Store this, since we have it.
nextHash := hash6(cv, hashLog) nextHash := hash6(cv, hashLog)
@ -412,10 +456,23 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well. // Consider history as well.
var seq seq var seq seq
// lenght := 4 + e.matchlen(s+6, repIndex+4, src) // length := 4 + e.matchlen(s+6, repIndex+4, src)
lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
seq.matchLen = uint32(lenght - zstdMinMatch) seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards. // We might be able to match backwards.
// Extend as long as we can. // Extend as long as we can.
@ -441,11 +498,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s) println("repeat sequence", seq, "next s:", s)
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
s += lenght + 2 s += length + 2
nextEmit = s nextEmit = s
if s >= sLimit { if s >= sLimit {
if debug { if debug {
println("repeat ended", s, lenght) println("repeat ended", s, length)
} }
break encodeLoop break encodeLoop
@ -498,7 +555,20 @@ encodeLoop:
// Extend the 4-byte match as long as possible. // Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4 //l := e.matchlenNoHist(s+4, t+4, src) + 4
l := int32(matchLen(src[s+4:], src[t+4:])) + 4 // l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Extend backwards // Extend backwards
tMin := s - e.maxMatchOff tMin := s - e.maxMatchOff
@ -536,7 +606,20 @@ encodeLoop:
// We have at least 4 byte match. // We have at least 4 byte match.
// No need to check backwards. We come straight from a match // No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src) //l := 4 + e.matchlenNoHist(s+4, o2+4, src)
l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) // l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Store this, since we have it. // Store this, since we have it.
nextHash := hash6(cv, hashLog) nextHash := hash6(cv, hashLog)
@ -571,7 +654,7 @@ encodeLoop:
} }
} }
func (e *fastEncoder) addBlock(src []byte) int32 { func (e *fastBase) addBlock(src []byte) int32 {
if debugAsserts && e.cur > bufferReset { if debugAsserts && e.cur > bufferReset {
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
} }
@ -602,17 +685,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 {
// UseBlock will replace the block with the provided one,                                // UseBlock will replace the block with the provided one,
// but transfer recent offsets from the previous. // but transfer recent offsets from the previous.
func (e *fastEncoder) UseBlock(enc *blockEnc) { func (e *fastBase) UseBlock(enc *blockEnc) {
enc.reset(e.blk) enc.reset(e.blk)
e.blk = enc e.blk = enc
} }
func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 { func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible. // Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:])) return int32(matchLen(src[s:], src[t:]))
} }
func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts { if debugAsserts {
if s < 0 { if s < 0 {
err := fmt.Sprintf("s (%d) < 0", s) err := fmt.Sprintf("s (%d) < 0", s)
@ -626,18 +709,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
panic(err) panic(err)
} }
} if len(src)-int(s) > maxCompressedBlockSize {
s1 := int(s) + maxMatchLength - 4 panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
if s1 > len(src) { }
s1 = len(src)
} }
// Extend the match to be as long as possible. // Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:])) return int32(matchLen(src[s:], src[t:]))
} }
// Reset the encoding table. // Reset the encoding table.
func (e *fastEncoder) Reset() { func (e *fastBase) Reset() {
if e.blk == nil { if e.blk == nil {
e.blk = &blockEnc{} e.blk = &blockEnc{}
e.blk.init() e.blk.init()
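The 15-line blocks repeated through this file hand-inline the matchlen comparison, presumably because the compiler will not inline the call in these hot loops. The trick compares 8 bytes at a time: the XOR of two equal words is zero, and the first non-zero XOR pinpoints the mismatching byte via its trailing-zero count. A standalone sketch using the same identifiers (a, b, load64) as the hunks above:

	// len(a) & (math.MaxInt32 - 7) rounds the length down to a multiple
	// of 8; the +4 accounts for the 4 bytes already known to match.
	var l int32
	endI := len(a) & (math.MaxInt32 - 7)
	l = int32(endI) + 4
	for i := 0; i < endI; i += 8 {
		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
			// TrailingZeros64(diff)>>3 converts the first differing
			// bit position into a byte index within this word.
			l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
			break
		}
	}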

View file

@ -71,15 +71,14 @@ func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) {
} }
if w != nil { if w != nil {
e.Reset(w) e.Reset(w)
} else {
e.init.Do(func() {
e.initialize()
})
} }
return &e, nil return &e, nil
} }
func (e *Encoder) initialize() { func (e *Encoder) initialize() {
if e.o.concurrent == 0 {
e.o.setDefault()
}
e.encoders = make(chan encoder, e.o.concurrent) e.encoders = make(chan encoder, e.o.concurrent)
for i := 0; i < e.o.concurrent; i++ { for i := 0; i < e.o.concurrent; i++ {
e.encoders <- e.o.encoder() e.encoders <- e.o.encoder()
@ -89,9 +88,6 @@ func (e *Encoder) initialize() {
// Reset will re-initialize the writer and new writes will encode to the supplied writer // Reset will re-initialize the writer and new writes will encode to the supplied writer
// as a new, independent stream. // as a new, independent stream.
func (e *Encoder) Reset(w io.Writer) { func (e *Encoder) Reset(w io.Writer) {
e.init.Do(func() {
e.initialize()
})
s := &e.state s := &e.state
s.wg.Wait() s.wg.Wait()
s.wWg.Wait() s.wWg.Wait()
@ -422,10 +418,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
} }
return dst return dst
} }
e.init.Do(func() { e.init.Do(e.initialize)
e.o.setDefault()
e.initialize()
})
enc := <-e.encoders enc := <-e.encoders
defer func() { defer func() {
// Release encoder reference to last block. // Release encoder reference to last block.
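The encoder.go hunk folds option defaulting into initialize and routes every entry point through a single e.init.Do(e.initialize), which is what makes a writer-less encoder work. A hedged usage sketch of the pattern this enables (the input literal is illustrative):

	package main

	import (
		"log"

		"github.com/klauspost/compress/zstd"
	)

	func main() {
		// NewWriter(nil) builds an encoder intended only for EncodeAll;
		// after this change it initializes itself lazily on first use.
		enc, err := zstd.NewWriter(nil)
		if err != nil {
			log.Fatal(err)
		}
		defer enc.Close()
		compressed := enc.EncodeAll([]byte("hello hello hello"), nil)
		log.Printf("compressed to %d bytes", len(compressed))
	}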

View file

@ -39,9 +39,11 @@ func (o *encoderOptions) setDefault() {
func (o encoderOptions) encoder() encoder { func (o encoderOptions) encoder() encoder {
switch o.level { switch o.level {
case SpeedDefault: case SpeedDefault:
return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}} return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
case SpeedBetterCompression:
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
case SpeedFastest: case SpeedFastest:
return &fastEncoder{maxMatchOff: int32(o.windowSize)} return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
} }
panic("unknown compression level") panic("unknown compression level")
} }
@ -67,7 +69,7 @@ func WithEncoderConcurrency(n int) EOption {
} }
// WithWindowSize will set the maximum allowed back-reference distance. // WithWindowSize will set the maximum allowed back-reference distance.
// The value must be a power of two between WindowSizeMin and WindowSizeMax. // The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and, // A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer. // for above-default values, take considerably longer.
// The default value is determined by the compression level. // The default value is determined by the compression level.
@ -130,18 +132,18 @@ const (
// This is roughly equivalent to the default Zstandard mode (level 3). // This is roughly equivalent to the default Zstandard mode (level 3).
SpeedDefault SpeedDefault
// SpeedBetterCompression will yield better compression than the default.
// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
// By using this, notice that CPU usage may go up in the future.
SpeedBetterCompression
// speedLast should be kept as the last actual compression option. // speedLast should be kept as the last actual compression option.
// This is not for external usage, but is used to keep track of the valid options.                                // This is not for external usage, but is used to keep track of the valid options.
speedLast speedLast
// SpeedBetterCompression will (in the future) yield better compression than the default,
// but at approximately 4x the CPU usage of the default.
// For now this is not implemented.
SpeedBetterCompression = SpeedDefault
// SpeedBestCompression will choose the best available compression option. // SpeedBestCompression will choose the best available compression option.
// For now this is not implemented. // For now this is not implemented.
SpeedBestCompression = SpeedDefault SpeedBestCompression = SpeedBetterCompression
) )
// EncoderLevelFromString will convert a string representation of an encoding level back // EncoderLevelFromString will convert a string representation of an encoding level back
@ -163,8 +165,10 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
switch { switch {
case level < 3: case level < 3:
return SpeedFastest return SpeedFastest
case level >= 3: case level >= 3 && level < 6:
return SpeedDefault return SpeedDefault
case level > 5:
return SpeedBetterCompression
} }
return SpeedDefault return SpeedDefault
} }
@ -176,6 +180,8 @@ func (e EncoderLevel) String() string {
return "fastest" return "fastest"
case SpeedDefault: case SpeedDefault:
return "default" return "default"
case SpeedBetterCompression:
return "better"
default: default:
return "invalid" return "invalid"
} }
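With SpeedBetterCompression wired into the options, callers can opt in via the existing WithEncoderLevel option; per the hunk above, zstd levels 6 and up now also map to it. A minimal usage sketch:

	package main

	import (
		"os"

		"github.com/klauspost/compress/zstd"
	)

	func main() {
		// Explicit opt-in to the new level; EncoderLevelFromZstd(6)
		// and above would resolve to the same value.
		enc, err := zstd.NewWriter(os.Stdout,
			zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
		if err != nil {
			panic(err)
		}
		enc.Write([]byte("example payload"))
		enc.Close()
	}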

View file

@ -179,13 +179,13 @@ TEXT ·writeBlocks(SB), NOSPLIT, $0-40
MOVQ ·prime2v(SB), R14 MOVQ ·prime2v(SB), R14
// Load slice. // Load slice.
MOVQ b_base+8(FP), CX MOVQ arg1_base+8(FP), CX
MOVQ b_len+16(FP), DX MOVQ arg1_len+16(FP), DX
LEAQ (CX)(DX*1), BX LEAQ (CX)(DX*1), BX
SUBQ $32, BX SUBQ $32, BX
// Load vN from d. // Load vN from d.
MOVQ d+0(FP), AX MOVQ arg+0(FP), AX
MOVQ 0(AX), R8 // v1 MOVQ 0(AX), R8 // v1
MOVQ 8(AX), R9 // v2 MOVQ 8(AX), R9 // v2
MOVQ 16(AX), R10 // v3 MOVQ 16(AX), R10 // v3
@ -209,7 +209,7 @@ blockLoop:
MOVQ R11, 24(AX) MOVQ R11, 24(AX)
// The number of bytes written is CX minus the old base pointer. // The number of bytes written is CX minus the old base pointer.
SUBQ b_base+8(FP), CX SUBQ arg1_base+8(FP), CX
MOVQ CX, ret+32(FP) MOVQ CX, ret+32(FP)
RET RET

View file

@ -87,6 +87,17 @@ func printf(format string, a ...interface{}) {
} }
} }
// matchLenFast returns the match length of a and b, comparing 8 bytes at a time; the final (up to 7) bytes are never compared.
func matchLenFast(a, b []byte) int {
endI := len(a) & (math.MaxInt32 - 7)
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
return i + bits.TrailingZeros64(diff)>>3
}
}
return endI
}
// matchLen returns the number of leading bytes of a and b that match.                                // matchLen returns the number of leading bytes of a and b that match.
// a must be the shortest of the two.                                // a must be the shortest of the two.
@ -97,33 +108,18 @@ func matchLen(a, b []byte) int {
return i + (bits.TrailingZeros64(diff) >> 3) return i + (bits.TrailingZeros64(diff) >> 3)
} }
} }
checked := (len(a) >> 3) << 3 checked := (len(a) >> 3) << 3
a = a[checked:] a = a[checked:]
b = b[checked:] b = b[checked:]
// TODO: We could do a 4 check.
for i := range a { for i := range a {
if a[i] != b[i] { if a[i] != b[i] {
return int(i) + checked return i + checked
} }
} }
return len(a) + checked return len(a) + checked
} }
// matchLen returns a match length in src between index s and t
func matchLenIn(src []byte, s, t int32) int32 {
s1 := len(src)
b := src[t:]
a := src[s:s1]
b = b[:len(a)]
// Extend the match to be as long as possible.
for i := range a {
if a[i] != b[i] {
return int32(i)
}
}
return int32(len(a))
}
func load3232(b []byte, i int32) uint32 { func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read. // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:] b = b[i:]

2
vendor/modules.txt vendored
View file

@ -85,7 +85,7 @@ github.com/jmespath/go-jmespath
github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report
github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/formatter
github.com/jstemmer/go-junit-report/parser github.com/jstemmer/go-junit-report/parser
# github.com/klauspost/compress v1.10.1 # github.com/klauspost/compress v1.10.3
github.com/klauspost/compress/flate github.com/klauspost/compress/flate
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/gzip github.com/klauspost/compress/gzip