#215: update klauspost/compress lib (#217)

* #215: update klauspost/compress lib

* #215: bump klauspost/compress lib to 1.9.1
Roman Khavronenko authored on 2019-10-28 11:36:35 +00:00; committed by Aliaksandr Valialkin
parent a42b5db39f
commit 6ab48838bf
9 changed files with 106 additions and 37 deletions

go.mod

@@ -6,7 +6,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.1.0
 	github.com/golang/snappy v0.0.1
 	github.com/google/go-cmp v0.3.0 // indirect
-	github.com/klauspost/compress v1.8.6
+	github.com/klauspost/compress v1.9.1
 	github.com/valyala/fastjson v1.4.1
 	github.com/valyala/fastrand v1.0.0
 	github.com/valyala/gozstd v1.6.2

go.sum

@@ -24,6 +24,10 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.8.6 h1:970MQcQdxX7hfgc/aqmB4a3grW0ivUVV6i1TLkP8CiE=
 github.com/klauspost/compress v1.8.6/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.0 h1:GhthINjveNZAdFUD8QoQYfjxnOONZgztK/Yr6M23UTY=
+github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.1 h1:TWy0o9J9c6LK9C8t7Msh6IAJNXbsU/nvKLTQUU5HdaY=
+github.com/klauspost/compress v1.9.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
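
The bump itself carries no VictoriaMetrics code changes. A refresh like this is typically produced with `go get github.com/klauspost/compress@v1.9.1` followed by `go mod tidy` and `go mod vendor` (the exact commands are an assumption, not recorded in the commit); that would also explain the reshuffled package order in vendor/modules.txt below.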

vendor/github.com/klauspost/compress/huff0/compress.go

@@ -54,6 +54,12 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		canReuse = s.canUseTable(s.prevTable)
 	}
 
+	// We want the output size to be less than this:
+	wantSize := len(in)
+	if s.WantLogLess > 0 {
+		wantSize -= wantSize >> s.WantLogLess
+	}
+
 	// Reset for next run.
 	s.clearCount = true
 	s.maxCount = 0
@@ -77,7 +83,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		s.cTable = s.prevTable
 		s.Out, err = compressor(in)
 		s.cTable = keepTable
-		if err == nil && len(s.Out) < len(in) {
+		if err == nil && len(s.Out) < wantSize {
 			s.OutData = s.Out
 			return s.Out, true, nil
 		}
@@ -100,13 +106,16 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		hSize := len(s.Out)
 		oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
 		newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
-		if oldSize <= hSize+newSize || hSize+12 >= len(in) {
+		if oldSize <= hSize+newSize || hSize+12 >= wantSize {
 			// Retain cTable even if we re-use.
 			keepTable := s.cTable
 			s.cTable = s.prevTable
 			s.Out, err = compressor(in)
 			s.cTable = keepTable
-			if len(s.Out) >= len(in) {
+			if err != nil {
+				return nil, false, err
+			}
+			if len(s.Out) >= wantSize {
 				return nil, false, ErrIncompressible
 			}
 			s.OutData = s.Out
@@ -128,7 +137,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		s.OutTable = nil
 		return nil, false, err
 	}
-	if len(s.Out) >= len(in) {
+	if len(s.Out) >= wantSize {
 		s.OutTable = nil
 		return nil, false, ErrIncompressible
 	}
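
The effect of the new threshold: instead of accepting any output smaller than the input, compress() now demands that the output beat the input by at least len(in)>>WantLogLess bytes. A standalone sketch of the arithmetic (the helper and the numbers are ours, not library code):

	package main

	import "fmt"

	// wantSize mirrors the threshold computed in huff0's compress():
	// output must be strictly smaller than this value, i.e. at least
	// inLen>>logLess bytes smaller than the input.
	func wantSize(inLen int, logLess uint8) int {
		want := inLen
		if logLess > 0 {
			want -= want >> logLess
		}
		return want
	}

	func main() {
		fmt.Println(wantSize(1000, 4)) // 938: must save at least 62 bytes
		fmt.Println(wantSize(1000, 0)) // 1000: any 1-byte saving suffices
	}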

vendor/github.com/klauspost/compress/huff0/huff0.go

@@ -89,6 +89,12 @@ type Scratch struct {
 	// Reuse will specify the reuse policy
 	Reuse ReusePolicy
 
+	// WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+	// otherwise the block will be returned as incompressible.
+	// The reduction should then at least be (input size >> WantLogLess)
+	// If WantLogLess == 0 any improvement will do.
+	WantLogLess uint8
+
 	// MaxDecodedSize will set the maximum allowed output size.
 	// This value will automatically be set to BlockSizeMax if not set.
 	// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
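
For a caller, opting in looks like the sketch below. Compress1X, ErrIncompressible and ErrUseRLE are real huff0 API; the surrounding setup is our assumption:

	package main

	import (
		"fmt"

		"github.com/klauspost/compress/huff0"
	)

	func main() {
		in := []byte("aaaabbbbccccddddaaaabbbbccccdddd")
		// Require the output to be at least len(in)>>4 smaller than
		// the input; otherwise the block is reported incompressible.
		s := &huff0.Scratch{WantLogLess: 4}
		out, reused, err := huff0.Compress1X(in, s)
		switch err {
		case huff0.ErrIncompressible, huff0.ErrUseRLE:
			fmt.Println("store the block raw or as RLE:", err)
		case nil:
			fmt.Printf("compressed %d -> %d bytes (table reused: %v)\n",
				len(in), len(out), reused)
		default:
			panic(err)
		}
	}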

vendor/github.com/klauspost/compress/zstd/blockenc.go

@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
 		b.coders.llEnc = &fseEncoder{}
 		b.coders.llPrev = &fseEncoder{}
 	}
-	b.litEnc = &huff0.Scratch{}
+	b.litEnc = &huff0.Scratch{WantLogLess: 4}
 	b.reset(nil)
 }
@@ -391,6 +391,52 @@ func (b *blockEnc) encodeLits() error {
 	return nil
 }
 
+// fuzzFseEncoder can be used to fuzz the FSE encoder.
+func fuzzFseEncoder(data []byte) int {
+	if len(data) > maxSequences || len(data) < 2 {
+		return 0
+	}
+	enc := fseEncoder{}
+	hist := enc.Histogram()[:256]
+	maxSym := uint8(0)
+	for i, v := range data {
+		v = v & 63
+		data[i] = v
+		hist[v]++
+		if v > maxSym {
+			maxSym = v
+		}
+	}
+	if maxSym == 0 {
+		// All 0
+		return 0
+	}
+	maxCount := func(a []uint32) int {
+		var max uint32
+		for _, v := range a {
+			if v > max {
+				max = v
+			}
+		}
+		return int(max)
+	}
+	cnt := maxCount(hist[:maxSym])
+	if cnt == len(data) {
+		// RLE
+		return 0
+	}
+	enc.HistogramFinished(maxSym, cnt)
+	err := enc.normalizeCount(len(data))
+	if err != nil {
+		return 0
+	}
+	_, err = enc.writeCount(nil)
+	if err != nil {
+		panic(err)
+	}
+	return 1
+}
+
 // encode will encode the block and put the output in b.output.
 func (b *blockEnc) encode() error {
 	if len(b.sequences) == 0 {
@@ -415,16 +461,10 @@ func (b *blockEnc) encode() error {
 	if len(b.literals) >= 1024 {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-		if len(out) > len(b.literals)-len(b.literals)>>4 {
-			err = huff0.ErrIncompressible
-		}
 	} else if len(b.literals) > 32 {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
-		if len(out) > len(b.literals)-len(b.literals)>>4 {
-			err = huff0.ErrIncompressible
-		}
 	} else {
 		err = huff0.ErrIncompressible
 	}
@@ -711,7 +751,7 @@ func (b *blockEnc) encode() error {
 	return nil
 }
 
-var errIncompressible = errors.New("uncompressible")
+var errIncompressible = errors.New("incompressible")
 
 func (b *blockEnc) genCodes() {
 	if len(b.sequences) == 0 {
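
Two related changes here: the hand-rolled len(out) > len(b.literals)-len(b.literals)>>4 checks in encode() disappear because litEnc is now constructed with WantLogLess: 4, pushing the same one-sixteenth threshold down into huff0 itself; and fuzzFseEncoder gives the FSE encoder a fuzzing hook. The helper already follows the go-fuzz return convention (1 = interesting input, 0 = skip), so a harness is just a thin wrapper. A sketch, assuming a go-fuzz build (the Fuzz wrapper below is ours, not part of the commit):

	// +build gofuzz

	package zstd

	// Fuzz is a hypothetical go-fuzz entry point delegating to the
	// fuzzFseEncoder helper added in this commit. go-fuzz treats a
	// return of 1 as "interesting input" and 0 as "skip"; writeCount
	// failures panic inside the helper and surface as crashes.
	func Fuzz(data []byte) int {
		return fuzzFseEncoder(data)
	}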

vendor/github.com/klauspost/compress/zstd/enc_dfast.go

@@ -235,7 +235,7 @@ encodeLoop:
 			if debug && s-t > e.maxMatchOff {
 				panic("s - t >e.maxMatchOff")
 			}
-			if debug {
+			if debugMatches {
 				println("long match")
 			}
 			break
@@ -259,7 +259,7 @@ encodeLoop:
 				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
 				t = candidateL.offset - e.cur
 				s += checkAt
-				if debug {
+				if debugMatches {
 					println("long match (after short)")
 				}
 				break
@@ -275,7 +275,7 @@ encodeLoop:
 			if debug && t < 0 {
 				panic("t<0")
 			}
-			if debug {
+			if debugMatches {
 				println("short match")
 			}
 			break

vendor/github.com/klauspost/compress/zstd/fse_encoder.go

@@ -502,21 +502,6 @@ func (s *fseEncoder) validateNorm() (err error) {
 // writeCount will write the normalized histogram count to header.
 // This is read back by readNCount.
 func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
-	var (
-		tableLog  = s.actualTableLog
-		tableSize = 1 << tableLog
-		previous0 bool
-		charnum   uint16
-
-		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
-
-		// Write Table Size
-		bitStream = uint32(tableLog - minEncTablelog)
-		bitCount  = uint(4)
-		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
-		threshold = int16(tableSize)
-		nbBits    = uint(tableLog + 1)
-	)
 	if s.useRLE {
 		return append(out, s.rleVal), nil
 	}
@@ -524,7 +509,28 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
 		// Never write predefined.
 		return out, nil
 	}
-	outP := len(out)
+
+	var (
+		tableLog  = s.actualTableLog
+		tableSize = 1 << tableLog
+		previous0 bool
+		charnum   uint16
+
+		// maximum header size plus 2 extra bytes for final output if bitCount == 0.
+		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
+
+		// Write Table Size
+		bitStream = uint32(tableLog - minEncTablelog)
+		bitCount  = uint(4)
+		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
+		threshold = int16(tableSize)
+		nbBits    = uint(tableLog + 1)
+		outP      = len(out)
+	)
+	if cap(out) < outP+maxHeaderSize {
+		out = append(out, make([]byte, maxHeaderSize*3)...)
+		out = out[:len(out)-maxHeaderSize*3]
+	}
 	out = out[:outP+maxHeaderSize]
 
 	// stops at 1
@@ -594,11 +600,14 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
 		}
 	}
 
+	if outP+2 > len(out) {
+		return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
+	}
 	out[outP] = byte(bitStream)
 	out[outP+1] = byte(bitStream >> 8)
 	outP += int((bitCount + 7) / 8)
 
-	if uint16(charnum) > s.symbolLen {
+	if charnum > s.symbolLen {
 		return nil, errors.New("internal error: charnum > s.symbolLen")
 	}
 	return out[:outP], nil
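
The restructured writeCount fixes a potential out-of-bounds write: maxHeaderSize gains 2 bytes for the final bitCount == 0 flush, an explicit bounds check turns a would-be overrun into an error, and the buffer capacity is grown before out = out[:outP+maxHeaderSize] can panic on a short-capacity slice. The presize idiom in isolation (a sketch; the helper name is ours):

	// growCap ensures buf can hold at least n more bytes without
	// changing its length, mirroring the presize step in writeCount:
	// append temporarily extends the slice (reallocating if needed),
	// then the length is cut back to its old value.
	func growCap(buf []byte, n int) []byte {
		if cap(buf) < len(buf)+n {
			buf = append(buf, make([]byte, n)...)
			buf = buf[:len(buf)-n]
		}
		return buf
	}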

vendor/github.com/klauspost/compress/zstd/zstd.go

@@ -11,6 +11,7 @@ import (
 
 const debug = false
 const debugSequences = false
+const debugMatches = false
 
 // force encoder to use predefined tables.
 const forcePreDef = false
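
Splitting debugMatches out of the general debug flag (used by the enc_dfast.go hunks above) lets match-selection tracing be enabled on its own instead of drowning in the rest of the encoder's debug output.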

vendor/modules.txt

@@ -6,17 +6,17 @@ github.com/VictoriaMetrics/metrics
 github.com/cespare/xxhash/v2
 # github.com/golang/snappy v0.0.1
 github.com/golang/snappy
-# github.com/klauspost/compress v1.8.6
-github.com/klauspost/compress/fse
+# github.com/klauspost/compress v1.9.1
+github.com/klauspost/compress/zstd
 github.com/klauspost/compress/huff0
 github.com/klauspost/compress/snappy
-github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
+github.com/klauspost/compress/fse
 # github.com/valyala/bytebufferpool v1.0.0
 github.com/valyala/bytebufferpool
 # github.com/valyala/fastjson v1.4.1
-github.com/valyala/fastjson
 github.com/valyala/fastjson/fastfloat
+github.com/valyala/fastjson
 # github.com/valyala/fastrand v1.0.0
 github.com/valyala/fastrand
 # github.com/valyala/gozstd v1.6.2