#215: update klauspost/compress lib (#217)

* #215: update klauspost/compress lib

* #215: bump klauspost/compress lib to 1.9.1
Authored by Roman Khavronenko on 2019-10-28 11:36:35 +00:00; committed by Aliaksandr Valialkin
parent a42b5db39f
commit 6ab48838bf
9 changed files with 106 additions and 37 deletions

go.mod

@@ -6,7 +6,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.1.0
 	github.com/golang/snappy v0.0.1
 	github.com/google/go-cmp v0.3.0 // indirect
-	github.com/klauspost/compress v1.8.6
+	github.com/klauspost/compress v1.9.1
 	github.com/valyala/fastjson v1.4.1
 	github.com/valyala/fastrand v1.0.0
 	github.com/valyala/gozstd v1.6.2

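Note: a bump like this is presumably produced with the standard module workflow, e.g. go get github.com/klauspost/compress@v1.9.1 followed by go mod vendor; the exact commands are not recorded in the commit.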
go.sum

@@ -24,6 +24,10 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.8.6 h1:970MQcQdxX7hfgc/aqmB4a3grW0ivUVV6i1TLkP8CiE=
 github.com/klauspost/compress v1.8.6/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.0 h1:GhthINjveNZAdFUD8QoQYfjxnOONZgztK/Yr6M23UTY=
+github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.1 h1:TWy0o9J9c6LK9C8t7Msh6IAJNXbsU/nvKLTQUU5HdaY=
+github.com/klauspost/compress v1.9.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=

vendor/github.com/klauspost/compress/huff0/compress.go

@@ -54,6 +54,12 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		canReuse = s.canUseTable(s.prevTable)
 	}

+	// We want the output size to be less than this:
+	wantSize := len(in)
+	if s.WantLogLess > 0 {
+		wantSize -= wantSize >> s.WantLogLess
+	}
+
 	// Reset for next run.
 	s.clearCount = true
 	s.maxCount = 0

@@ -77,7 +83,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		s.cTable = s.prevTable
 		s.Out, err = compressor(in)
 		s.cTable = keepTable
-		if err == nil && len(s.Out) < len(in) {
+		if err == nil && len(s.Out) < wantSize {
 			s.OutData = s.Out
 			return s.Out, true, nil
 		}

@@ -100,13 +106,16 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		hSize := len(s.Out)
 		oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
 		newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
-		if oldSize <= hSize+newSize || hSize+12 >= len(in) {
+		if oldSize <= hSize+newSize || hSize+12 >= wantSize {
 			// Retain cTable even if we re-use.
 			keepTable := s.cTable
 			s.cTable = s.prevTable
 			s.Out, err = compressor(in)
 			s.cTable = keepTable
-			if len(s.Out) >= len(in) {
+			if err != nil {
+				return nil, false, err
+			}
+			if len(s.Out) >= wantSize {
 				return nil, false, ErrIncompressible
 			}
 			s.OutData = s.Out

@@ -128,7 +137,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)
 		s.OutTable = nil
 		return nil, false, err
 	}
-	if len(s.Out) >= len(in) {
+	if len(s.Out) >= wantSize {
 		s.OutTable = nil
 		return nil, false, ErrIncompressible
 	}

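Note: the wantSize threshold added above makes compress demand a minimum reduction instead of accepting any reduction. A standalone sketch of the arithmetic (not part of the diff; names are illustrative):

package main

import "fmt"

// wantSize mirrors the threshold computed in compress above: the output
// must be strictly smaller than inLen minus inLen>>wantLogLess.
func wantSize(inLen int, wantLogLess uint8) int {
	ws := inLen
	if wantLogLess > 0 {
		ws -= ws >> wantLogLess
	}
	return ws
}

func main() {
	fmt.Println(wantSize(4096, 4)) // 3840: at least a 1/16 reduction is required
	fmt.Println(wantSize(4096, 0)) // 4096: any size reduction is accepted
}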
vendor/github.com/klauspost/compress/huff0/huff0.go

@@ -89,6 +89,12 @@ type Scratch struct {
 	// Reuse will specify the reuse policy
 	Reuse ReusePolicy

+	// WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+	// otherwise the block will be returned as incompressible.
+	// The reduction should then at least be (input size >> WantLogLess)
+	// If WantLogLess == 0 any improvement will do.
+	WantLogLess uint8
+
 	// MaxDecodedSize will set the maximum allowed output size.
 	// This value will automatically be set to BlockSizeMax if not set.
 	// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.

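Note: a minimal usage sketch of the new WantLogLess field against the public huff0 API (illustrative, not from the diff):

package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/huff0"
)

func main() {
	// Require at least a 1/16 size reduction, as the zstd block encoder
	// below now does for literals.
	s := &huff0.Scratch{WantLogLess: 4}
	in := bytes.Repeat([]byte("some fairly compressible data. "), 64)
	out, reused, err := huff0.Compress1X(in, s)
	if err == huff0.ErrIncompressible {
		fmt.Println("output would not be at least input>>4 bytes smaller")
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Printf("%d -> %d bytes, reused table: %v\n", len(in), len(out), reused)
}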
vendor/github.com/klauspost/compress/zstd/blockenc.go

@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
 		b.coders.llEnc = &fseEncoder{}
 		b.coders.llPrev = &fseEncoder{}
 	}
-	b.litEnc = &huff0.Scratch{}
+	b.litEnc = &huff0.Scratch{WantLogLess: 4}
 	b.reset(nil)
 }

@@ -391,6 +391,52 @@ func (b *blockEnc) encodeLits() error {
 	return nil
 }

+// fuzzFseEncoder can be used to fuzz the FSE encoder.
+func fuzzFseEncoder(data []byte) int {
+	if len(data) > maxSequences || len(data) < 2 {
+		return 0
+	}
+	enc := fseEncoder{}
+	hist := enc.Histogram()[:256]
+	maxSym := uint8(0)
+	for i, v := range data {
+		v = v & 63
+		data[i] = v
+		hist[v]++
+		if v > maxSym {
+			maxSym = v
+		}
+	}
+	if maxSym == 0 {
+		// All 0
+		return 0
+	}
+	maxCount := func(a []uint32) int {
+		var max uint32
+		for _, v := range a {
+			if v > max {
+				max = v
+			}
+		}
+		return int(max)
+	}
+	cnt := maxCount(hist[:maxSym])
+	if cnt == len(data) {
+		// RLE
+		return 0
+	}
+	enc.HistogramFinished(maxSym, cnt)
+	err := enc.normalizeCount(len(data))
+	if err != nil {
+		return 0
+	}
+	_, err = enc.writeCount(nil)
+	if err != nil {
+		panic(err)
+	}
+	return 1
+}
+
 // encode will encode the block and put the output in b.output.
 func (b *blockEnc) encode() error {
 	if len(b.sequences) == 0 {

@@ -415,16 +461,10 @@ func (b *blockEnc) encode() error {
 	if len(b.literals) >= 1024 {
 		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-		if len(out) > len(b.literals)-len(b.literals)>>4 {
-			err = huff0.ErrIncompressible
-		}
 	} else if len(b.literals) > 32 {
 		// Use 1 stream
 		single = true
 		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
-		if len(out) > len(b.literals)-len(b.literals)>>4 {
-			err = huff0.ErrIncompressible
-		}
 	} else {
 		err = huff0.ErrIncompressible
 	}

@@ -711,7 +751,7 @@ func (b *blockEnc) encode() error {
 	return nil
 }

-var errIncompressible = errors.New("uncompressible")
+var errIncompressible = errors.New("incompressible")

 func (b *blockEnc) genCodes() {
 	if len(b.sequences) == 0 {

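Note: fuzzFseEncoder is unexported and follows the go-fuzz convention of returning 0 (uninteresting input) or 1 (interesting input). A hypothetical harness wiring it up under a gofuzz build tag could look like this (illustrative only; the library ships its own fuzz entry points):

// +build gofuzz

package zstd

// Fuzz feeds go-fuzz input straight into the FSE encoder fuzzer added above.
func Fuzz(data []byte) int {
	return fuzzFseEncoder(data)
}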
vendor/github.com/klauspost/compress/zstd/enc_dfast.go

@@ -235,7 +235,7 @@ encodeLoop:
 			if debug && s-t > e.maxMatchOff {
 				panic("s - t >e.maxMatchOff")
 			}
-			if debug {
+			if debugMatches {
 				println("long match")
 			}
 			break

@@ -259,7 +259,7 @@ encodeLoop:
 				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
 				t = candidateL.offset - e.cur
 				s += checkAt
-				if debug {
+				if debugMatches {
 					println("long match (after short)")
 				}
 				break

@@ -275,7 +275,7 @@ encodeLoop:
 			if debug && t < 0 {
 				panic("t<0")
 			}
-			if debug {
+			if debugMatches {
 				println("short match")
 			}
 			break

vendor/github.com/klauspost/compress/zstd/fse_encoder.go

@@ -502,21 +502,6 @@ func (s *fseEncoder) validateNorm() (err error) {
 // writeCount will write the normalized histogram count to header.
 // This is read back by readNCount.
 func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
-	var (
-		tableLog  = s.actualTableLog
-		tableSize = 1 << tableLog
-		previous0 bool
-		charnum   uint16
-
-		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
-
-		// Write Table Size
-		bitStream = uint32(tableLog - minEncTablelog)
-		bitCount  = uint(4)
-		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
-		threshold = int16(tableSize)
-		nbBits    = uint(tableLog + 1)
-	)
 	if s.useRLE {
 		return append(out, s.rleVal), nil
 	}

@@ -524,7 +509,28 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
 		// Never write predefined.
 		return out, nil
 	}
-	outP := len(out)
+
+	var (
+		tableLog  = s.actualTableLog
+		tableSize = 1 << tableLog
+		previous0 bool
+		charnum   uint16
+
+		// maximum header size plus 2 extra bytes for final output if bitCount == 0.
+		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
+
+		// Write Table Size
+		bitStream = uint32(tableLog - minEncTablelog)
+		bitCount  = uint(4)
+		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
+		threshold = int16(tableSize)
+		nbBits    = uint(tableLog + 1)
+		outP      = len(out)
+	)
+	if cap(out) < outP+maxHeaderSize {
+		out = append(out, make([]byte, maxHeaderSize*3)...)
+		out = out[:len(out)-maxHeaderSize*3]
+	}
 	out = out[:outP+maxHeaderSize]

 	// stops at 1

@@ -594,11 +600,14 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
 		}
 	}

+	if outP+2 > len(out) {
+		return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
+	}
 	out[outP] = byte(bitStream)
 	out[outP+1] = byte(bitStream >> 8)
 	outP += int((bitCount + 7) / 8)

-	if uint16(charnum) > s.symbolLen {
+	if charnum > s.symbolLen {
 		return nil, errors.New("internal error: charnum > s.symbolLen")
 	}
 	return out[:outP], nil

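Note: the capacity check added above grows the destination slice without changing its length, so the later out = out[:outP+maxHeaderSize] reslice is guaranteed to stay within capacity and header writes need no further append. The idiom in isolation (illustrative names, not from the diff):

// ensureCap reserves room for at least need more bytes (with 3x headroom)
// while leaving len(out) untouched; append is used only to grow capacity.
func ensureCap(out []byte, need int) []byte {
	if cap(out) < len(out)+need {
		out = append(out, make([]byte, need*3)...)
		out = out[:len(out)-need*3]
	}
	return out
}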
vendor/github.com/klauspost/compress/zstd/zstd.go

@@ -11,6 +11,7 @@ import (
 const debug = false
 const debugSequences = false
+const debugMatches = false

 // force encoder to use predefined tables.
 const forcePreDef = false

vendor/modules.txt

@@ -6,17 +6,17 @@ github.com/VictoriaMetrics/metrics
 github.com/cespare/xxhash/v2
 # github.com/golang/snappy v0.0.1
 github.com/golang/snappy
-# github.com/klauspost/compress v1.8.6
-github.com/klauspost/compress/fse
+# github.com/klauspost/compress v1.9.1
+github.com/klauspost/compress/zstd
 github.com/klauspost/compress/huff0
 github.com/klauspost/compress/snappy
-github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
+github.com/klauspost/compress/fse
 # github.com/valyala/bytebufferpool v1.0.0
 github.com/valyala/bytebufferpool
 # github.com/valyala/fastjson v1.4.1
-github.com/valyala/fastjson
 github.com/valyala/fastjson/fastfloat
+github.com/valyala/fastjson
 # github.com/valyala/fastrand v1.0.0
 github.com/valyala/fastrand
 # github.com/valyala/gozstd v1.6.2