diff --git a/go.mod b/go.mod index 0c418eef8..b4c9bc4c0 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/cespare/xxhash/v2 v2.1.0 github.com/golang/snappy v0.0.1 github.com/google/go-cmp v0.3.0 // indirect - github.com/klauspost/compress v1.8.6 + github.com/klauspost/compress v1.9.1 github.com/valyala/fastjson v1.4.1 github.com/valyala/fastrand v1.0.0 github.com/valyala/gozstd v1.6.2 diff --git a/go.sum b/go.sum index 99feb32a3..6e451a393 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,10 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.8.6 h1:970MQcQdxX7hfgc/aqmB4a3grW0ivUVV6i1TLkP8CiE= github.com/klauspost/compress v1.8.6/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.9.0 h1:GhthINjveNZAdFUD8QoQYfjxnOONZgztK/Yr6M23UTY= +github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= +github.com/klauspost/compress v1.9.1 h1:TWy0o9J9c6LK9C8t7Msh6IAJNXbsU/nvKLTQUU5HdaY= +github.com/klauspost/compress v1.9.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index dd4f7fefb..51e00aaeb 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -54,6 +54,12 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) canReuse = s.canUseTable(s.prevTable) } + // We want the output size to be less than this: + wantSize := len(in) + if s.WantLogLess > 0 { + wantSize -= wantSize >> s.WantLogLess + } + // Reset for next run. s.clearCount = true s.maxCount = 0 @@ -77,7 +83,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) s.cTable = s.prevTable s.Out, err = compressor(in) s.cTable = keepTable - if err == nil && len(s.Out) < len(in) { + if err == nil && len(s.Out) < wantSize { s.OutData = s.Out return s.Out, true, nil } @@ -100,13 +106,16 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) hSize := len(s.Out) oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen]) newSize := s.cTable.estimateSize(s.count[:s.symbolLen]) - if oldSize <= hSize+newSize || hSize+12 >= len(in) { + if oldSize <= hSize+newSize || hSize+12 >= wantSize { // Retain cTable even if we re-use. keepTable := s.cTable s.cTable = s.prevTable s.Out, err = compressor(in) s.cTable = keepTable - if len(s.Out) >= len(in) { + if err != nil { + return nil, false, err + } + if len(s.Out) >= wantSize { return nil, false, ErrIncompressible } s.OutData = s.Out @@ -128,7 +137,7 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) s.OutTable = nil return nil, false, err } - if len(s.Out) >= len(in) { + if len(s.Out) >= wantSize { s.OutTable = nil return nil, false, ErrIncompressible } diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 6f823f94d..6bc23bbf0 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -89,6 +89,12 @@ type Scratch struct { // Reuse will specify the reuse policy Reuse ReusePolicy + // WantLogLess allows to specify a log 2 reduction that should at least be achieved, + // otherwise the block will be returned as incompressible. + // The reduction should then at least be (input size >> WantLogLess) + // If WantLogLess == 0 any improvement will do. + WantLogLess uint8 + // MaxDecodedSize will set the maximum allowed output size. // This value will automatically be set to BlockSizeMax if not set. // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 9d9151a0e..8383279d2 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -51,7 +51,7 @@ func (b *blockEnc) init() { b.coders.llEnc = &fseEncoder{} b.coders.llPrev = &fseEncoder{} } - b.litEnc = &huff0.Scratch{} + b.litEnc = &huff0.Scratch{WantLogLess: 4} b.reset(nil) } @@ -391,6 +391,52 @@ func (b *blockEnc) encodeLits() error { return nil } +// fuzzFseEncoder can be used to fuzz the FSE encoder. +func fuzzFseEncoder(data []byte) int { + if len(data) > maxSequences || len(data) < 2 { + return 0 + } + enc := fseEncoder{} + hist := enc.Histogram()[:256] + maxSym := uint8(0) + for i, v := range data { + v = v & 63 + data[i] = v + hist[v]++ + if v > maxSym { + maxSym = v + } + } + if maxSym == 0 { + // All 0 + return 0 + } + maxCount := func(a []uint32) int { + var max uint32 + for _, v := range a { + if v > max { + max = v + } + } + return int(max) + } + cnt := maxCount(hist[:maxSym]) + if cnt == len(data) { + // RLE + return 0 + } + enc.HistogramFinished(maxSym, cnt) + err := enc.normalizeCount(len(data)) + if err != nil { + return 0 + } + _, err = enc.writeCount(nil) + if err != nil { + panic(err) + } + return 1 +} + // encode will encode the block and put the output in b.output. func (b *blockEnc) encode() error { if len(b.sequences) == 0 { @@ -415,16 +461,10 @@ func (b *blockEnc) encode() error { if len(b.literals) >= 1024 { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - if len(out) > len(b.literals)-len(b.literals)>>4 { - err = huff0.ErrIncompressible - } } else if len(b.literals) > 32 { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) - if len(out) > len(b.literals)-len(b.literals)>>4 { - err = huff0.ErrIncompressible - } } else { err = huff0.ErrIncompressible } @@ -711,7 +751,7 @@ func (b *blockEnc) encode() error { return nil } -var errIncompressible = errors.New("uncompressible") +var errIncompressible = errors.New("incompressible") func (b *blockEnc) genCodes() { if len(b.sequences) == 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index e120625d8..2f41bcd0d 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -235,7 +235,7 @@ encodeLoop: if debug && s-t > e.maxMatchOff { panic("s - t >e.maxMatchOff") } - if debug { + if debugMatches { println("long match") } break @@ -259,7 +259,7 @@ encodeLoop: // but the likelihood of both the first 4 bytes and the hash matching should be enough. t = candidateL.offset - e.cur s += checkAt - if debug { + if debugMatches { println("long match (after short)") } break @@ -275,7 +275,7 @@ encodeLoop: if debug && t < 0 { panic("t<0") } - if debug { + if debugMatches { println("short match") } break diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index dfa6cf7ce..619836f52 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -502,21 +502,6 @@ func (s *fseEncoder) validateNorm() (err error) { // writeCount will write the normalized histogram count to header. // This is read back by readNCount. func (s *fseEncoder) writeCount(out []byte) ([]byte, error) { - var ( - tableLog = s.actualTableLog - tableSize = 1 << tableLog - previous0 bool - charnum uint16 - - maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 - - // Write Table Size - bitStream = uint32(tableLog - minEncTablelog) - bitCount = uint(4) - remaining = int16(tableSize + 1) /* +1 for extra accuracy */ - threshold = int16(tableSize) - nbBits = uint(tableLog + 1) - ) if s.useRLE { return append(out, s.rleVal), nil } @@ -524,7 +509,28 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) { // Never write predefined. return out, nil } - outP := len(out) + + var ( + tableLog = s.actualTableLog + tableSize = 1 << tableLog + previous0 bool + charnum uint16 + + // maximum header size plus 2 extra bytes for final output if bitCount == 0. + maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2 + + // Write Table Size + bitStream = uint32(tableLog - minEncTablelog) + bitCount = uint(4) + remaining = int16(tableSize + 1) /* +1 for extra accuracy */ + threshold = int16(tableSize) + nbBits = uint(tableLog + 1) + outP = len(out) + ) + if cap(out) < outP+maxHeaderSize { + out = append(out, make([]byte, maxHeaderSize*3)...) + out = out[:len(out)-maxHeaderSize*3] + } out = out[:outP+maxHeaderSize] // stops at 1 @@ -594,11 +600,14 @@ func (s *fseEncoder) writeCount(out []byte) ([]byte, error) { } } + if outP+2 > len(out) { + return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen]) + } out[outP] = byte(bitStream) out[outP+1] = byte(bitStream >> 8) outP += int((bitCount + 7) / 8) - if uint16(charnum) > s.symbolLen { + if charnum > s.symbolLen { return nil, errors.New("internal error: charnum > s.symbolLen") } return out[:outP], nil diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index b975954c1..57a8a2f5b 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -11,6 +11,7 @@ import ( const debug = false const debugSequences = false +const debugMatches = false // force encoder to use predefined tables. const forcePreDef = false diff --git a/vendor/modules.txt b/vendor/modules.txt index 3ebffec05..7187c499c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -6,17 +6,17 @@ github.com/VictoriaMetrics/metrics github.com/cespare/xxhash/v2 # github.com/golang/snappy v0.0.1 github.com/golang/snappy -# github.com/klauspost/compress v1.8.6 -github.com/klauspost/compress/fse +# github.com/klauspost/compress v1.9.1 +github.com/klauspost/compress/zstd github.com/klauspost/compress/huff0 github.com/klauspost/compress/snappy -github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash +github.com/klauspost/compress/fse # github.com/valyala/bytebufferpool v1.0.0 github.com/valyala/bytebufferpool # github.com/valyala/fastjson v1.4.1 -github.com/valyala/fastjson github.com/valyala/fastjson/fastfloat +github.com/valyala/fastjson # github.com/valyala/fastrand v1.0.0 github.com/valyala/fastrand # github.com/valyala/gozstd v1.6.2