From 90e18180686246a9b493d36c7073d20ae3c7d76e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 14 Mar 2023 16:14:25 -0700 Subject: [PATCH] vendor: update github.com/klauspost/compress from v1.16.0 to v1.16.3 --- go.mod | 2 +- go.sum | 4 +- .../github.com/klauspost/compress/README.md | 15 + .../klauspost/compress/fse/decompress.go | 4 +- .../klauspost/compress/gzhttp/README.md | 61 ++ .../klauspost/compress/gzhttp/compress.go | 152 +++- .../compress/gzhttp/writer/gzkp/gzkp.go | 9 + .../compress/gzhttp/writer/interface.go | 23 +- .../klauspost/compress/huff0/bitwriter.go | 16 + .../klauspost/compress/huff0/compress.go | 3 +- .../klauspost/compress/s2/encode_go.go | 8 + .../compress/s2/encodeblock_amd64.go | 12 +- .../klauspost/compress/s2/encodeblock_amd64.s | 753 ++++++++++++++++++ .../klauspost/compress/s2/lz4sconvert.go | 467 +++++++++++ .../klauspost/compress/zstd/blockdec.go | 4 + .../klauspost/compress/zstd/bytebuf.go | 2 +- .../klauspost/compress/zstd/enc_best.go | 65 +- .../klauspost/compress/zstd/seqdec.go | 6 +- .../klauspost/compress/zstd/seqdec_amd64.go | 1 - vendor/modules.txt | 2 +- 20 files changed, 1540 insertions(+), 69 deletions(-) create mode 100644 vendor/github.com/klauspost/compress/s2/lz4sconvert.go diff --git a/go.mod b/go.mod index d80073ec3..ab0278bdc 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/golang/snappy v0.0.4 github.com/googleapis/gax-go/v2 v2.7.1 github.com/influxdata/influxdb v1.11.0 - github.com/klauspost/compress v1.16.0 + github.com/klauspost/compress v1.16.3 github.com/prometheus/prometheus v0.42.0 github.com/urfave/cli/v2 v2.25.0 github.com/valyala/fastjson v1.6.4 diff --git a/go.sum b/go.sum index 6c1c7d2be..0f1d0fd05 100644 --- a/go.sum +++ b/go.sum @@ -313,8 +313,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4= -github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= +github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 958666ed8..c2c7252fe 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,21 @@ This package provides various compression algorithms. # changelog +* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) + * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 + * gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767 + * s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766 + * zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773 + * huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774 + +* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0) + * s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685 + * s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752 + * s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755 + * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748 + * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 + * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 + * Jan 21st, 2023 (v1.15.15) * deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739 * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index 926f5f153..cc05d0f7e 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error { // If the buffer is over-read an error is returned. func (s *Scratch) decompress() error { br := &s.bits - br.init(s.br.unread()) + if err := br.init(s.br.unread()); err != nil { + return err + } var s1, s2 decoder // Initialize and decode first state and symbol. diff --git a/vendor/github.com/klauspost/compress/gzhttp/README.md b/vendor/github.com/klauspost/compress/gzhttp/README.md index 571e939e6..8b784d2ba 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/README.md +++ b/vendor/github.com/klauspost/compress/gzhttp/README.md @@ -215,6 +215,67 @@ has been reached. In this case it will assume that the minimum size has been rea If nothing has been written to the response writer, nothing will be flushed. +## BREACH mitigation + +[BREACH](http://css.csail.mit.edu/6.858/2020/readings/breach.pdf) is a specialized attack where attacker controlled data +is injected alongside secret data in a response body. This can lead to sidechannel attacks, where observing the compressed response +size can reveal if there are overlaps between the secret data and the injected data. + +For more information see https://breachattack.com/ + +It can be hard to judge if you are vulnerable to BREACH. +In general, if you do not include any user provided content in the response body you are safe, +but if you do, or you are in doubt, you can apply mitigations. + +`gzhttp` can apply [Heal the Breach](https://ieeexplore.ieee.org/document/9754554), or improved content aware padding. + +```Go +// RandomJitter adds 1->n random bytes to output based on checksum of payload. +// Specify the amount of input to buffer before applying jitter. +// This should cover the sensitive part of your response. +// This can be used to obfuscate the exact compressed size. +// Specifying 0 will use a buffer size of 64KB. +// 'paranoid' will use a slower hashing function, that MAY provide more safety. +// If a negative buffer is given, the amount of jitter will not be content dependent. +// This provides *less* security than applying content based jitter. +func RandomJitter(n, buffer int, paranoid bool) option +... +``` + +The jitter is added as a "Comment" field. This field has a 1 byte overhead, so actual extra size will be 2 -> n+1 (inclusive). + +A good option would be to apply 32 random bytes, with default 64KB buffer: `gzhttp.RandomJitter(32, 0, false)`. + +Note that flushing the data forces the padding to be applied, which means that only data before the flush is considered for content aware padding. + +The *padding* in the comment is the text `Padding-Padding-Padding-Padding-Pad....` + +The *length* is `1 + crc32c(payload) MOD n` or `1 + sha256(payload) MOD n` (paranoid), or just random from `crypto/rand` if buffer < 0. + +### Paranoid? + +The padding size is determined by the remainder of a CRC32 of the content. + +Since the payload contains elements unknown to the attacker, there is no reason to believe they can derive any information +from this remainder, or predict it. + +However, for those that feel uncomfortable with a CRC32 being used for this can enable "paranoid" mode which will use SHA256 for determining the padding. + +The hashing itself is about 2 orders of magnitude slower, but in overall terms will maybe only reduce speed by 10%. + +Paranoid mode has no effect if buffer is < 0 (non-content aware padding). + +### Examples + +Adding the option `gzhttp.RandomJitter(32, 50000)` will apply from 1 up to 32 bytes of random data to the output. + +The number of bytes added depends on the content of the first 50000 bytes, or all of them if the output was less than that. + +Adding the option `gzhttp.RandomJitter(32, -1)` will apply from 1 up to 32 bytes of random data to the output. +Each call will apply a random amount of jitter. This should be considered less secure than content based jitter. + +This can be used if responses are very big, deterministic and the buffer size would be too big to cover where the mutation occurs. + ## License [Apache 2.0](LICENSE) diff --git a/vendor/github.com/klauspost/compress/gzhttp/compress.go b/vendor/github.com/klauspost/compress/gzhttp/compress.go index 56941d209..6ca11b1c1 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/compress.go +++ b/vendor/github.com/klauspost/compress/gzhttp/compress.go @@ -2,8 +2,15 @@ package gzhttp import ( "bufio" + "crypto/rand" + "crypto/sha256" + "encoding/binary" + "errors" "fmt" + "hash/crc32" "io" + "math" + "math/bits" "mime" "net" "net/http" @@ -67,6 +74,9 @@ type GzipResponseWriter struct { setContentType bool // Add content type, if missing and detected. suffixETag string // Suffix to add to ETag header if response is compressed. dropETag bool // Drop ETag header if response is compressed (supersedes suffixETag). + sha256Jitter bool // Use sha256 for jitter. + randomJitter string // Add random bytes to output as header field. + jitterBuffer int // Maximum buffer to accumulate before doing jitter. contentTypeFilter func(ct string) bool // Only compress if the response is one of these content-types. All are accepted if empty. } @@ -97,6 +107,9 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { if w.minSize > wantBuf { wantBuf = w.minSize } + if w.jitterBuffer > 0 && w.jitterBuffer > wantBuf { + wantBuf = w.jitterBuffer + } toAdd := len(b) if len(w.buf)+toAdd > wantBuf { toAdd = wantBuf - len(w.buf) @@ -112,7 +125,7 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { ct := hdr.Get(contentType) if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) { // If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data. - if len(w.buf) < w.minSize && cl == 0 { + if len(w.buf) < w.minSize && cl == 0 || (w.jitterBuffer > 0 && len(w.buf) < w.jitterBuffer) { return len(b), nil } @@ -131,7 +144,7 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { // If the Content-Type is acceptable to GZIP, initialize the GZIP writer. if w.contentTypeFilter(ct) { - if err := w.startGzip(); err != nil { + if err := w.startGzip(remain); err != nil { return 0, err } if len(remain) > 0 { @@ -156,8 +169,10 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) { return len(b), nil } +var castagnoliTable = crc32.MakeTable(crc32.Castagnoli) + // startGzip initializes a GZIP writer and writes the buffer. -func (w *GzipResponseWriter) startGzip() error { +func (w *GzipResponseWriter) startGzip(remain []byte) error { // Set the GZIP header. w.Header().Set(contentEncoding, "gzip") @@ -199,6 +214,49 @@ func (w *GzipResponseWriter) startGzip() error { if len(w.buf) > 0 { // Initialize the GZIP response. w.init() + + // Set random jitter based on CRC or SHA-256 of current buffer. + // Before first write. + if len(w.randomJitter) > 0 { + var jitRNG uint32 + if w.jitterBuffer > 0 { + if w.sha256Jitter { + h := sha256.New() + h.Write(w.buf) + // Use only up to "w.jitterBuffer", otherwise the output depends on write sizes. + if len(remain) > 0 && len(w.buf) < w.jitterBuffer { + remain := remain + if len(remain)+len(w.buf) > w.jitterBuffer { + remain = remain[:w.jitterBuffer-len(w.buf)] + } + h.Write(remain) + } + var tmp [sha256.Size]byte + jitRNG = binary.LittleEndian.Uint32(h.Sum(tmp[:0])) + } else { + h := crc32.Update(0, castagnoliTable, w.buf) + // Use only up to "w.jitterBuffer", otherwise the output depends on write sizes. + if len(remain) > 0 && len(w.buf) < w.jitterBuffer { + remain := remain + if len(remain)+len(w.buf) > w.jitterBuffer { + remain = remain[:w.jitterBuffer-len(w.buf)] + } + h = crc32.Update(h, castagnoliTable, remain) + } + jitRNG = bits.RotateLeft32(h, 19) ^ 0xab0755de + } + } else { + // Get from rand.Reader + var tmp [4]byte + _, err := rand.Read(tmp[:]) + if err != nil { + return fmt.Errorf("gzhttp: %w", err) + } + jitRNG = binary.LittleEndian.Uint32(tmp[:]) + } + jit := w.randomJitter[:1+jitRNG%uint32(len(w.randomJitter)-1)] + w.gw.(writer.GzipWriterExt).SetHeader(writer.Header{Comment: jit}) + } n, err := w.gw.Write(w.buf) // This should never happen (per io.Writer docs), but if the write didn't @@ -259,15 +317,21 @@ func (w *GzipResponseWriter) Close() error { if w.ignore { return nil } - if w.gw == nil { - // GZIP not triggered yet, write out regular response. - err := w.startPlain() - // Returns the error if any at write. - if err != nil { - err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error()) + var ( + ct = w.Header().Get(contentType) + ce = w.Header().Get(contentEncoding) + cr = w.Header().Get(contentRange) + ) + // fmt.Println(len(w.buf) == 0, len(w.buf) < w.minSize, len(w.Header()[HeaderNoCompression]) != 0, ce != "", cr != "", !w.contentTypeFilter(ct)) + if len(w.buf) == 0 || len(w.buf) < w.minSize || len(w.Header()[HeaderNoCompression]) != 0 || ce != "" || cr != "" || !w.contentTypeFilter(ct) { + // GZIP not triggered, write out regular response. + return w.startPlain() + } + err := w.startGzip(nil) + if err != nil { + return err } - return err } err := w.gw.Close() @@ -310,7 +374,7 @@ func (w *GzipResponseWriter) Flush() { // See if we should compress... if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) { - w.startGzip() + w.startGzip(nil) } else { w.startPlain() } @@ -392,6 +456,9 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) { suffixETag: c.suffixETag, buf: gw.buf, setContentType: c.setContentType, + randomJitter: c.randomJitter, + jitterBuffer: c.jitterBuffer, + sha256Jitter: c.sha256Jitter, } if len(gw.buf) > 0 { gw.buf = gw.buf[:0] @@ -408,6 +475,7 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) { } else { h.ServeHTTP(gw, r) } + w.Header().Del(HeaderNoCompression) } else { h.ServeHTTP(newNoGzipResponseWriter(w), r) w.Header().Del(HeaderNoCompression) @@ -455,6 +523,9 @@ type config struct { setContentType bool suffixETag string dropETag bool + jitterBuffer int + randomJitter string + sha256Jitter bool } func (c *config) validate() error { @@ -466,7 +537,16 @@ func (c *config) validate() error { if c.minSize < 0 { return fmt.Errorf("minimum size must be more than zero") } - + if len(c.randomJitter) >= math.MaxUint16 { + return fmt.Errorf("random jitter size exceeded") + } + if len(c.randomJitter) > 0 { + gzw, ok := c.writer.New(io.Discard, c.level).(writer.GzipWriterExt) + if !ok { + return errors.New("the custom compressor does not allow setting headers for random jitter") + } + gzw.Close() + } return nil } @@ -496,8 +576,9 @@ func SetContentType(b bool) option { // Implementation changes the implementation of GzipWriter // -// The default implementation is writer/stdlib/NewWriter -// which is backed by standard library's compress/zlib +// The default implementation is backed by github.com/klauspost/compress +// To support RandomJitter, the GzipWriterExt must also be +// supported by the returned writers. func Implementation(writer writer.GzipWriterFactory) option { return func(c *config) { c.writer = writer @@ -625,6 +706,31 @@ func DropETag() option { } } +// RandomJitter adds 1->n random bytes to output based on checksum of payload. +// Specify the amount of input to buffer before applying jitter. +// This should cover the sensitive part of your response. +// This can be used to obfuscate the exact compressed size. +// Specifying 0 will use a buffer size of 64KB. +// 'paranoid' will use a slower hashing function, that MAY provide more safety. +// See README.md for more information. +// If a negative buffer is given, the amount of jitter will not be content dependent. +// This provides *less* security than applying content based jitter. +func RandomJitter(n, buffer int, paranoid bool) option { + return func(c *config) { + if n > 0 { + c.sha256Jitter = paranoid + c.randomJitter = strings.Repeat("Padding-", 1+(n/8))[:n+1] + c.jitterBuffer = buffer + if c.jitterBuffer == 0 { + c.jitterBuffer = 64 << 10 + } + } else { + c.randomJitter = "" + c.jitterBuffer = 0 + } + } +} + // acceptsGzip returns true if the given HTTP request indicates that it will // accept a gzipped response. func acceptsGzip(r *http.Request) bool { @@ -702,10 +808,23 @@ func parseEncodings(s string) (codings, error) { return c, nil } +var errEmptyEncoding = errors.New("empty content-coding") + // parseCoding parses a single coding (content-coding with an optional qvalue), // as might appear in an Accept-Encoding header. It attempts to forgive minor // formatting errors. func parseCoding(s string) (coding string, qvalue float64, err error) { + // Avoid splitting if we can... + if len(s) == 0 { + return "", 0, errEmptyEncoding + } + if !strings.ContainsRune(s, ';') { + coding = strings.ToLower(strings.TrimSpace(s)) + if coding == "" { + err = errEmptyEncoding + } + return coding, DefaultQValue, err + } for n, part := range strings.Split(s, ";") { part = strings.TrimSpace(part) qvalue = DefaultQValue @@ -724,7 +843,7 @@ func parseCoding(s string) (coding string, qvalue float64, err error) { } if coding == "" { - err = fmt.Errorf("empty content-coding") + err = errEmptyEncoding } return @@ -766,6 +885,9 @@ const intSize = 32 << (^uint(0) >> 63) // atoi is equivalent to ParseInt(s, 10, 0), converted to type int. func atoi(s string) (int, bool) { + if len(s) == 0 { + return 0, false + } sLen := len(s) if intSize == 32 && (0 < sLen && sLen < 10) || intSize == 64 && (0 < sLen && sLen < 19) { diff --git a/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go b/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go index 053f01247..e31c46c4c 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go +++ b/vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go @@ -61,6 +61,15 @@ func NewWriter(w io.Writer, level int) writer.GzipWriter { } } +// SetHeader will override the gzip header on pw. +func (pw *pooledWriter) SetHeader(h writer.Header) { + pw.Name = h.Name + pw.Extra = h.Extra + pw.Comment = h.Comment + pw.ModTime = h.ModTime + pw.OS = h.OS +} + func Levels() (min, max int) { return gzip.StatelessCompression, gzip.BestCompression } diff --git a/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go b/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go index 20c516129..1ad16806f 100644 --- a/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go +++ b/vendor/github.com/klauspost/compress/gzhttp/writer/interface.go @@ -1,6 +1,9 @@ package writer -import "io" +import ( + "io" + "time" +) // GzipWriter implements the functions needed for compressing content. type GzipWriter interface { @@ -9,6 +12,24 @@ type GzipWriter interface { Flush() error } +// GzipWriterExt implements the functions needed for compressing content +// and optional extensions. +type GzipWriterExt interface { + GzipWriter + + // SetHeader will populate header fields with non-nil values in h. + SetHeader(h Header) +} + +// Header is a gzip header. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + // GzipWriterFactory contains the information needed for custom gzip implementations. type GzipWriterFactory struct { // Must return the minimum and maximum supported level. diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index ec71f7a34..aed2347ce 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -60,6 +60,22 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { b.nBits += encA.nBits + encB.nBits } +// encFourSymbols adds up to 32 bits from four symbols. +// It will not check if there is space for them, +// so the caller must ensure that b has been flushed recently. +func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) { + bitsA := encA.nBits + bitsB := bitsA + encB.nBits + bitsC := bitsB + encC.nBits + bitsD := bitsC + encD.nBits + combined := uint64(encA.val) | + (uint64(encB.val) << (bitsA & 63)) | + (uint64(encC.val) << (bitsB & 63)) | + (uint64(encD.val) << (bitsC & 63)) + b.bitContainer |= combined << (b.nBits & 63) + b.nBits += bitsD +} + // flush32 will flush out, so there are at least 32 bits available for writing. func (b *bitWriter) flush32() { if b.nBits < 32 { diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index cdc94856f..4ee4fa18d 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -248,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { tmp := src[n : n+4] // tmp should be len 4 bw.flush32() - bw.encTwoSymbols(cTable, tmp[3], tmp[2]) - bw.encTwoSymbols(cTable, tmp[1], tmp[0]) + bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]]) } } else { for ; n >= 0; n -= 4 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index d7749d75c..0d39c7b0e 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -717,3 +717,11 @@ func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { panic("cvtLZ4BlockSnappyAsm should be unreachable") } + +func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4sBlockAsm should be unreachable") +} + +func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { + panic("cvtLZ4sBlockSnappyAsm should be unreachable") +} diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 9f3dc8c29..297e41501 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -212,7 +212,17 @@ func matchLen(a []byte, b []byte) int //go:noescape func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) -// cvtLZ4Block converts an LZ4 block to S2 +// cvtLZ4sBlock converts an LZ4s block to S2 +// +//go:noescape +func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) + +// cvtLZ4Block converts an LZ4 block to Snappy // //go:noescape func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) + +// cvtLZ4sBlock converts an LZ4s block to Snappy +// +//go:noescape +func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 19bd5237b..12a4de3be 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -19271,6 +19271,491 @@ lz4_s2_dstfull: MOVQ SI, uncompressed+48(FP) RET +// func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + XORQ DI, DI + +lz4s_s2_loop: + CMPQ DX, BX + JAE lz4s_s2_corrupt + CMPQ AX, CX + JAE lz4s_s2_dstfull + MOVBQZX (DX), R8 + MOVQ R8, R9 + MOVQ R8, R10 + SHRQ $0x04, R9 + ANDQ $0x0f, R10 + CMPQ R8, $0xf0 + JB lz4s_s2_ll_end + +lz4s_s2_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4s_s2_corrupt + MOVBQZX (DX), R8 + ADDQ R8, R9 + CMPQ R8, $0xff + JEQ lz4s_s2_ll_loop + +lz4s_s2_ll_end: + LEAQ (DX)(R9*1), R8 + ADDQ $0x03, R10 + CMPQ R8, BX + JAE lz4s_s2_corrupt + INCQ DX + INCQ R8 + TESTQ R9, R9 + JZ lz4s_s2_lits_done + LEAQ (AX)(R9*1), R11 + CMPQ R11, CX + JAE lz4s_s2_dstfull + ADDQ R9, SI + LEAL -1(R9), R11 + CMPL R11, $0x3c + JLT one_byte_lz4s_s2 + CMPL R11, $0x00000100 + JLT two_bytes_lz4s_s2 + CMPL R11, $0x00010000 + JLT three_bytes_lz4s_s2 + CMPL R11, $0x01000000 + JLT four_bytes_lz4s_s2 + MOVB $0xfc, (AX) + MOVL R11, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4s_s2 + +four_bytes_lz4s_s2: + MOVL R11, R12 + SHRL $0x10, R12 + MOVB $0xf8, (AX) + MOVW R11, 1(AX) + MOVB R12, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4s_s2 + +three_bytes_lz4s_s2: + MOVB $0xf4, (AX) + MOVW R11, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4s_s2 + +two_bytes_lz4s_s2: + MOVB $0xf0, (AX) + MOVB R11, 1(AX) + ADDQ $0x02, AX + CMPL R11, $0x40 + JL memmove_lz4s_s2 + JMP memmove_long_lz4s_s2 + +one_byte_lz4s_s2: + SHLB $0x02, R11 + MOVB R11, (AX) + ADDQ $0x01, AX + +memmove_lz4s_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveShort + CMPQ R9, $0x08 + JLE emit_lit_memmove_lz4s_s2_memmove_move_8 + CMPQ R9, $0x10 + JBE emit_lit_memmove_lz4s_s2_memmove_move_8through16 + CMPQ R9, $0x20 + JBE emit_lit_memmove_lz4s_s2_memmove_move_17through32 + JMP emit_lit_memmove_lz4s_s2_memmove_move_33through64 + +emit_lit_memmove_lz4s_s2_memmove_move_8: + MOVQ (DX), R12 + MOVQ R12, (AX) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_8through16: + MOVQ (DX), R12 + MOVQ -8(DX)(R9*1), DX + MOVQ R12, (AX) + MOVQ DX, -8(AX)(R9*1) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R9*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R9*1) + JMP memmove_end_copy_lz4s_s2 + +emit_lit_memmove_lz4s_s2_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + +memmove_end_copy_lz4s_s2: + MOVQ R11, AX + JMP lz4s_s2_lits_emit_done + +memmove_long_lz4s_s2: + LEAQ (AX)(R9*1), R11 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R9*1), X2 + MOVOU -16(DX)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ AX, R12 + ANDL $0x0000001f, R12 + MOVQ $0x00000040, R14 + SUBQ R12, R14 + DECQ R13 + JA emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 + LEAQ -32(DX)(R14*1), R12 + LEAQ -32(AX)(R14*1), R15 + +emit_lit_memmove_long_lz4s_s2large_big_loop_back: + MOVOU (R12), X4 + MOVOU 16(R12), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R12 + ADDQ $0x20, R14 + DECQ R13 + JNA emit_lit_memmove_long_lz4s_s2large_big_loop_back + +emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32: + MOVOU -32(DX)(R14*1), X4 + MOVOU -16(DX)(R14*1), X5 + MOVOA X4, -32(AX)(R14*1) + MOVOA X5, -16(AX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 + JAE emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R9*1) + MOVOU X3, -16(AX)(R9*1) + MOVQ R11, AX + +lz4s_s2_lits_emit_done: + MOVQ R8, DX + +lz4s_s2_lits_done: + CMPQ DX, BX + JNE lz4s_s2_match + CMPQ R10, $0x03 + JEQ lz4s_s2_done + JMP lz4s_s2_corrupt + +lz4s_s2_match: + CMPQ R10, $0x03 + JEQ lz4s_s2_loop + LEAQ 2(DX), R8 + CMPQ R8, BX + JAE lz4s_s2_corrupt + MOVWQZX (DX), R9 + MOVQ R8, DX + TESTQ R9, R9 + JZ lz4s_s2_corrupt + CMPQ R9, SI + JA lz4s_s2_corrupt + CMPQ R10, $0x12 + JNE lz4s_s2_ml_done + +lz4s_s2_ml_loop: + MOVBQZX (DX), R8 + INCQ DX + ADDQ R8, R10 + CMPQ DX, BX + JAE lz4s_s2_corrupt + CMPQ R8, $0xff + JEQ lz4s_s2_ml_loop + +lz4s_s2_ml_done: + ADDQ R10, SI + CMPQ R9, DI + JNE lz4s_s2_docopy + + // emitRepeat +emit_repeat_again_lz4_s2: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JLE repeat_two_lz4_s2 + CMPL R8, $0x0c + JGE cant_repeat_two_offset_lz4_s2 + CMPL R9, $0x00000800 + JLT repeat_two_offset_lz4_s2 + +cant_repeat_two_offset_lz4_s2: + CMPL R10, $0x00000104 + JLT repeat_three_lz4_s2 + CMPL R10, $0x00010100 + JLT repeat_four_lz4_s2 + CMPL R10, $0x0100ffff + JLT repeat_five_lz4_s2 + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2 + +repeat_five_lz4_s2: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +lz4s_s2_docopy: + MOVQ R9, DI + + // emitCopy + CMPL R10, $0x40 + JLE two_byte_offset_short_lz4_s2 + CMPL R9, $0x00000800 + JAE long_offset_short_lz4_s2 + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB R9, 1(AX) + MOVL R9, R11 + SHRL $0x08, R11 + SHLL $0x05, R11 + ORL R11, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + SUBL $0x08, R10 + + // emitRepeat + LEAL -4(R10), R10 + JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + +emit_repeat_again_lz4_s2_emit_copy_short_2b: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JLE repeat_two_lz4_s2_emit_copy_short_2b + CMPL R8, $0x0c + JGE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b + CMPL R9, $0x00000800 + JLT repeat_two_offset_lz4_s2_emit_copy_short_2b + +cant_repeat_two_offset_lz4_s2_emit_copy_short_2b: + CMPL R10, $0x00000104 + JLT repeat_three_lz4_s2_emit_copy_short_2b + CMPL R10, $0x00010100 + JLT repeat_four_lz4_s2_emit_copy_short_2b + CMPL R10, $0x0100ffff + JLT repeat_five_lz4_s2_emit_copy_short_2b + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short_2b + +repeat_five_lz4_s2_emit_copy_short_2b: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2_emit_copy_short_2b: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2_emit_copy_short_2b: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2_emit_copy_short_2b: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short_2b: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +long_offset_short_lz4_s2: + MOVB $0xee, (AX) + MOVW R9, 1(AX) + LEAL -60(R10), R10 + ADDQ $0x03, AX + + // emitRepeat +emit_repeat_again_lz4_s2_emit_copy_short: + MOVL R10, R8 + LEAL -4(R10), R10 + CMPL R8, $0x08 + JLE repeat_two_lz4_s2_emit_copy_short + CMPL R8, $0x0c + JGE cant_repeat_two_offset_lz4_s2_emit_copy_short + CMPL R9, $0x00000800 + JLT repeat_two_offset_lz4_s2_emit_copy_short + +cant_repeat_two_offset_lz4_s2_emit_copy_short: + CMPL R10, $0x00000104 + JLT repeat_three_lz4_s2_emit_copy_short + CMPL R10, $0x00010100 + JLT repeat_four_lz4_s2_emit_copy_short + CMPL R10, $0x0100ffff + JLT repeat_five_lz4_s2_emit_copy_short + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (AX) + MOVB $0xff, 4(AX) + ADDQ $0x05, AX + JMP emit_repeat_again_lz4_s2_emit_copy_short + +repeat_five_lz4_s2_emit_copy_short: + LEAL -65536(R10), R10 + MOVL R10, R9 + MOVW $0x001d, (AX) + MOVW R10, 2(AX) + SARL $0x10, R9 + MOVB R9, 4(AX) + ADDQ $0x05, AX + JMP lz4s_s2_loop + +repeat_four_lz4_s2_emit_copy_short: + LEAL -256(R10), R10 + MOVW $0x0019, (AX) + MOVW R10, 2(AX) + ADDQ $0x04, AX + JMP lz4s_s2_loop + +repeat_three_lz4_s2_emit_copy_short: + LEAL -4(R10), R10 + MOVW $0x0015, (AX) + MOVB R10, 2(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +repeat_two_lz4_s2_emit_copy_short: + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +repeat_two_offset_lz4_s2_emit_copy_short: + XORQ R8, R8 + LEAL 1(R8)(R10*4), R10 + MOVB R9, 1(AX) + SARL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R10 + MOVB R10, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +two_byte_offset_short_lz4_s2: + MOVL R10, R8 + SHLL $0x02, R8 + CMPL R10, $0x0c + JGE emit_copy_three_lz4_s2 + CMPL R9, $0x00000800 + JGE emit_copy_three_lz4_s2 + LEAL -15(R8), R8 + MOVB R9, 1(AX) + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (AX) + ADDQ $0x02, AX + JMP lz4s_s2_loop + +emit_copy_three_lz4_s2: + LEAL -2(R8), R8 + MOVB R8, (AX) + MOVW R9, 1(AX) + ADDQ $0x03, AX + JMP lz4s_s2_loop + +lz4s_s2_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4s_s2_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4s_s2_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + // func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) // Requires: SSE2 TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 @@ -19536,3 +20021,271 @@ lz4_snappy_dstfull: LEAQ -2(AX), SI MOVQ SI, uncompressed+48(FP) RET + +// func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) +// Requires: SSE2 +TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 + XORQ SI, SI + MOVQ dst_base+0(FP), AX + MOVQ dst_len+8(FP), CX + MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), BX + LEAQ (DX)(BX*1), BX + LEAQ -10(AX)(CX*1), CX + +lz4s_snappy_loop: + CMPQ DX, BX + JAE lz4s_snappy_corrupt + CMPQ AX, CX + JAE lz4s_snappy_dstfull + MOVBQZX (DX), DI + MOVQ DI, R8 + MOVQ DI, R9 + SHRQ $0x04, R8 + ANDQ $0x0f, R9 + CMPQ DI, $0xf0 + JB lz4s_snappy_ll_end + +lz4s_snappy_ll_loop: + INCQ DX + CMPQ DX, BX + JAE lz4s_snappy_corrupt + MOVBQZX (DX), DI + ADDQ DI, R8 + CMPQ DI, $0xff + JEQ lz4s_snappy_ll_loop + +lz4s_snappy_ll_end: + LEAQ (DX)(R8*1), DI + ADDQ $0x03, R9 + CMPQ DI, BX + JAE lz4s_snappy_corrupt + INCQ DX + INCQ DI + TESTQ R8, R8 + JZ lz4s_snappy_lits_done + LEAQ (AX)(R8*1), R10 + CMPQ R10, CX + JAE lz4s_snappy_dstfull + ADDQ R8, SI + LEAL -1(R8), R10 + CMPL R10, $0x3c + JLT one_byte_lz4s_snappy + CMPL R10, $0x00000100 + JLT two_bytes_lz4s_snappy + CMPL R10, $0x00010000 + JLT three_bytes_lz4s_snappy + CMPL R10, $0x01000000 + JLT four_bytes_lz4s_snappy + MOVB $0xfc, (AX) + MOVL R10, 1(AX) + ADDQ $0x05, AX + JMP memmove_long_lz4s_snappy + +four_bytes_lz4s_snappy: + MOVL R10, R11 + SHRL $0x10, R11 + MOVB $0xf8, (AX) + MOVW R10, 1(AX) + MOVB R11, 3(AX) + ADDQ $0x04, AX + JMP memmove_long_lz4s_snappy + +three_bytes_lz4s_snappy: + MOVB $0xf4, (AX) + MOVW R10, 1(AX) + ADDQ $0x03, AX + JMP memmove_long_lz4s_snappy + +two_bytes_lz4s_snappy: + MOVB $0xf0, (AX) + MOVB R10, 1(AX) + ADDQ $0x02, AX + CMPL R10, $0x40 + JL memmove_lz4s_snappy + JMP memmove_long_lz4s_snappy + +one_byte_lz4s_snappy: + SHLB $0x02, R10 + MOVB R10, (AX) + ADDQ $0x01, AX + +memmove_lz4s_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveShort + CMPQ R8, $0x08 + JLE emit_lit_memmove_lz4s_snappy_memmove_move_8 + CMPQ R8, $0x10 + JBE emit_lit_memmove_lz4s_snappy_memmove_move_8through16 + CMPQ R8, $0x20 + JBE emit_lit_memmove_lz4s_snappy_memmove_move_17through32 + JMP emit_lit_memmove_lz4s_snappy_memmove_move_33through64 + +emit_lit_memmove_lz4s_snappy_memmove_move_8: + MOVQ (DX), R11 + MOVQ R11, (AX) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_8through16: + MOVQ (DX), R11 + MOVQ -8(DX)(R8*1), DX + MOVQ R11, (AX) + MOVQ DX, -8(AX)(R8*1) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_17through32: + MOVOU (DX), X0 + MOVOU -16(DX)(R8*1), X1 + MOVOU X0, (AX) + MOVOU X1, -16(AX)(R8*1) + JMP memmove_end_copy_lz4s_snappy + +emit_lit_memmove_lz4s_snappy_memmove_move_33through64: + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + +memmove_end_copy_lz4s_snappy: + MOVQ R10, AX + JMP lz4s_snappy_lits_emit_done + +memmove_long_lz4s_snappy: + LEAQ (AX)(R8*1), R10 + + // genMemMoveLong + MOVOU (DX), X0 + MOVOU 16(DX), X1 + MOVOU -32(DX)(R8*1), X2 + MOVOU -16(DX)(R8*1), X3 + MOVQ R8, R12 + SHRQ $0x05, R12 + MOVQ AX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 + JA emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 + LEAQ -32(DX)(R13*1), R11 + LEAQ -32(AX)(R13*1), R14 + +emit_lit_memmove_long_lz4s_snappylarge_big_loop_back: + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 + ADDQ $0x20, R13 + DECQ R12 + JNA emit_lit_memmove_long_lz4s_snappylarge_big_loop_back + +emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32: + MOVOU -32(DX)(R13*1), X4 + MOVOU -16(DX)(R13*1), X5 + MOVOA X4, -32(AX)(R13*1) + MOVOA X5, -16(AX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 + JAE emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32 + MOVOU X0, (AX) + MOVOU X1, 16(AX) + MOVOU X2, -32(AX)(R8*1) + MOVOU X3, -16(AX)(R8*1) + MOVQ R10, AX + +lz4s_snappy_lits_emit_done: + MOVQ DI, DX + +lz4s_snappy_lits_done: + CMPQ DX, BX + JNE lz4s_snappy_match + CMPQ R9, $0x03 + JEQ lz4s_snappy_done + JMP lz4s_snappy_corrupt + +lz4s_snappy_match: + CMPQ R9, $0x03 + JEQ lz4s_snappy_loop + LEAQ 2(DX), DI + CMPQ DI, BX + JAE lz4s_snappy_corrupt + MOVWQZX (DX), R8 + MOVQ DI, DX + TESTQ R8, R8 + JZ lz4s_snappy_corrupt + CMPQ R8, SI + JA lz4s_snappy_corrupt + CMPQ R9, $0x12 + JNE lz4s_snappy_ml_done + +lz4s_snappy_ml_loop: + MOVBQZX (DX), DI + INCQ DX + ADDQ DI, R9 + CMPQ DX, BX + JAE lz4s_snappy_corrupt + CMPQ DI, $0xff + JEQ lz4s_snappy_ml_loop + +lz4s_snappy_ml_done: + ADDQ R9, SI + + // emitCopy +two_byte_offset_lz4_s2: + CMPL R9, $0x40 + JLE two_byte_offset_short_lz4_s2 + MOVB $0xee, (AX) + MOVW R8, 1(AX) + LEAL -60(R9), R9 + ADDQ $0x03, AX + CMPQ AX, CX + JAE lz4s_snappy_loop + JMP two_byte_offset_lz4_s2 + +two_byte_offset_short_lz4_s2: + MOVL R9, DI + SHLL $0x02, DI + CMPL R9, $0x0c + JGE emit_copy_three_lz4_s2 + CMPL R8, $0x00000800 + JGE emit_copy_three_lz4_s2 + LEAL -15(DI), DI + MOVB R8, 1(AX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (AX) + ADDQ $0x02, AX + JMP lz4s_snappy_loop + +emit_copy_three_lz4_s2: + LEAL -2(DI), DI + MOVB DI, (AX) + MOVW R8, 1(AX) + ADDQ $0x03, AX + JMP lz4s_snappy_loop + +lz4s_snappy_done: + MOVQ dst_base+0(FP), CX + SUBQ CX, AX + MOVQ SI, uncompressed+48(FP) + MOVQ AX, dstUsed+56(FP) + RET + +lz4s_snappy_corrupt: + XORQ AX, AX + LEAQ -1(AX), SI + MOVQ SI, uncompressed+48(FP) + RET + +lz4s_snappy_dstfull: + XORQ AX, AX + LEAQ -2(AX), SI + MOVQ SI, uncompressed+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/s2/lz4sconvert.go b/vendor/github.com/klauspost/compress/s2/lz4sconvert.go new file mode 100644 index 000000000..000f39719 --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2/lz4sconvert.go @@ -0,0 +1,467 @@ +// Copyright (c) 2022 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package s2 + +import ( + "encoding/binary" + "fmt" +) + +// LZ4sConverter provides conversion from LZ4s. +// (Intel modified LZ4 Blocks) +// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf +// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format. +// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData. +// The LZ4s block returned by the Intel® QAT hardware can be used by an external +// software post-processing to generate other compressed data formats. +// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses +// the same high-level formatting as LZ4 block format with the following encoding changes: +// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte. +// ONLY "Min match of 4 bytes" is supported. +type LZ4sConverter struct { +} + +// ConvertBlock will convert an LZ4s block and append it as an S2 +// block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const inline = true + const lz4MinMatch = 3 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + if !debug && hasAmd64Asm { + res, sz := cvtLZ4sBlockAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var lastOffset uint16 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return dst[:d], 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return dst[:d], 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if ml == lz4MinMatch { + if s == len(src) { + break + } + // 0 bytes. + continue + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if offset == lastOffset { + if debug { + fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitRepeat16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Repeat offset, make length cheaper + length -= 4 + if length <= 4 { + dst[0] = uint8(length)<<2 | tagCopy1 + dst[1] = 0 + d += 2 + break + } + if length < 8 && offset < 2048 { + // Encode WITH offset + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1 + d += 2 + break + } + if length < (1<<8)+4 { + length -= 4 + dst[2] = uint8(length) + dst[1] = 0 + dst[0] = 5<<2 | tagCopy1 + d += 3 + break + } + if length < (1<<16)+(1<<8) { + length -= 1 << 8 + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 6<<2 | tagCopy1 + d += 4 + break + } + const maxRepeat = (1 << 24) - 1 + length -= 1 << 16 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + 4 + length = maxRepeat - 4 + } + dst[4] = uint8(length >> 16) + dst[3] = uint8(length >> 8) + dst[2] = uint8(length >> 0) + dst[1] = 0 + dst[0] = 7<<2 | tagCopy1 + if left > 0 { + d += 5 + emitRepeat16(dst[5:], offset, left) + break + } + d += 5 + break + } + } + } else { + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + if !inline { + d += emitCopy16(dst[d:], offset, ml) + } else { + length := ml + dst := dst[d:] + for len(dst) > 5 { + // Offset no more than 2 bytes. + if length > 64 { + off := 3 + if offset < 2048 { + // emit 8 bytes as tagCopy1, rest as repeats. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1 + length -= 8 + off = 2 + } else { + // Emit a length 60 copy, encoded as 3 bytes. + // Emit remaining as repeat value (minimum 4 bytes). + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = 59<<2 | tagCopy2 + length -= 60 + } + // Emit remaining as repeats, at least 4 bytes remain. + d += off + emitRepeat16(dst[off:], offset, length) + break + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[2] = uint8(offset >> 8) + dst[1] = uint8(offset) + dst[0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[1] = uint8(offset) + dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + } + lastOffset = offset + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} + +// ConvertBlockSnappy will convert an LZ4s block and append it +// as a Snappy block without block length to dst. +// The uncompressed size is returned as well. +// dst must have capacity to contain the entire compressed block. +func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) { + if len(src) == 0 { + return dst, 0, nil + } + const debug = false + const lz4MinMatch = 3 + + s, d := 0, len(dst) + dst = dst[:cap(dst)] + // Use assembly when possible + if !debug && hasAmd64Asm { + res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src) + if res < 0 { + const ( + errCorrupt = -1 + errDstTooSmall = -2 + ) + switch res { + case errCorrupt: + return nil, 0, ErrCorrupt + case errDstTooSmall: + return nil, 0, ErrDstTooSmall + default: + return nil, 0, fmt.Errorf("unexpected result: %d", res) + } + } + if d+sz > len(dst) { + return nil, 0, ErrDstTooSmall + } + return dst[:d+sz], res, nil + } + + dLimit := len(dst) - 10 + var uncompressed int + if debug { + fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst)) + } + + for { + if s >= len(src) { + return nil, 0, ErrCorrupt + } + // Read literal info + token := src[s] + ll := int(token >> 4) + ml := int(lz4MinMatch + (token & 0xf)) + + // If upper nibble is 15, literal length is extended + if token >= 0xf0 { + for { + s++ + if s >= len(src) { + if debug { + fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + ll += int(val) + if val != 255 { + break + } + } + } + // Skip past token + if s+ll >= len(src) { + if debug { + fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src)) + } + return nil, 0, ErrCorrupt + } + s++ + if ll > 0 { + if d+ll > dLimit { + return nil, 0, ErrDstTooSmall + } + if debug { + fmt.Printf("emit %d literals\n", ll) + } + d += emitLiteralGo(dst[d:], src[s:s+ll]) + s += ll + uncompressed += ll + } + + // Check if we are done... + if ml == lz4MinMatch { + if s == len(src) { + break + } + // 0 bytes. + continue + } + // 2 byte offset + if s >= len(src)-2 { + if debug { + fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2) + } + return nil, 0, ErrCorrupt + } + offset := binary.LittleEndian.Uint16(src[s:]) + s += 2 + if offset == 0 { + if debug { + fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s) + } + return nil, 0, ErrCorrupt + } + if int(offset) > uncompressed { + if debug { + fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed) + } + return nil, 0, ErrCorrupt + } + + if ml == lz4MinMatch+15 { + for { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + val := src[s] + s++ + ml += int(val) + if val != 255 { + if s >= len(src) { + if debug { + fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src)) + } + return nil, 0, ErrCorrupt + } + break + } + } + } + if debug { + fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset) + } + length := ml + // d += emitCopyNoRepeat(dst[d:], int(offset), ml) + for length > 0 { + if d >= dLimit { + return nil, 0, ErrDstTooSmall + } + + // Offset no more than 2 bytes. + if length > 64 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = 63<<2 | tagCopy2 + length -= 64 + d += 3 + continue + } + if length >= 12 || offset >= 2048 || length < 4 { + // Emit the remaining copy, encoded as 3 bytes. + dst[d+2] = uint8(offset >> 8) + dst[d+1] = uint8(offset) + dst[d+0] = uint8(length-1)<<2 | tagCopy2 + d += 3 + break + } + // Emit the remaining copy, encoded as 2 bytes. + dst[d+1] = uint8(offset) + dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + d += 2 + break + } + uncompressed += ml + if d > dLimit { + return nil, 0, ErrDstTooSmall + } + } + + return dst[:d], uncompressed, nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 2445bb4fe..5f272d87f 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -9,6 +9,7 @@ import ( "encoding/binary" "errors" "fmt" + "hash/crc32" "io" "os" "path/filepath" @@ -442,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err } } var err error + if debugDecoder { + println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals)) + } huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go index 176788f25..512ffe5b9 100644 --- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { func (b *byteBuf) readByte() (byte, error) { bb := *b if len(bb) < 1 { - return 0, nil + return 0, io.ErrUnexpectedEOF } r := bb[0] *b = bb[1:] diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index 830f5ba74..07f657d36 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -32,7 +32,6 @@ type match struct { length int32 rep int32 est int32 - _ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes } const highScore = 25000 @@ -189,12 +188,6 @@ encodeLoop: panic("offset0 was 0") } - bestOf := func(a, b *match) *match { - if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 { - return a - } - return b - } const goodEnough = 100 nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) @@ -202,40 +195,41 @@ encodeLoop: candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] - matchAt := func(offset int32, s int32, first uint32, rep int32) match { + // Set m to a match at offset if it looks like that will improve compression. + improve := func(m *match, offset int32, s int32, first uint32, rep int32) { if s-offset >= e.maxMatchOff || load3232(src, offset) != first { - return match{s: s, est: highScore} + return } if debugAsserts { if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) } } - m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} - m.estBits(bitsPerByte) - return m + cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + cand.estBits(bitsPerByte) + if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { + *m = cand + } } - m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) - m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) - m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) - m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1) - best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4)) + best := match{s: s, est: highScore} + improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1) if canRepeat && best.length < goodEnough { cv32 := uint32(cv >> 8) spp := s + 1 - m1 := matchAt(spp-offset1, spp, cv32, 1) - m2 := matchAt(spp-offset2, spp, cv32, 2) - m3 := matchAt(spp-offset3, spp, cv32, 3) - best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) if best.length > 0 { cv32 = uint32(cv >> 24) spp += 2 - m1 := matchAt(spp-offset1, spp, cv32, 1) - m2 := matchAt(spp-offset2, spp, cv32, 2) - m3 := matchAt(spp-offset3, spp, cv32, 3) - best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) + improve(&best, spp-offset1, spp, cv32, 1) + improve(&best, spp-offset2, spp, cv32, 2) + improve(&best, spp-offset3, spp, cv32, 3) } } // Load next and check... @@ -262,18 +256,16 @@ encodeLoop: candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] // Short at s+1 - m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1) // Long at s+1, s+2 - m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) - m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) - m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1) - m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1) - best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5)) + improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1) + improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1) + improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1) + improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1) if false { // Short at s+3. // Too often worse... - m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) - best = bestOf(best, &m) + improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) } // See if we can find a better match by checking where the current best ends. // Use that offset to see if we can find a better full match. @@ -284,13 +276,10 @@ encodeLoop: // For this compression level 2 yields the best results. const skipBeginning = 2 if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 { - m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - bestEnd := bestOf(best, &m) + improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 { - m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) - bestEnd = bestOf(bestEnd, &m) + improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) } - best = bestEnd } } } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index f833d1541..27fdf90fb 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -314,9 +314,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { } size := ll + ml + len(out) if size-startSize > maxBlockSize { - if size-startSize == 424242 { - panic("here") - } return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) } if size > cap(out) { @@ -427,8 +424,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { } } - // Check if space for literals - if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize { + if size := len(s.literals) + len(out) - startSize; size > maxBlockSize { return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 191384adf..387a30e99 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -148,7 +148,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { s.seqSize += ctx.litRemain if s.seqSize > maxBlockSize { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) - } err := br.close() if err != nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index bedd2cd97..dd6e69d86 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -335,7 +335,7 @@ github.com/jmespath/go-jmespath # github.com/jpillora/backoff v1.0.0 ## explicit; go 1.13 github.com/jpillora/backoff -# github.com/klauspost/compress v1.16.0 +# github.com/klauspost/compress v1.16.3 ## explicit; go 1.18 github.com/klauspost/compress github.com/klauspost/compress/flate