vendor: update github.com/klauspost/compress from v1.16.0 to v1.16.3

Aliaksandr Valialkin 2023-03-14 16:14:25 -07:00
parent 8f6d5217d1
commit 90e1818068
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
20 changed files with 1540 additions and 69 deletions

2
go.mod
View file

@ -23,7 +23,7 @@ require (
github.com/golang/snappy v0.0.4
github.com/googleapis/gax-go/v2 v2.7.1
github.com/influxdata/influxdb v1.11.0
github.com/klauspost/compress v1.16.0
github.com/klauspost/compress v1.16.3
github.com/prometheus/prometheus v0.42.0
github.com/urfave/cli/v2 v2.25.0
github.com/valyala/fastjson v1.6.4

4
go.sum
View file

@ -313,8 +313,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4=
github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

View file

@ -16,6 +16,21 @@ This package provides various compression algorithms.
# changelog
* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
* gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
* s2: Add Intel LZ4s converter https://github.com/klauspost/compress/pull/766
* zstd: Minor bug fixes https://github.com/klauspost/compress/pull/771 https://github.com/klauspost/compress/pull/772 https://github.com/klauspost/compress/pull/773
* huff0: Speed up compress1xDo by @greatroar in https://github.com/klauspost/compress/pull/774
* Feb 26, 2023 - [v1.16.0](https://github.com/klauspost/compress/releases/tag/v1.16.0)
* s2: Add [Dictionary](https://github.com/klauspost/compress/tree/master/s2#dictionaries) support. https://github.com/klauspost/compress/pull/685
* s2: Add Compression Size Estimate. https://github.com/klauspost/compress/pull/752
* s2: Add support for custom stream encoder. https://github.com/klauspost/compress/pull/755
* s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748
* s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
* s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746
* Jan 21st, 2023 (v1.15.15)
* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728

View file

@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error {
// If the buffer is over-read an error is returned.
func (s *Scratch) decompress() error {
br := &s.bits
br.init(s.br.unread())
if err := br.init(s.br.unread()); err != nil {
return err
}
var s1, s2 decoder
// Initialize and decode first state and symbol.

View file

@ -215,6 +215,67 @@ has been reached. In this case it will assume that the minimum size has been rea
If nothing has been written to the response writer, nothing will be flushed.
## BREACH mitigation
[BREACH](http://css.csail.mit.edu/6.858/2020/readings/breach.pdf) is a specialized attack where attacker-controlled data
is injected alongside secret data in a response body. This can lead to side-channel attacks, where observing the compressed response
size can reveal if there are overlaps between the secret data and the injected data.
For more information see https://breachattack.com/
It can be hard to judge if you are vulnerable to BREACH.
In general, if you do not include any user-provided content in the response body you are safe,
but if you do, or if you are in doubt, you can apply mitigations.
`gzhttp` can apply [Heal the Breach](https://ieeexplore.ieee.org/document/9754554), or improved content-aware padding.
```Go
// RandomJitter adds 1->n random bytes to output based on checksum of payload.
// Specify the amount of input to buffer before applying jitter.
// This should cover the sensitive part of your response.
// This can be used to obfuscate the exact compressed size.
// Specifying 0 will use a buffer size of 64KB.
// 'paranoid' will use a slower hashing function, that MAY provide more safety.
// If a negative buffer is given, the amount of jitter will not be content dependent.
// This provides *less* security than applying content based jitter.
func RandomJitter(n, buffer int, paranoid bool) option
...
```
The jitter is added as a "Comment" field. This field has a 1-byte overhead, so the actual extra size will be 2 to n+1 bytes (inclusive).
A good option would be to apply 32 random bytes, with the default 64KB buffer: `gzhttp.RandomJitter(32, 0, false)`.
Note that flushing the data forces the padding to be applied, which means that only data before the flush is considered for content-aware padding.
The *padding* in the comment is the text `Padding-Padding-Padding-Padding-Pad....`
The *length* is `1 + crc32c(payload) MOD n` or `1 + sha256(payload) MOD n` (paranoid), or just random from `crypto/rand` if buffer < 0.
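For illustration only, here is a minimal sketch of the non-paranoid length calculation described above; it is not the library's internal code, and `padLen`, `payload` and `n` are placeholder names:
```Go
package main

import (
	"fmt"
	"hash/crc32"
)

// padLen mirrors `1 + crc32c(payload) MOD n` from above: it returns how many
// bytes of the "Padding-..." text would be emitted as the gzip Comment field.
func padLen(payload []byte, n uint32) uint32 {
	sum := crc32.Checksum(payload, crc32.MakeTable(crc32.Castagnoli))
	return 1 + sum%n
}

func main() {
	fmt.Println(padLen([]byte("secret response body"), 32))
}
```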
### Paranoid?
The padding size is determined by the remainder of a CRC32 of the content.
Since the payload contains elements unknown to the attacker, there is no reason to believe they can derive any information
from this remainder, or predict it.
However, those who feel uncomfortable with a CRC32 being used for this can enable "paranoid" mode, which will use SHA-256 for determining the padding.
The hashing itself is about two orders of magnitude slower, but overall it will likely only reduce speed by around 10%.
Paranoid mode has no effect if the buffer is < 0 (non-content-aware padding).
### Examples
Adding the option `gzhttp.RandomJitter(32, 50000, false)` will apply from 1 up to 32 bytes of random data to the output.
The number of bytes added depends on the content of the first 50000 bytes, or all of them if the output is smaller than that.
Adding the option `gzhttp.RandomJitter(32, -1, false)` will also apply from 1 up to 32 bytes of random data to the output.
In that case each call will apply a random amount of jitter. This should be considered less secure than content-based jitter.
It can be used when responses are very big and deterministic, and the buffer size needed to cover the place where the mutation occurs would be too large.
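A minimal usage sketch, based on the `NewWrapper` and `RandomJitter` signatures shown in this change (the handler and listen address are made up for the example), enabling content-aware jitter of up to 32 bytes with the default 64KB buffer:
```Go
package main

import (
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	// Wrap handlers with gzip compression plus up to 32 bytes of
	// content-dependent jitter (buffer 0 selects the default 64KB).
	wrap, err := gzhttp.NewWrapper(gzhttp.RandomJitter(32, 0, false))
	if err != nil {
		log.Fatal(err)
	}
	hello := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		w.Write([]byte("response that may mix secrets with user input"))
	})
	log.Fatal(http.ListenAndServe(":8080", wrap(hello)))
}
```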
## License
[Apache 2.0](LICENSE)

View file

@ -2,8 +2,15 @@ package gzhttp
import (
"bufio"
"crypto/rand"
"crypto/sha256"
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"io"
"math"
"math/bits"
"mime"
"net"
"net/http"
@ -67,6 +74,9 @@ type GzipResponseWriter struct {
setContentType bool // Add content type, if missing and detected.
suffixETag string // Suffix to add to ETag header if response is compressed.
dropETag bool // Drop ETag header if response is compressed (supersedes suffixETag).
sha256Jitter bool // Use sha256 for jitter.
randomJitter string // Add random bytes to output as header field.
jitterBuffer int // Maximum buffer to accumulate before doing jitter.
contentTypeFilter func(ct string) bool // Only compress if the response is one of these content-types. All are accepted if empty.
}
@ -97,6 +107,9 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
if w.minSize > wantBuf {
wantBuf = w.minSize
}
if w.jitterBuffer > 0 && w.jitterBuffer > wantBuf {
wantBuf = w.jitterBuffer
}
toAdd := len(b)
if len(w.buf)+toAdd > wantBuf {
toAdd = wantBuf - len(w.buf)
@ -112,7 +125,7 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
ct := hdr.Get(contentType)
if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
if len(w.buf) < w.minSize && cl == 0 {
if len(w.buf) < w.minSize && cl == 0 || (w.jitterBuffer > 0 && len(w.buf) < w.jitterBuffer) {
return len(b), nil
}
@ -131,7 +144,7 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
if w.contentTypeFilter(ct) {
if err := w.startGzip(); err != nil {
if err := w.startGzip(remain); err != nil {
return 0, err
}
if len(remain) > 0 {
@ -156,8 +169,10 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
return len(b), nil
}
var castagnoliTable = crc32.MakeTable(crc32.Castagnoli)
// startGzip initializes a GZIP writer and writes the buffer.
func (w *GzipResponseWriter) startGzip() error {
func (w *GzipResponseWriter) startGzip(remain []byte) error {
// Set the GZIP header.
w.Header().Set(contentEncoding, "gzip")
@ -199,6 +214,49 @@ func (w *GzipResponseWriter) startGzip() error {
if len(w.buf) > 0 {
// Initialize the GZIP response.
w.init()
// Set random jitter based on CRC or SHA-256 of current buffer.
// Before first write.
if len(w.randomJitter) > 0 {
var jitRNG uint32
if w.jitterBuffer > 0 {
if w.sha256Jitter {
h := sha256.New()
h.Write(w.buf)
// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
remain := remain
if len(remain)+len(w.buf) > w.jitterBuffer {
remain = remain[:w.jitterBuffer-len(w.buf)]
}
h.Write(remain)
}
var tmp [sha256.Size]byte
jitRNG = binary.LittleEndian.Uint32(h.Sum(tmp[:0]))
} else {
h := crc32.Update(0, castagnoliTable, w.buf)
// Use only up to "w.jitterBuffer", otherwise the output depends on write sizes.
if len(remain) > 0 && len(w.buf) < w.jitterBuffer {
remain := remain
if len(remain)+len(w.buf) > w.jitterBuffer {
remain = remain[:w.jitterBuffer-len(w.buf)]
}
h = crc32.Update(h, castagnoliTable, remain)
}
jitRNG = bits.RotateLeft32(h, 19) ^ 0xab0755de
}
} else {
// Get from rand.Reader
var tmp [4]byte
_, err := rand.Read(tmp[:])
if err != nil {
return fmt.Errorf("gzhttp: %w", err)
}
jitRNG = binary.LittleEndian.Uint32(tmp[:])
}
jit := w.randomJitter[:1+jitRNG%uint32(len(w.randomJitter)-1)]
w.gw.(writer.GzipWriterExt).SetHeader(writer.Header{Comment: jit})
}
n, err := w.gw.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
@ -259,16 +317,22 @@ func (w *GzipResponseWriter) Close() error {
if w.ignore {
return nil
}
if w.gw == nil {
// GZIP not triggered yet, write out regular response.
err := w.startPlain()
// Returns the error if any at write.
if err != nil {
err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
var (
ct = w.Header().Get(contentType)
ce = w.Header().Get(contentEncoding)
cr = w.Header().Get(contentRange)
)
// fmt.Println(len(w.buf) == 0, len(w.buf) < w.minSize, len(w.Header()[HeaderNoCompression]) != 0, ce != "", cr != "", !w.contentTypeFilter(ct))
if len(w.buf) == 0 || len(w.buf) < w.minSize || len(w.Header()[HeaderNoCompression]) != 0 || ce != "" || cr != "" || !w.contentTypeFilter(ct) {
// GZIP not triggered, write out regular response.
return w.startPlain()
}
err := w.startGzip(nil)
if err != nil {
return err
}
}
err := w.gw.Close()
w.gw = nil
@ -310,7 +374,7 @@ func (w *GzipResponseWriter) Flush() {
// See if we should compress...
if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
w.startGzip()
w.startGzip(nil)
} else {
w.startPlain()
}
@ -392,6 +456,9 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
suffixETag: c.suffixETag,
buf: gw.buf,
setContentType: c.setContentType,
randomJitter: c.randomJitter,
jitterBuffer: c.jitterBuffer,
sha256Jitter: c.sha256Jitter,
}
if len(gw.buf) > 0 {
gw.buf = gw.buf[:0]
@ -408,6 +475,7 @@ func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
} else {
h.ServeHTTP(gw, r)
}
w.Header().Del(HeaderNoCompression)
} else {
h.ServeHTTP(newNoGzipResponseWriter(w), r)
w.Header().Del(HeaderNoCompression)
@ -455,6 +523,9 @@ type config struct {
setContentType bool
suffixETag string
dropETag bool
jitterBuffer int
randomJitter string
sha256Jitter bool
}
func (c *config) validate() error {
@ -466,7 +537,16 @@ func (c *config) validate() error {
if c.minSize < 0 {
return fmt.Errorf("minimum size must be more than zero")
}
if len(c.randomJitter) >= math.MaxUint16 {
return fmt.Errorf("random jitter size exceeded")
}
if len(c.randomJitter) > 0 {
gzw, ok := c.writer.New(io.Discard, c.level).(writer.GzipWriterExt)
if !ok {
return errors.New("the custom compressor does not allow setting headers for random jitter")
}
gzw.Close()
}
return nil
}
@ -496,8 +576,9 @@ func SetContentType(b bool) option {
// Implementation changes the implementation of GzipWriter
//
// The default implementation is writer/stdlib/NewWriter
// which is backed by standard library's compress/zlib
// The default implementation is backed by github.com/klauspost/compress
// To support RandomJitter, the GzipWriterExt must also be
// supported by the returned writers.
func Implementation(writer writer.GzipWriterFactory) option {
return func(c *config) {
c.writer = writer
@ -625,6 +706,31 @@ func DropETag() option {
}
}
// RandomJitter adds 1->n random bytes to output based on checksum of payload.
// Specify the amount of input to buffer before applying jitter.
// This should cover the sensitive part of your response.
// This can be used to obfuscate the exact compressed size.
// Specifying 0 will use a buffer size of 64KB.
// 'paranoid' will use a slower hashing function, that MAY provide more safety.
// See README.md for more information.
// If a negative buffer is given, the amount of jitter will not be content dependent.
// This provides *less* security than applying content based jitter.
func RandomJitter(n, buffer int, paranoid bool) option {
return func(c *config) {
if n > 0 {
c.sha256Jitter = paranoid
c.randomJitter = strings.Repeat("Padding-", 1+(n/8))[:n+1]
c.jitterBuffer = buffer
if c.jitterBuffer == 0 {
c.jitterBuffer = 64 << 10
}
} else {
c.randomJitter = ""
c.jitterBuffer = 0
}
}
}
// acceptsGzip returns true if the given HTTP request indicates that it will
// accept a gzipped response.
func acceptsGzip(r *http.Request) bool {
@ -702,10 +808,23 @@ func parseEncodings(s string) (codings, error) {
return c, nil
}
var errEmptyEncoding = errors.New("empty content-coding")
// parseCoding parses a single coding (content-coding with an optional qvalue),
// as might appear in an Accept-Encoding header. It attempts to forgive minor
// formatting errors.
func parseCoding(s string) (coding string, qvalue float64, err error) {
// Avoid splitting if we can...
if len(s) == 0 {
return "", 0, errEmptyEncoding
}
if !strings.ContainsRune(s, ';') {
coding = strings.ToLower(strings.TrimSpace(s))
if coding == "" {
err = errEmptyEncoding
}
return coding, DefaultQValue, err
}
for n, part := range strings.Split(s, ";") {
part = strings.TrimSpace(part)
qvalue = DefaultQValue
@ -724,7 +843,7 @@ func parseCoding(s string) (coding string, qvalue float64, err error) {
}
if coding == "" {
err = fmt.Errorf("empty content-coding")
err = errEmptyEncoding
}
return
@ -766,6 +885,9 @@ const intSize = 32 << (^uint(0) >> 63)
// atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
func atoi(s string) (int, bool) {
if len(s) == 0 {
return 0, false
}
sLen := len(s)
if intSize == 32 && (0 < sLen && sLen < 10) ||
intSize == 64 && (0 < sLen && sLen < 19) {

View file

@ -61,6 +61,15 @@ func NewWriter(w io.Writer, level int) writer.GzipWriter {
}
}
// SetHeader will override the gzip header on pw.
func (pw *pooledWriter) SetHeader(h writer.Header) {
pw.Name = h.Name
pw.Extra = h.Extra
pw.Comment = h.Comment
pw.ModTime = h.ModTime
pw.OS = h.OS
}
func Levels() (min, max int) {
return gzip.StatelessCompression, gzip.BestCompression
}

View file

@ -1,6 +1,9 @@
package writer
import "io"
import (
"io"
"time"
)
// GzipWriter implements the functions needed for compressing content.
type GzipWriter interface {
@ -9,6 +12,24 @@ type GzipWriter interface {
Flush() error
}
// GzipWriterExt implements the functions needed for compressing content
// and optional extensions.
type GzipWriterExt interface {
GzipWriter
// SetHeader will populate header fields with non-nil values in h.
SetHeader(h Header)
}
// Header is a gzip header.
type Header struct {
Comment string // comment
Extra []byte // "extra data"
ModTime time.Time // modification time
Name string // file name
OS byte // operating system type
}
// GzipWriterFactory contains the information needed for custom gzip implementations.
type GzipWriterFactory struct {
// Must return the minimum and maximum supported level.

View file

@ -60,6 +60,22 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
b.nBits += encA.nBits + encB.nBits
}
// encFourSymbols adds up to 32 bits from four symbols.
// It will not check if there is space for them,
// so the caller must ensure that b has been flushed recently.
func (b *bitWriter) encFourSymbols(encA, encB, encC, encD cTableEntry) {
bitsA := encA.nBits
bitsB := bitsA + encB.nBits
bitsC := bitsB + encC.nBits
bitsD := bitsC + encD.nBits
combined := uint64(encA.val) |
(uint64(encB.val) << (bitsA & 63)) |
(uint64(encC.val) << (bitsB & 63)) |
(uint64(encD.val) << (bitsC & 63))
b.bitContainer |= combined << (b.nBits & 63)
b.nBits += bitsD
}
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {

View file

@ -248,8 +248,7 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
bw.encFourSymbols(cTable[tmp[3]], cTable[tmp[2]], cTable[tmp[1]], cTable[tmp[0]])
}
} else {
for ; n >= 0; n -= 4 {

View file

@ -717,3 +717,11 @@ func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockAsm should be unreachable")
}
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockSnappyAsm should be unreachable")
}

View file

@ -212,7 +212,17 @@ func matchLen(a []byte, b []byte) int
//go:noescape
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4Block converts an LZ4 block to S2
// cvtLZ4sBlock converts an LZ4s block to S2
//
//go:noescape
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4Block converts an LZ4 block to Snappy
//
//go:noescape
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4sBlock converts an LZ4s block to Snappy
//
//go:noescape
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)

View file

@ -19271,6 +19271,491 @@ lz4_s2_dstfull:
MOVQ SI, uncompressed+48(FP)
RET
// func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// Requires: SSE2
TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
XORQ SI, SI
MOVQ dst_base+0(FP), AX
MOVQ dst_len+8(FP), CX
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
LEAQ -10(AX)(CX*1), CX
XORQ DI, DI
lz4s_s2_loop:
CMPQ DX, BX
JAE lz4s_s2_corrupt
CMPQ AX, CX
JAE lz4s_s2_dstfull
MOVBQZX (DX), R8
MOVQ R8, R9
MOVQ R8, R10
SHRQ $0x04, R9
ANDQ $0x0f, R10
CMPQ R8, $0xf0
JB lz4s_s2_ll_end
lz4s_s2_ll_loop:
INCQ DX
CMPQ DX, BX
JAE lz4s_s2_corrupt
MOVBQZX (DX), R8
ADDQ R8, R9
CMPQ R8, $0xff
JEQ lz4s_s2_ll_loop
lz4s_s2_ll_end:
LEAQ (DX)(R9*1), R8
ADDQ $0x03, R10
CMPQ R8, BX
JAE lz4s_s2_corrupt
INCQ DX
INCQ R8
TESTQ R9, R9
JZ lz4s_s2_lits_done
LEAQ (AX)(R9*1), R11
CMPQ R11, CX
JAE lz4s_s2_dstfull
ADDQ R9, SI
LEAL -1(R9), R11
CMPL R11, $0x3c
JLT one_byte_lz4s_s2
CMPL R11, $0x00000100
JLT two_bytes_lz4s_s2
CMPL R11, $0x00010000
JLT three_bytes_lz4s_s2
CMPL R11, $0x01000000
JLT four_bytes_lz4s_s2
MOVB $0xfc, (AX)
MOVL R11, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_lz4s_s2
four_bytes_lz4s_s2:
MOVL R11, R12
SHRL $0x10, R12
MOVB $0xf8, (AX)
MOVW R11, 1(AX)
MOVB R12, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_lz4s_s2
three_bytes_lz4s_s2:
MOVB $0xf4, (AX)
MOVW R11, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_lz4s_s2
two_bytes_lz4s_s2:
MOVB $0xf0, (AX)
MOVB R11, 1(AX)
ADDQ $0x02, AX
CMPL R11, $0x40
JL memmove_lz4s_s2
JMP memmove_long_lz4s_s2
one_byte_lz4s_s2:
SHLB $0x02, R11
MOVB R11, (AX)
ADDQ $0x01, AX
memmove_lz4s_s2:
LEAQ (AX)(R9*1), R11
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_lz4s_s2_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_lz4s_s2_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_lz4s_s2_memmove_move_17through32
JMP emit_lit_memmove_lz4s_s2_memmove_move_33through64
emit_lit_memmove_lz4s_s2_memmove_move_8:
MOVQ (DX), R12
MOVQ R12, (AX)
JMP memmove_end_copy_lz4s_s2
emit_lit_memmove_lz4s_s2_memmove_move_8through16:
MOVQ (DX), R12
MOVQ -8(DX)(R9*1), DX
MOVQ R12, (AX)
MOVQ DX, -8(AX)(R9*1)
JMP memmove_end_copy_lz4s_s2
emit_lit_memmove_lz4s_s2_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_lz4s_s2
emit_lit_memmove_lz4s_s2_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R9*1), X2
MOVOU -16(DX)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_lz4s_s2:
MOVQ R11, AX
JMP lz4s_s2_lits_emit_done
memmove_long_lz4s_s2:
LEAQ (AX)(R9*1), R11
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R9*1), X2
MOVOU -16(DX)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R12
ANDL $0x0000001f, R12
MOVQ $0x00000040, R14
SUBQ R12, R14
DECQ R13
JA emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
LEAQ -32(DX)(R14*1), R12
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_lz4s_s2large_big_loop_back:
MOVOU (R12), X4
MOVOU 16(R12), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R12
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_lz4s_s2large_big_loop_back
emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32:
MOVOU -32(DX)(R14*1), X4
MOVOU -16(DX)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_lz4s_s2large_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R11, AX
lz4s_s2_lits_emit_done:
MOVQ R8, DX
lz4s_s2_lits_done:
CMPQ DX, BX
JNE lz4s_s2_match
CMPQ R10, $0x03
JEQ lz4s_s2_done
JMP lz4s_s2_corrupt
lz4s_s2_match:
CMPQ R10, $0x03
JEQ lz4s_s2_loop
LEAQ 2(DX), R8
CMPQ R8, BX
JAE lz4s_s2_corrupt
MOVWQZX (DX), R9
MOVQ R8, DX
TESTQ R9, R9
JZ lz4s_s2_corrupt
CMPQ R9, SI
JA lz4s_s2_corrupt
CMPQ R10, $0x12
JNE lz4s_s2_ml_done
lz4s_s2_ml_loop:
MOVBQZX (DX), R8
INCQ DX
ADDQ R8, R10
CMPQ DX, BX
JAE lz4s_s2_corrupt
CMPQ R8, $0xff
JEQ lz4s_s2_ml_loop
lz4s_s2_ml_done:
ADDQ R10, SI
CMPQ R9, DI
JNE lz4s_s2_docopy
// emitRepeat
emit_repeat_again_lz4_s2:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
JLE repeat_two_lz4_s2
CMPL R8, $0x0c
JGE cant_repeat_two_offset_lz4_s2
CMPL R9, $0x00000800
JLT repeat_two_offset_lz4_s2
cant_repeat_two_offset_lz4_s2:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2
LEAL -16842747(R10), R10
MOVL $0xfffb001d, (AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2
repeat_five_lz4_s2:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4s_s2_loop
repeat_four_lz4_s2:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP lz4s_s2_loop
repeat_three_lz4_s2:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP lz4s_s2_loop
repeat_two_lz4_s2:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
repeat_two_offset_lz4_s2:
XORQ R8, R8
LEAL 1(R8)(R10*4), R10
MOVB R9, 1(AX)
SARL $0x08, R9
SHLL $0x05, R9
ORL R9, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
lz4s_s2_docopy:
MOVQ R9, DI
// emitCopy
CMPL R10, $0x40
JLE two_byte_offset_short_lz4_s2
CMPL R9, $0x00000800
JAE long_offset_short_lz4_s2
MOVL $0x00000001, R8
LEAL 16(R8), R8
MOVB R9, 1(AX)
MOVL R9, R11
SHRL $0x08, R11
SHLL $0x05, R11
ORL R11, R8
MOVB R8, (AX)
ADDQ $0x02, AX
SUBL $0x08, R10
// emitRepeat
LEAL -4(R10), R10
JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
emit_repeat_again_lz4_s2_emit_copy_short_2b:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
JLE repeat_two_lz4_s2_emit_copy_short_2b
CMPL R8, $0x0c
JGE cant_repeat_two_offset_lz4_s2_emit_copy_short_2b
CMPL R9, $0x00000800
JLT repeat_two_offset_lz4_s2_emit_copy_short_2b
cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2_emit_copy_short_2b
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2_emit_copy_short_2b
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2_emit_copy_short_2b
LEAL -16842747(R10), R10
MOVL $0xfffb001d, (AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2_emit_copy_short_2b
repeat_five_lz4_s2_emit_copy_short_2b:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4s_s2_loop
repeat_four_lz4_s2_emit_copy_short_2b:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP lz4s_s2_loop
repeat_three_lz4_s2_emit_copy_short_2b:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP lz4s_s2_loop
repeat_two_lz4_s2_emit_copy_short_2b:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
repeat_two_offset_lz4_s2_emit_copy_short_2b:
XORQ R8, R8
LEAL 1(R8)(R10*4), R10
MOVB R9, 1(AX)
SARL $0x08, R9
SHLL $0x05, R9
ORL R9, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
long_offset_short_lz4_s2:
MOVB $0xee, (AX)
MOVW R9, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
emit_repeat_again_lz4_s2_emit_copy_short:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
JLE repeat_two_lz4_s2_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_lz4_s2_emit_copy_short
CMPL R9, $0x00000800
JLT repeat_two_offset_lz4_s2_emit_copy_short
cant_repeat_two_offset_lz4_s2_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2_emit_copy_short
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2_emit_copy_short
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2_emit_copy_short
LEAL -16842747(R10), R10
MOVL $0xfffb001d, (AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2_emit_copy_short
repeat_five_lz4_s2_emit_copy_short:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4s_s2_loop
repeat_four_lz4_s2_emit_copy_short:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP lz4s_s2_loop
repeat_three_lz4_s2_emit_copy_short:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP lz4s_s2_loop
repeat_two_lz4_s2_emit_copy_short:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
repeat_two_offset_lz4_s2_emit_copy_short:
XORQ R8, R8
LEAL 1(R8)(R10*4), R10
MOVB R9, 1(AX)
SARL $0x08, R9
SHLL $0x05, R9
ORL R9, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
two_byte_offset_short_lz4_s2:
MOVL R10, R8
SHLL $0x02, R8
CMPL R10, $0x0c
JGE emit_copy_three_lz4_s2
CMPL R9, $0x00000800
JGE emit_copy_three_lz4_s2
LEAL -15(R8), R8
MOVB R9, 1(AX)
SHRL $0x08, R9
SHLL $0x05, R9
ORL R9, R8
MOVB R8, (AX)
ADDQ $0x02, AX
JMP lz4s_s2_loop
emit_copy_three_lz4_s2:
LEAL -2(R8), R8
MOVB R8, (AX)
MOVW R9, 1(AX)
ADDQ $0x03, AX
JMP lz4s_s2_loop
lz4s_s2_done:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ SI, uncompressed+48(FP)
MOVQ AX, dstUsed+56(FP)
RET
lz4s_s2_corrupt:
XORQ AX, AX
LEAQ -1(AX), SI
MOVQ SI, uncompressed+48(FP)
RET
lz4s_s2_dstfull:
XORQ AX, AX
LEAQ -2(AX), SI
MOVQ SI, uncompressed+48(FP)
RET
// func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// Requires: SSE2
TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
@ -19536,3 +20021,271 @@ lz4_snappy_dstfull:
LEAQ -2(AX), SI
MOVQ SI, uncompressed+48(FP)
RET
// func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// Requires: SSE2
TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
XORQ SI, SI
MOVQ dst_base+0(FP), AX
MOVQ dst_len+8(FP), CX
MOVQ src_base+24(FP), DX
MOVQ src_len+32(FP), BX
LEAQ (DX)(BX*1), BX
LEAQ -10(AX)(CX*1), CX
lz4s_snappy_loop:
CMPQ DX, BX
JAE lz4s_snappy_corrupt
CMPQ AX, CX
JAE lz4s_snappy_dstfull
MOVBQZX (DX), DI
MOVQ DI, R8
MOVQ DI, R9
SHRQ $0x04, R8
ANDQ $0x0f, R9
CMPQ DI, $0xf0
JB lz4s_snappy_ll_end
lz4s_snappy_ll_loop:
INCQ DX
CMPQ DX, BX
JAE lz4s_snappy_corrupt
MOVBQZX (DX), DI
ADDQ DI, R8
CMPQ DI, $0xff
JEQ lz4s_snappy_ll_loop
lz4s_snappy_ll_end:
LEAQ (DX)(R8*1), DI
ADDQ $0x03, R9
CMPQ DI, BX
JAE lz4s_snappy_corrupt
INCQ DX
INCQ DI
TESTQ R8, R8
JZ lz4s_snappy_lits_done
LEAQ (AX)(R8*1), R10
CMPQ R10, CX
JAE lz4s_snappy_dstfull
ADDQ R8, SI
LEAL -1(R8), R10
CMPL R10, $0x3c
JLT one_byte_lz4s_snappy
CMPL R10, $0x00000100
JLT two_bytes_lz4s_snappy
CMPL R10, $0x00010000
JLT three_bytes_lz4s_snappy
CMPL R10, $0x01000000
JLT four_bytes_lz4s_snappy
MOVB $0xfc, (AX)
MOVL R10, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_lz4s_snappy
four_bytes_lz4s_snappy:
MOVL R10, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW R10, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_lz4s_snappy
three_bytes_lz4s_snappy:
MOVB $0xf4, (AX)
MOVW R10, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_lz4s_snappy
two_bytes_lz4s_snappy:
MOVB $0xf0, (AX)
MOVB R10, 1(AX)
ADDQ $0x02, AX
CMPL R10, $0x40
JL memmove_lz4s_snappy
JMP memmove_long_lz4s_snappy
one_byte_lz4s_snappy:
SHLB $0x02, R10
MOVB R10, (AX)
ADDQ $0x01, AX
memmove_lz4s_snappy:
LEAQ (AX)(R8*1), R10
// genMemMoveShort
CMPQ R8, $0x08
JLE emit_lit_memmove_lz4s_snappy_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_lz4s_snappy_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_lz4s_snappy_memmove_move_17through32
JMP emit_lit_memmove_lz4s_snappy_memmove_move_33through64
emit_lit_memmove_lz4s_snappy_memmove_move_8:
MOVQ (DX), R11
MOVQ R11, (AX)
JMP memmove_end_copy_lz4s_snappy
emit_lit_memmove_lz4s_snappy_memmove_move_8through16:
MOVQ (DX), R11
MOVQ -8(DX)(R8*1), DX
MOVQ R11, (AX)
MOVQ DX, -8(AX)(R8*1)
JMP memmove_end_copy_lz4s_snappy
emit_lit_memmove_lz4s_snappy_memmove_move_17through32:
MOVOU (DX), X0
MOVOU -16(DX)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_lz4s_snappy
emit_lit_memmove_lz4s_snappy_memmove_move_33through64:
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R8*1), X2
MOVOU -16(DX)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_lz4s_snappy:
MOVQ R10, AX
JMP lz4s_snappy_lits_emit_done
memmove_long_lz4s_snappy:
LEAQ (AX)(R8*1), R10
// genMemMoveLong
MOVOU (DX), X0
MOVOU 16(DX), X1
MOVOU -32(DX)(R8*1), X2
MOVOU -16(DX)(R8*1), X3
MOVQ R8, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
DECQ R12
JA emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
LEAQ -32(DX)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_lz4s_snappylarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_lz4s_snappylarge_big_loop_back
emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32:
MOVOU -32(DX)(R13*1), X4
MOVOU -16(DX)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R8, R13
JAE emit_lit_memmove_long_lz4s_snappylarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ R10, AX
lz4s_snappy_lits_emit_done:
MOVQ DI, DX
lz4s_snappy_lits_done:
CMPQ DX, BX
JNE lz4s_snappy_match
CMPQ R9, $0x03
JEQ lz4s_snappy_done
JMP lz4s_snappy_corrupt
lz4s_snappy_match:
CMPQ R9, $0x03
JEQ lz4s_snappy_loop
LEAQ 2(DX), DI
CMPQ DI, BX
JAE lz4s_snappy_corrupt
MOVWQZX (DX), R8
MOVQ DI, DX
TESTQ R8, R8
JZ lz4s_snappy_corrupt
CMPQ R8, SI
JA lz4s_snappy_corrupt
CMPQ R9, $0x12
JNE lz4s_snappy_ml_done
lz4s_snappy_ml_loop:
MOVBQZX (DX), DI
INCQ DX
ADDQ DI, R9
CMPQ DX, BX
JAE lz4s_snappy_corrupt
CMPQ DI, $0xff
JEQ lz4s_snappy_ml_loop
lz4s_snappy_ml_done:
ADDQ R9, SI
// emitCopy
two_byte_offset_lz4_s2:
CMPL R9, $0x40
JLE two_byte_offset_short_lz4_s2
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R9), R9
ADDQ $0x03, AX
CMPQ AX, CX
JAE lz4s_snappy_loop
JMP two_byte_offset_lz4_s2
two_byte_offset_short_lz4_s2:
MOVL R9, DI
SHLL $0x02, DI
CMPL R9, $0x0c
JGE emit_copy_three_lz4_s2
CMPL R8, $0x00000800
JGE emit_copy_three_lz4_s2
LEAL -15(DI), DI
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, DI
MOVB DI, (AX)
ADDQ $0x02, AX
JMP lz4s_snappy_loop
emit_copy_three_lz4_s2:
LEAL -2(DI), DI
MOVB DI, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP lz4s_snappy_loop
lz4s_snappy_done:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ SI, uncompressed+48(FP)
MOVQ AX, dstUsed+56(FP)
RET
lz4s_snappy_corrupt:
XORQ AX, AX
LEAQ -1(AX), SI
MOVQ SI, uncompressed+48(FP)
RET
lz4s_snappy_dstfull:
XORQ AX, AX
LEAQ -2(AX), SI
MOVQ SI, uncompressed+48(FP)
RET

467
vendor/github.com/klauspost/compress/s2/lz4sconvert.go generated vendored Normal file
View file

@ -0,0 +1,467 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"fmt"
)
// LZ4sConverter provides conversion from LZ4s.
// (Intel modified LZ4 Blocks)
// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
// The LZ4s block returned by the Intel® QAT hardware can be used by an external
// software post-processing to generate other compressed data formats.
// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
// the same high-level formatting as LZ4 block format with the following encoding changes:
// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
// ONLY "Min match of 4 bytes" is supported.
type LZ4sConverter struct {
}
// ConvertBlock will convert an LZ4s block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4s block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}

View file

@ -9,6 +9,7 @@ import (
"encoding/binary"
"errors"
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
@ -442,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
}
}
var err error
if debugDecoder {
println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals))
}
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)

View file

@ -54,7 +54,7 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {
return 0, nil
return 0, io.ErrUnexpectedEOF
}
r := bb[0]
*b = bb[1:]

View file

@ -32,7 +32,6 @@ type match struct {
length int32
rep int32
est int32
_ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
}
const highScore = 25000
@ -189,12 +188,6 @@ encodeLoop:
panic("offset0 was 0")
}
bestOf := func(a, b *match) *match {
if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
return a
}
return b
}
const goodEnough = 100
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
@ -202,40 +195,41 @@ encodeLoop:
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{s: s, est: highScore}
return
}
if debugAsserts {
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
return m
cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
*m = cand
}
}
m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
best := match{s: s, est: highScore}
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)
if canRepeat && best.length < goodEnough {
cv32 := uint32(cv >> 8)
spp := s + 1
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
improve(&best, spp-offset1, spp, cv32, 1)
improve(&best, spp-offset2, spp, cv32, 2)
improve(&best, spp-offset3, spp, cv32, 3)
if best.length > 0 {
cv32 = uint32(cv >> 24)
spp += 2
m1 := matchAt(spp-offset1, spp, cv32, 1)
m2 := matchAt(spp-offset2, spp, cv32, 2)
m3 := matchAt(spp-offset3, spp, cv32, 3)
best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
improve(&best, spp-offset1, spp, cv32, 1)
improve(&best, spp-offset2, spp, cv32, 2)
improve(&best, spp-offset3, spp, cv32, 3)
}
}
// Load next and check...
@ -262,18 +256,16 @@ encodeLoop:
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
// Short at s+1
m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
// Long at s+1, s+2
m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
if false {
// Short at s+3.
// Too often worse...
m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
best = bestOf(best, &m)
improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
}
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
@ -284,13 +276,10 @@ encodeLoop:
// For this compression level 2 yields the best results.
const skipBeginning = 2
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd := bestOf(best, &m)
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
bestEnd = bestOf(bestEnd, &m)
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
}
best = bestEnd
}
}
}

View file

@ -314,9 +314,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
if size-startSize == 424242 {
panic("here")
}
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if size > cap(out) {
@ -427,8 +424,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
}
// Check if space for literals
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
if size := len(s.literals) + len(out) - startSize; size > maxBlockSize {
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}

View file

@ -148,7 +148,6 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {

2
vendor/modules.txt vendored
View file

@ -335,7 +335,7 @@ github.com/jmespath/go-jmespath
# github.com/jpillora/backoff v1.0.0
## explicit; go 1.13
github.com/jpillora/backoff
# github.com/klauspost/compress v1.16.0
# github.com/klauspost/compress v1.16.3
## explicit; go 1.18
github.com/klauspost/compress
github.com/klauspost/compress/flate