vendor: update github.com/klauspost/compress from v1.15.12 to v1.15.13

2024-11-21 14:44:00 +00:00 · 2022-12-11 02:10:51 -08:00 · 2022-12-11 02:10:51 -08:00 · 4a4b3c2462
commit 4a4b3c2462
parent 9f642d10ff
24 changed files with 452 additions and 442 deletions
--- a/go.mod
+++ b/go.mod
@ -23,7 +23,7 @@ require (
 	github.com/golang/snappy v0.0.4
 	github.com/googleapis/gax-go/v2 v2.7.0
 	github.com/influxdata/influxdb v1.11.0
-	github.com/klauspost/compress v1.15.12
+	github.com/klauspost/compress v1.15.13
 	github.com/prometheus/prometheus v0.40.6
 	github.com/urfave/cli/v2 v2.23.7
 	github.com/valyala/fastjson v1.6.3
--- a/go.sum
+++ b/go.sum
@ -320,8 +320,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
 github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
-github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM=
+github.com/klauspost/compress v1.15.13 h1:NFn1Wr8cfnenSJSA46lLq4wHCcBzKTSjnBIexDMMOV0=
-github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
+github.com/klauspost/compress v1.15.13/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@ -17,6 +17,11 @@ This package provides various compression algorithms.
 # changelog
 * Oct 26, 2022 (v1.15.12)
 	* zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680
 	* gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683
 * Sept 26, 2022 (v1.15.11)
 	* flate: Improve level 1-3 compression  https://github.com/klauspost/compress/pull/678
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@ -365,29 +365,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	m := uint32(0)
 	if len(s.prevTable) > 0 {
 		for i, v := range s.count[:] {
 			if v == 0 {
 				continue
 			}
 			if v > m {
 				m = v
 			}
-			if v > 0 {
+			s.symbolLen = uint16(i) + 1
-				s.symbolLen = uint16(i) + 1
+			if i >= len(s.prevTable) {
-				if i >= len(s.prevTable) {
+				reuse = false
-					reuse = false
+			} else if s.prevTable[i].nBits == 0 {
-				} else {
+				reuse = false
 					if s.prevTable[i].nBits == 0 {
 						reuse = false
 					}
 				}
 			}
 		}
 		return int(m), reuse
 	}
 	for i, v := range s.count[:] {
 		if v == 0 {
 			continue
 		}
 		if v > m {
 			m = v
 		}
-		if v > 0 {
+		s.symbolLen = uint16(i) + 1
 			s.symbolLen = uint16(i) + 1
 		}
 	}
 	return int(m), false
 }
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@ -82,8 +82,9 @@ type blockDec struct {
 	err error
-	// Check against this crc
+	// Check against this crc, if hasCRC is true.
-	checkCRC []byte
+	checkCRC uint32
 	hasCRC   bool
 	// Frame to use for singlethreaded decoding.
 	// Should not be used by the decoder itself since parent may be another frame.
--- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@ -4,7 +4,6 @@
 package zstd
 import (
 	"bytes"
 	"encoding/binary"
 	"errors"
 	"io"
@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error {
 	}
 	h.HeaderSize += 4
 	b, in := in[:4], in[4:]
-	if !bytes.Equal(b, frameMagic) {
+	if string(b) != frameMagic {
-		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
 			return ErrMagicMismatch
 		}
 		if len(in) < 4 {
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@ -5,7 +5,6 @@
 package zstd
 import (
 	"bytes"
 	"context"
 	"encoding/binary"
 	"io"
@ -459,7 +458,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 		println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
 	}
-	if !d.o.ignoreChecksum && len(next.b) > 0 {
+	if d.o.ignoreChecksum {
 		return true
 	}
 	if len(next.b) > 0 {
 		n, err := d.current.crc.Write(next.b)
 		if err == nil {
 			if n != len(next.b) {
@ -467,18 +470,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 			}
 		}
 	}
-	if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 {
+	if next.err == nil && next.d != nil && next.d.hasCRC {
-		got := d.current.crc.Sum64()
+		got := uint32(d.current.crc.Sum64())
-		var tmp [4]byte
+		if got != next.d.checkCRC {
 		binary.LittleEndian.PutUint32(tmp[:], uint32(got))
 		if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
 			if debugDecoder {
-				println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
+				printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC)
 			}
 			d.current.err = ErrCRCMismatch
 		} else {
 			if debugDecoder {
-				println("CRC ok", tmp[:])
+				printf("CRC ok %08x\n", got)
 			}
 		}
 	}
@ -918,18 +919,22 @@ decodeStream:
 				println("next block returned error:", err)
 			}
 			dec.err = err
-			dec.checkCRC = nil
+			dec.hasCRC = false
 			if dec.Last && frame.HasCheckSum && err == nil {
 				crc, err := frame.rawInput.readSmall(4)
-				if err != nil {
+				if len(crc) < 4 {
 					if err == nil {
 						err = io.ErrUnexpectedEOF
 					}
 					println("CRC missing?", err)
 					dec.err = err
-				}
+				} else {
-				var tmp [4]byte
+					dec.checkCRC = binary.LittleEndian.Uint32(crc)
-				copy(tmp[:], crc)
+					dec.hasCRC = true
-				dec.checkCRC = tmp[:]
+					if debugDecoder {
-				if debugDecoder {
+						printf("found crc to check: %08x\n", dec.checkCRC)
-					println("found crc to check:", dec.checkCRC)
+					}
 				}
 			}
 			err = dec.err
--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@ -1,7 +1,6 @@
 package zstd
 import (
 	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
@ -20,7 +19,7 @@ type dict struct {
 	content []byte
 }
-var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+const dictMagic = "\x37\xa4\x30\xec"
 // ID returns the dictionary id or 0 if d is nil.
 func (d *dict) ID() uint32 {
@ -50,7 +49,7 @@ func loadDict(b []byte) (*dict, error) {
 		ofDec: sequenceDec{fse: &fseDecoder{}},
 		mlDec: sequenceDec{fse: &fseDecoder{}},
 	}
-	if !bytes.Equal(b[:4], dictMagic[:]) {
+	if string(b[:4]) != dictMagic {
 		return nil, ErrMagicMismatch
 	}
 	d.id = binary.LittleEndian.Uint32(b[4:8])
--- a/vendor/github.com/klauspost/compress/zstd/enc_base.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@ -16,6 +16,7 @@ type fastBase struct {
 	cur int32
 	// maximum offset. Should be at least 2x block size.
 	maxMatchOff int32
 	bufferReset int32
 	hist        []byte
 	crc         *xxhash.Digest
 	tmp         [8]byte
@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
 }
 func (e *fastBase) addBlock(src []byte) int32 {
-	if debugAsserts && e.cur > bufferReset {
+	if debugAsserts && e.cur > e.bufferReset {
-		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+		panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
 	}
 	// check if we have space already
 	if len(e.hist)+len(src) > cap(e.hist) {
@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
 			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
 		}
 	}
-	a := src[s:]
+	return int32(matchLen(src[s:], src[t:]))
 	b := src[t:]
 	b = b[:len(a)]
 	end := int32((len(a) >> 3) << 3)
 	for i := int32(0); i < end; i += 8 {
 		if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
 			return i + int32(bits.TrailingZeros64(diff)>>3)
 		}
 	}
 	a = a[end:]
 	b = b[end:]
 	for i := range a {
 		if a[i] != b[i] {
 			return int32(i) + end
 		}
 	}
 	return int32(len(a)) + end
 }
 // Reset the encoding table.
@ -171,7 +155,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
 	// We offset current position so everything will be out of reach.
 	// If above reset line, history will be purged.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += e.maxMatchOff + int32(len(e.hist))
 	}
 	e.hist = e.hist[:0]
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@ -85,14 +85,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
+			e.table = [bestShortTableSize]prevEntry{}
-				e.table[i] = prevEntry{}
+			e.longTable = [bestLongTableSize]prevEntry{}
 			}
 			for i := range e.longTable[:] {
 				e.longTable[i] = prevEntry{}
 			}
 			e.cur = e.maxMatchOff
 			break
 		}
@ -193,8 +189,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
-		bestOf := func(a, b match) match {
+		bestOf := func(a, b *match) *match {
-			if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
+			if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 {
 				return a
 			}
 			return b
@ -220,22 +216,26 @@ encodeLoop:
 			return m
 		}
-		best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+		m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
-		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+		m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
-		best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+		m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
 		m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)
 		best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4))
 		if canRepeat && best.length < goodEnough {
 			cv32 := uint32(cv >> 8)
 			spp := s + 1
-			best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
+			m1 := matchAt(spp-offset1, spp, cv32, 1)
-			best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
+			m2 := matchAt(spp-offset2, spp, cv32, 2)
-			best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+			m3 := matchAt(spp-offset3, spp, cv32, 3)
 			best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
 			if best.length > 0 {
 				cv32 = uint32(cv >> 24)
 				spp += 2
-				best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
+				m1 := matchAt(spp-offset1, spp, cv32, 1)
-				best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
+				m2 := matchAt(spp-offset2, spp, cv32, 2)
-				best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
+				m3 := matchAt(spp-offset3, spp, cv32, 3)
 				best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3))
 			}
 		}
 		// Load next and check...
@ -262,26 +262,33 @@ encodeLoop:
 			candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
 			// Short at s+1
-			best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+			m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)
 			// Long at s+1, s+2
-			best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
+			m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)
-			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+			m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)
-			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
+			m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
-			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+			m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
 			best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5))
 			if false {
 				// Short at s+3.
 				// Too often worse...
-				best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
+				m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
 				best = bestOf(best, &m)
 			}
 			// See if we can find a better match by checking where the current best ends.
 			// Use that offset to see if we can find a better full match.
 			if sAt := best.s + best.length; sAt < sLimit {
 				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
 				candidateEnd := e.longTable[nextHashL]
-				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
+				// Start check at a fixed offset to allow for a few mismatches.
-					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
+				// For this compression level 2 yields the best results.
-					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
+				const skipBeginning = 2
-						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+				if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
 					m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
 					bestEnd := bestOf(best, &m)
 					if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
 						m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
 						bestEnd = bestOf(bestEnd, &m)
 					}
 					best = bestEnd
 				}
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
+			e.table = [betterShortTableSize]tableEntry{}
-				e.table[i] = tableEntry{}
+			e.longTable = [betterLongTableSize]prevEntry{}
 			}
 			for i := range e.longTable[:] {
 				e.longTable[i] = prevEntry{}
 			}
 			e.cur = e.maxMatchOff
 			break
 		}
@ -587,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
+			e.table = [dFastShortTableSize]tableEntry{}
-				e.table[i] = tableEntry{}
+			e.longTable = [dFastLongTableSize]tableEntry{}
 			}
 			for i := range e.longTable[:] {
 				e.longTable[i] = tableEntry{}
 			}
 			e.cur = e.maxMatchOff
 			break
 		}
@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@ -685,7 +681,7 @@ encodeLoop:
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	)
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
 			for i := range e.table[:] {
 				e.table[i] = tableEntry{}
@ -310,7 +310,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 	}
 	// Protect against e.cur wraparound.
-	if e.cur >= bufferReset {
+	if e.cur >= e.bufferReset {
 		for i := range e.table[:] {
 			e.table[i] = tableEntry{}
 		}
@ -538,7 +538,7 @@ encodeLoop:
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
-	if e.cur < bufferReset {
+	if e.cur < e.bufferReset {
 		e.cur += int32(len(src))
 	}
 }
@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		return
 	}
 	// Protect against e.cur wraparound.
-	for e.cur >= bufferReset {
+	for e.cur >= e.bufferReset-int32(len(e.hist)) {
 		if len(e.hist) == 0 {
-			for i := range e.table[:] {
+			e.table = [tableSize]tableEntry{}
 				e.table[i] = tableEntry{}
 			}
 			e.cur = e.maxMatchOff
 			break
 		}
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@ -8,6 +8,7 @@ import (
 	"crypto/rand"
 	"fmt"
 	"io"
 	"math"
 	rdebug "runtime/debug"
 	"sync"
@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 	}
 	return dst
 }
 // MaxEncodedSize returns the expected maximum
 // size of an encoded block or stream.
 //
 // size is treated as the payload length: it is added to the total unchanged.
 // The result is the sum of a frame-header estimate, 3 bytes of overhead per
 // block, size itself and, when e.o.pad > 1, the value returned by
 // calcSkippableFrame.
 func (e *Encoder) MaxEncodedSize(size int) int {
 	frameHeader := 4 + 2 // magic + frame header & window descriptor
 	// A configured dictionary adds 4 bytes.
 	// NOTE(review): presumably the dictionary ID field - confirm against the
 	// frame header writer.
 	if e.o.dict != nil {
 		frameHeader += 4
 	}
 	// Frame content size:
 	// budgeted at 1, 2, 4 or 8 bytes depending on which threshold size is below.
 	if size < 256 {
 		frameHeader++
 	} else if size < 65536+256 {
 		frameHeader += 2
 	} else if size < math.MaxInt32 {
 		frameHeader += 4
 	} else {
 		frameHeader += 8
 	}
 	// Final crc
 	if e.o.crc {
 		frameHeader += 4
 	}
 	// Max overhead is 3 bytes/block.
 	// There cannot be 0 blocks.
 	// For non-negative size, (size + blockSize) / blockSize equals
 	// size/blockSize + 1, so the count is at least 1 and is one higher than
 	// strictly needed when size is an exact multiple of the block size.
 	blocks := (size + e.o.blockSize) / e.o.blockSize
 	// Combine, add padding.
 	maxSz := frameHeader + 3*blocks + size
 	// Padding is only accounted for when e.o.pad > 1.
 	// NOTE(review): calcSkippableFrame presumably returns the number of bytes
 	// needed to reach a multiple of e.o.pad - confirm against its definition.
 	if e.o.pad > 1 {
 		maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad))
 	}
 	return maxSz
 }
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@ -3,6 +3,7 @@ package zstd
 import (
 	"errors"
 	"fmt"
 	"math"
 	"runtime"
 	"strings"
 )
@ -47,22 +48,22 @@ func (o encoderOptions) encoder() encoder {
 	switch o.level {
 	case SpeedFastest:
 		if o.dict != nil {
-			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	case SpeedDefault:
 		if o.dict != nil {
-			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
 		}
-		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 	case SpeedBetterCompression:
 		if o.dict != nil {
-			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
 		}
-		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	case SpeedBestCompression:
-		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
 	}
 	panic("unknown compression level")
 }
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@ -5,7 +5,7 @@
 package zstd
 import (
-	"bytes"
+	"encoding/binary"
 	"encoding/hex"
 	"errors"
 	"io"
@ -43,9 +43,9 @@ const (
 	MaxWindowSize = 1 << 29
 )
-var (
+const (
-	frameMagic          = []byte{0x28, 0xb5, 0x2f, 0xfd}
+	frameMagic          = "\x28\xb5\x2f\xfd"
-	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
+	skippableFrameMagic = "\x2a\x4d\x18"
 )
 func newFrameDec(o decoderOptions) *frameDec {
@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			copy(signature[1:], b)
 		}
-		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+		if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
 			if debugDecoder {
-				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
+				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic)))
 			}
 			// Break if not skippable frame.
 			break
@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			return err
 		}
 	}
-	if !bytes.Equal(signature[:], frameMagic) {
+	if string(signature[:]) != frameMagic {
 		if debugDecoder {
-			println("Got magic numbers: ", signature, "want:", frameMagic)
+			println("Got magic numbers: ", signature, "want:", []byte(frameMagic))
 		}
 		return ErrMagicMismatch
 	}
@ -305,7 +305,7 @@ func (d *frameDec) checkCRC() error {
 	}
 	// We can overwrite upper tmp now
-	want, err := d.rawInput.readSmall(4)
+	buf, err := d.rawInput.readSmall(4)
 	if err != nil {
 		println("CRC missing?", err)
 		return err
@ -315,22 +315,17 @@ func (d *frameDec) checkCRC() error {
 		return nil
 	}
-	var tmp [4]byte
+	want := binary.LittleEndian.Uint32(buf[:4])
-	got := d.crc.Sum64()
+	got := uint32(d.crc.Sum64())
 	// Flip to match file order.
 	tmp[0] = byte(got >> 0)
 	tmp[1] = byte(got >> 8)
 	tmp[2] = byte(got >> 16)
 	tmp[3] = byte(got >> 24)
-	if !bytes.Equal(tmp[:], want) {
+	if got != want {
 		if debugDecoder {
-			println("CRC Check Failed:", tmp[:], "!=", want)
+			printf("CRC check failed: got %08x, want %08x\n", got, want)
 		}
 		return ErrCRCMismatch
 	}
 	if debugDecoder {
-		println("CRC ok", tmp[:])
+		printf("CRC ok %08x\n", got)
 	}
 	return nil
 }
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
@ -2,12 +2,7 @@
 VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
-
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
 [![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
 [![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
 xxhash is a Go implementation of the 64-bit
 [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
 high-quality hashing algorithm that is much faster than anything in the Go
 standard library.
@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
 func (*Digest) Sum64() uint64
 ```
-This implementation provides a fast pure-Go implementation and an even faster
+The package is written with optimized pure Go and also contains even faster
-assembly implementation for amd64.
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
 opts into using the Go code even on those architectures.
 [xxHash]: http://cyan4973.github.io/xxHash/
 ## Compatibility
 This package is in a module and the latest code is in version 2 of the module.
 You need a version of Go with at least "minimal module compatibility" to use
 github.com/cespare/xxhash/v2:
 * 1.9.7+ for Go 1.9
 * 1.10.3+ for Go 1.10
 * Go 1.11 or later
 I recommend using the latest release of Go.
 ## Benchmarks
 Here are some quick benchmarks comparing the pure-Go and assembly
 implementations of Sum64.
-| input size | purego | asm |
+| input size | purego    | asm       |
-| --- | --- | --- |
+| ---------- | --------- | --------- |
-| 5 B   |  979.66 MB/s |  1291.17 MB/s  |
+| 4 B        |  1.3 GB/s |  1.2 GB/s |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s  |
+| 16 B       |  2.9 GB/s |  3.5 GB/s |
-| 4 KB  | 17573.46 MB/s | 17602.65 MB/s |
+| 100 B      |  6.9 GB/s |  8.1 GB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| 4 KB       | 11.7 GB/s | 16.7 GB/s |
 | 10 MB      | 12.0 GB/s | 17.3 GB/s |
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
-the following commands under Go 1.11.2:
+CPU using the following commands under Go 1.19.2:
 ```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
 ```
 ## Projects using this package
 - [InfluxDB](https://github.com/influxdata/influxdb)
 - [Prometheus](https://github.com/prometheus/prometheus)
 - [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
 - [FreeCache](https://github.com/coocood/freecache)
 - [FastCache](https://github.com/VictoriaMetrics/fastcache)
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
@ -18,19 +18,11 @@ const (
 	prime5 uint64 = 2870177450012600261
 )
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
+// Store the primes in an array as well.
-// possible in the Go code is worth a small (but measurable) performance boost
+//
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
+// The consts are used when possible in Go code to avoid MOVs but we need a
-// convenience in the Go code in a few places where we need to intentionally
+// contiguous array for the assembly code.
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
 // result overflows a uint64).
 var (
 	prime1v = prime1
 	prime2v = prime2
 	prime3v = prime3
 	prime4v = prime4
 	prime5v = prime5
 )
 // Digest implements hash.Hash64.
 type Digest struct {
@ -52,10 +44,10 @@ func New() *Digest {
 // Reset clears the Digest's state so that it can be reused.
 func (d *Digest) Reset() {
-	d.v1 = prime1v + prime2
+	d.v1 = primes[0] + prime2
 	d.v2 = prime2
 	d.v3 = 0
-	d.v4 = -prime1v
+	d.v4 = -primes[0]
 	d.total = 0
 	d.n = 0
 }
@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
 	n = len(b)
 	d.total += uint64(n)
 	memleft := d.mem[d.n&(len(d.mem)-1):]
 	if d.n+n < 32 {
 		// This new data doesn't even fill the current block.
-		copy(d.mem[d.n:], b)
+		copy(memleft, b)
 		d.n += n
 		return
 	}
 	if d.n > 0 {
 		// Finish off the partial block.
-		copy(d.mem[d.n:], b)
+		c := copy(memleft, b)
 		d.v1 = round(d.v1, u64(d.mem[0:8]))
 		d.v2 = round(d.v2, u64(d.mem[8:16]))
 		d.v3 = round(d.v3, u64(d.mem[16:24]))
 		d.v4 = round(d.v4, u64(d.mem[24:32]))
-		b = b[32-d.n:]
+		b = b[c:]
 		d.n = 0
 	}
@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
 	h += d.total
-	i, end := 0, d.n
+	b := d.mem[:d.n&(len(d.mem)-1)]
-	for ; i+8 <= end; i += 8 {
+	for ; len(b) >= 8; b = b[8:] {
-		k1 := round(0, u64(d.mem[i:i+8]))
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
+	if len(b) >= 4 {
-		h ^= uint64(u32(d.mem[i:i+4])) * prime1
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for i < end {
+	for ; len(b) > 0; b = b[1:] {
-		h ^= uint64(d.mem[i]) * prime5
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 		i++
 	}
 	h ^= h >> 33
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@ -1,3 +1,4 @@
 //go:build !appengine && gc && !purego && !noasm
 // +build !appengine
 // +build gc
 // +build !purego
@ -5,212 +6,205 @@
 #include "textflag.h"
-// Register allocation:
+// Registers:
-// AX	h
+#define h      AX
-// SI	pointer to advance through b
+#define d      AX
-// DX	n
+#define p      SI // pointer to advance through b
-// BX	loop end
+#define n      DX
-// R8	v1, k1
+#define end    BX // loop end
-// R9	v2
+#define v1     R8
-// R10	v3
+#define v2     R9
-// R11	v4
+#define v3     R10
-// R12	tmp
+#define v4     R11
-// R13	prime1v
+#define x      R12
-// R14	prime2v
+#define prime1 R13
-// DI	prime4v
+#define prime2 R14
 #define prime4 DI
-// round reads from and advances the buffer pointer in SI.
+#define round(acc, x) \
-// It assumes that R13 has prime1v and R14 has prime2v.
+	IMULQ prime2, x   \
-#define round(r) \
+	ADDQ  x, acc      \
-	MOVQ  (SI), R12 \
+	ROLQ  $31, acc    \
-	ADDQ  $8, SI    \
+	IMULQ prime1, acc
 	IMULQ R14, R12  \
 	ADDQ  R12, r    \
 	ROLQ  $31, r    \
 	IMULQ R13, r
-// mergeRound applies a merge round on the two registers acc and val.
+// round0 performs the operation x = round(0, x).
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
+#define round0(x) \
-#define mergeRound(acc, val) \
+	IMULQ prime2, x \
-	IMULQ R14, val \
+	ROLQ  $31, x    \
-	ROLQ  $31, val \
+	IMULQ prime1, x
-	IMULQ R13, val \
+
-	XORQ  val, acc \
+// mergeRound applies a merge round on the two registers acc and x.
-	IMULQ R13, acc \
+// It assumes that prime1, prime2, and prime4 have been loaded.
-	ADDQ  DI, acc
+#define mergeRound(acc, x) \
 	round0(x)         \
 	XORQ  x, acc      \
 	IMULQ prime1, acc \
 	ADDQ  prime4, acc
 // blockLoop processes as many 32-byte blocks as possible,
 // updating v1, v2, v3, and v4. It assumes that there is at least one block
 // to process.
 #define blockLoop() \
 loop:  \
 	MOVQ +0(p), x  \
 	round(v1, x)   \
 	MOVQ +8(p), x  \
 	round(v2, x)   \
 	MOVQ +16(p), x \
 	round(v3, x)   \
 	MOVQ +24(p), x \
 	round(v4, x)   \
 	ADDQ $32, p    \
 	CMPQ p, end    \
 	JLE  loop
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	// Load fixed primes.
-	MOVQ ·prime1v(SB), R13
+	MOVQ ·primes+0(SB), prime1
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+8(SB), prime2
-	MOVQ ·prime4v(SB), DI
+	MOVQ ·primes+24(SB), prime4
 	// Load slice.
-	MOVQ b_base+0(FP), SI
+	MOVQ b_base+0(FP), p
-	MOVQ b_len+8(FP), DX
+	MOVQ b_len+8(FP), n
-	LEAQ (SI)(DX*1), BX
+	LEAQ (p)(n*1), end
 	// The first loop limit will be len(b)-32.
-	SUBQ $32, BX
+	SUBQ $32, end
 	// Check whether we have at least one block.
-	CMPQ DX, $32
+	CMPQ n, $32
 	JLT  noBlocks
 	// Set up initial state (v1, v2, v3, v4).
-	MOVQ R13, R8
+	MOVQ prime1, v1
-	ADDQ R14, R8
+	ADDQ prime2, v1
-	MOVQ R14, R9
+	MOVQ prime2, v2
-	XORQ R10, R10
+	XORQ v3, v3
-	XORQ R11, R11
+	XORQ v4, v4
-	SUBQ R13, R11
+	SUBQ prime1, v4
-	// Loop until SI > BX.
+	blockLoop()
 blockLoop:
 	round(R8)
 	round(R9)
 	round(R10)
 	round(R11)
-	CMPQ SI, BX
+	MOVQ v1, h
-	JLE  blockLoop
+	ROLQ $1, h
 	MOVQ v2, x
 	ROLQ $7, x
 	ADDQ x, h
 	MOVQ v3, x
 	ROLQ $12, x
 	ADDQ x, h
 	MOVQ v4, x
 	ROLQ $18, x
 	ADDQ x, h
-	MOVQ R8, AX
+	mergeRound(h, v1)
-	ROLQ $1, AX
+	mergeRound(h, v2)
-	MOVQ R9, R12
+	mergeRound(h, v3)
-	ROLQ $7, R12
+	mergeRound(h, v4)
 	ADDQ R12, AX
 	MOVQ R10, R12
 	ROLQ $12, R12
 	ADDQ R12, AX
 	MOVQ R11, R12
 	ROLQ $18, R12
 	ADDQ R12, AX
 	mergeRound(AX, R8)
 	mergeRound(AX, R9)
 	mergeRound(AX, R10)
 	mergeRound(AX, R11)
 	JMP afterBlocks
 noBlocks:
-	MOVQ ·prime5v(SB), AX
+	MOVQ ·primes+32(SB), h
 afterBlocks:
-	ADDQ DX, AX
+	ADDQ n, h
-	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
+	ADDQ $24, end
-	ADDQ $24, BX
+	CMPQ p, end
 	JG   try4
-	CMPQ SI, BX
+loop8:
-	JG   fourByte
+	MOVQ  (p), x
 	ADDQ  $8, p
 	round0(x)
 	XORQ  x, h
 	ROLQ  $27, h
 	IMULQ prime1, h
 	ADDQ  prime4, h
-wordLoop:
+	CMPQ p, end
-	// Calculate k1.
+	JLE  loop8
 	MOVQ  (SI), R8
 	ADDQ  $8, SI
 	IMULQ R14, R8
 	ROLQ  $31, R8
 	IMULQ R13, R8
-	XORQ  R8, AX
+try4:
-	ROLQ  $27, AX
+	ADDQ $4, end
-	IMULQ R13, AX
+	CMPQ p, end
-	ADDQ  DI, AX
+	JG   try1
-	CMPQ SI, BX
+	MOVL  (p), x
-	JLE  wordLoop
+	ADDQ  $4, p
 	IMULQ prime1, x
 	XORQ  x, h
-fourByte:
+	ROLQ  $23, h
-	ADDQ $4, BX
+	IMULQ prime2, h
-	CMPQ SI, BX
+	ADDQ  ·primes+16(SB), h
 	JG   singles
-	MOVL  (SI), R8
+try1:
-	ADDQ  $4, SI
+	ADDQ $4, end
-	IMULQ R13, R8
+	CMPQ p, end
 	XORQ  R8, AX
 	ROLQ  $23, AX
 	IMULQ R14, AX
 	ADDQ  ·prime3v(SB), AX
 singles:
 	ADDQ $4, BX
 	CMPQ SI, BX
 	JGE  finalize
-singlesLoop:
+loop1:
-	MOVBQZX (SI), R12
+	MOVBQZX (p), x
-	ADDQ    $1, SI
+	ADDQ    $1, p
-	IMULQ   ·prime5v(SB), R12
+	IMULQ   ·primes+32(SB), x
-	XORQ    R12, AX
+	XORQ    x, h
 	ROLQ    $11, h
 	IMULQ   prime1, h
-	ROLQ  $11, AX
+	CMPQ p, end
-	IMULQ R13, AX
+	JL   loop1
 	CMPQ SI, BX
 	JL   singlesLoop
 finalize:
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $33, R12
+	SHRQ  $33, x
-	XORQ  R12, AX
+	XORQ  x, h
-	IMULQ R14, AX
+	IMULQ prime2, h
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $29, R12
+	SHRQ  $29, x
-	XORQ  R12, AX
+	XORQ  x, h
-	IMULQ ·prime3v(SB), AX
+	IMULQ ·primes+16(SB), h
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $32, R12
+	SHRQ  $32, x
-	XORQ  R12, AX
+	XORQ  x, h
-	MOVQ AX, ret+24(FP)
+	MOVQ h, ret+24(FP)
 	RET
 // writeBlocks uses the same registers as above except that it uses AX to store
 // the d pointer.
 // func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	// Load fixed primes needed for round.
-	MOVQ ·prime1v(SB), R13
+	MOVQ ·primes+0(SB), prime1
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+8(SB), prime2
 	// Load slice.
-	MOVQ b_base+8(FP), SI
+	MOVQ b_base+8(FP), p
-	MOVQ b_len+16(FP), DX
+	MOVQ b_len+16(FP), n
-	LEAQ (SI)(DX*1), BX
+	LEAQ (p)(n*1), end
-	SUBQ $32, BX
+	SUBQ $32, end
 	// Load vN from d.
-	MOVQ d+0(FP), AX
+	MOVQ s+0(FP), d
-	MOVQ 0(AX), R8   // v1
+	MOVQ 0(d), v1
-	MOVQ 8(AX), R9   // v2
+	MOVQ 8(d), v2
-	MOVQ 16(AX), R10 // v3
+	MOVQ 16(d), v3
-	MOVQ 24(AX), R11 // v4
+	MOVQ 24(d), v4
 	// We don't need to check the loop condition here; this function is
 	// always called with at least one block of data to process.
-blockLoop:
+	blockLoop()
 	round(R8)
 	round(R9)
 	round(R10)
 	round(R11)
 	CMPQ SI, BX
 	JLE  blockLoop
 	// Copy vN back to d.
-	MOVQ R8, 0(AX)
+	MOVQ v1, 0(d)
-	MOVQ R9, 8(AX)
+	MOVQ v2, 8(d)
-	MOVQ R10, 16(AX)
+	MOVQ v3, 16(d)
-	MOVQ R11, 24(AX)
+	MOVQ v4, 24(d)
-	// The number of bytes written is SI minus the old base pointer.
+	// The number of bytes written is p minus the old base pointer.
-	SUBQ b_base+8(FP), SI
+	SUBQ b_base+8(FP), p
-	MOVQ SI, ret+32(FP)
+	MOVQ p, ret+32(FP)
 	RET
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s
@ -1,13 +1,17 @@
-// +build gc,!purego,!noasm
+//go:build !appengine && gc && !purego && !noasm
 // +build !appengine
 // +build gc
 // +build !purego
 // +build !noasm
 #include "textflag.h"
-// Register allocation.
+// Registers:
 #define digest	R1
-#define h	R2 // Return value.
+#define h	R2 // return value
-#define p	R3 // Input pointer.
+#define p	R3 // input pointer
-#define len	R4
+#define n	R4 // input length
-#define nblocks	R5 // len / 32.
+#define nblocks	R5 // n / 32
 #define prime1	R7
 #define prime2	R8
 #define prime3	R9
@ -25,60 +29,52 @@
 #define round(acc, x) \
 	MADD prime2, acc, x, acc \
 	ROR  $64-31, acc         \
-	MUL  prime1, acc         \
+	MUL  prime1, acc
-// x = round(0, x).
+// round0 performs the operation x = round(0, x).
 #define round0(x) \
 	MUL prime2, x \
 	ROR $64-31, x \
-	MUL prime1, x \
+	MUL prime1, x
-#define mergeRound(x) \
+#define mergeRound(acc, x) \
-	round0(x)                 \
+	round0(x)                     \
-	EOR  x, h                 \
+	EOR  x, acc                   \
-	MADD h, prime4, prime1, h \
+	MADD acc, prime4, prime1, acc
-// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
+// blockLoop processes as many 32-byte blocks as possible,
-#define blocksLoop() \
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
-	LSR     $5, len, nblocks \
+#define blockLoop() \
-	PCALIGN $16              \
+	LSR     $5, n, nblocks  \
-	loop:                    \
+	PCALIGN $16             \
-	LDP.P   32(p), (x1, x2)  \
+	loop:                   \
-	round(v1, x1)            \
+	LDP.P   16(p), (x1, x2) \
-	LDP     -16(p), (x3, x4) \
+	LDP.P   16(p), (x3, x4) \
-	round(v2, x2)            \
+	round(v1, x1)           \
-	SUB     $1, nblocks      \
+	round(v2, x2)           \
-	round(v3, x3)            \
+	round(v3, x3)           \
-	round(v4, x4)            \
+	round(v4, x4)           \
-	CBNZ    nblocks, loop    \
+	SUB     $1, nblocks     \
-
+	CBNZ    nblocks, loop
 // The primes are repeated here to ensure that they're stored
 // in a contiguous array, so we can load them with LDP.
 DATA primes<> +0(SB)/8, $11400714785074694791
 DATA primes<> +8(SB)/8, $14029467366897019727
 DATA primes<>+16(SB)/8, $1609587929392839161
 DATA primes<>+24(SB)/8, $9650029242287828579
 DATA primes<>+32(SB)/8, $2870177450012600261
 GLOBL primes<>(SB), NOPTR+RODATA, $40
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
-	LDP b_base+0(FP), (p, len)
+	LDP b_base+0(FP), (p, n)
-	LDP  primes<> +0(SB), (prime1, prime2)
+	LDP  ·primes+0(SB), (prime1, prime2)
-	LDP  primes<>+16(SB), (prime3, prime4)
+	LDP  ·primes+16(SB), (prime3, prime4)
-	MOVD primes<>+32(SB), prime5
+	MOVD ·primes+32(SB), prime5
-	CMP  $32, len
+	CMP  $32, n
-	CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
+	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
-	BLO  afterLoop
+	BLT  afterLoop
 	ADD  prime1, prime2, v1
 	MOVD prime2, v2
 	MOVD $0, v3
 	NEG  prime1, v4
-	blocksLoop()
+	blockLoop()
 	ROR $64-1, v1, x1
 	ROR $64-7, v2, x2
@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
 	ADD x3, x4
 	ADD x2, x4, h
-	mergeRound(v1)
+	mergeRound(h, v1)
-	mergeRound(v2)
+	mergeRound(h, v2)
-	mergeRound(v3)
+	mergeRound(h, v3)
-	mergeRound(v4)
+	mergeRound(h, v4)
 afterLoop:
-	ADD len, h
+	ADD n, h
-	TBZ   $4, len, try8
+	TBZ   $4, n, try8
 	LDP.P 16(p), (x1, x2)
 	round0(x1)
 	// NOTE: here and below, sequencing the EOR after the ROR (using a
 	// rotated register) is worth a small but measurable speedup for small
 	// inputs.
 	ROR  $64-27, h
 	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 	round0(x2)
 	ROR  $64-27, h
-	EOR  x2 @> 64-27, h
+	EOR  x2 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 try8:
-	TBZ    $3, len, try4
+	TBZ    $3, n, try4
 	MOVD.P 8(p), x1
 	round0(x1)
 	ROR  $64-27, h
-	EOR  x1 @> 64-27, h
+	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 try4:
-	TBZ     $2, len, try2
+	TBZ     $2, n, try2
 	MOVWU.P 4(p), x2
 	MUL  prime1, x2
 	ROR  $64-23, h
-	EOR  x2 @> 64-23, h
+	EOR  x2 @> 64-23, h, h
 	MADD h, prime3, prime2, h
 try2:
-	TBZ     $1, len, try1
+	TBZ     $1, n, try1
 	MOVHU.P 2(p), x3
 	AND     $255, x3, x1
 	LSR     $8, x3, x2
 	MUL prime5, x1
 	ROR $64-11, h
-	EOR x1 @> 64-11, h
+	EOR x1 @> 64-11, h, h
 	MUL prime1, h
 	MUL prime5, x2
 	ROR $64-11, h
-	EOR x2 @> 64-11, h
+	EOR x2 @> 64-11, h, h
 	MUL prime1, h
 try1:
-	TBZ   $0, len, end
+	TBZ   $0, n, finalize
 	MOVBU (p), x4
 	MUL prime5, x4
 	ROR $64-11, h
-	EOR x4 @> 64-11, h
+	EOR x4 @> 64-11, h, h
 	MUL prime1, h
-end:
+finalize:
 	EOR h >> 33, h
 	MUL prime2, h
 	EOR h >> 29, h
@ -163,24 +163,22 @@ end:
 	RET
 // func writeBlocks(d *Digest, b []byte) int
-//
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
-// Assumes len(b) >= 32.
+	LDP ·primes+0(SB), (prime1, prime2)
 TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
 	LDP primes<>(SB), (prime1, prime2)
 	// Load state. Assume v[1-4] are stored contiguously.
 	MOVD d+0(FP), digest
 	LDP  0(digest), (v1, v2)
 	LDP  16(digest), (v3, v4)
-	LDP b_base+8(FP), (p, len)
+	LDP b_base+8(FP), (p, n)
-	blocksLoop()
+	blockLoop()
 	// Store updated state.
 	STP (v1, v2), 0(digest)
 	STP (v3, v4), 16(digest)
-	BIC  $31, len
+	BIC  $31, n
-	MOVD len, ret+32(FP)
+	MOVD n, ret+32(FP)
 	RET
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go
@ -13,4 +13,4 @@ package xxhash
 func Sum64(b []byte) uint64
 //go:noescape
-func writeBlocks(d *Digest, b []byte) int
+func writeBlocks(s *Digest, b []byte) int
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
 	var h uint64
 	if n >= 32 {
-		v1 := prime1v + prime2
+		v1 := primes[0] + prime2
 		v2 := prime2
 		v3 := uint64(0)
-		v4 := -prime1v
+		v4 := -primes[0]
 		for len(b) >= 32 {
 			v1 = round(v1, u64(b[0:8:len(b)]))
 			v2 = round(v2, u64(b[8:16:len(b)]))
@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 {
 	h += uint64(n)
-	i, end := 0, len(b)
+	for ; len(b) >= 8; b = b[8:] {
-	for ; i+8 <= end; i += 8 {
+		k1 := round(0, u64(b[:8]))
 		k1 := round(0, u64(b[i:i+8:len(b)]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
+	if len(b) >= 4 {
-		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for ; i < end; i++ {
+	for ; len(b) > 0; b = b[1:] {
-		h ^= uint64(b[i]) * prime5
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 	}
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@ -36,9 +36,6 @@ const forcePreDef = false
 // zstdMinMatch is the minimum zstd match length.
 const zstdMinMatch = 3
 // Reset the buffer offset when reaching this.
 const bufferReset = math.MaxInt32 - MaxWindowSize
 // fcsUnknown is used for unknown frame content size.
 const fcsUnknown = math.MaxUint64
@ -110,26 +107,25 @@ func printf(format string, a ...interface{}) {
 	}
 }
-// matchLen returns the maximum length.
+// matchLen returns the maximum common prefix length of a and b.
 // a must be the shortest of the two.
-// The function also returns whether all bytes matched.
+func matchLen(a, b []byte) (n int) {
-func matchLen(a, b []byte) int {
+	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
-	b = b[:len(a)]
+		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
-	for i := 0; i < len(a)-7; i += 8 {
+		if diff != 0 {
-		if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+			return n + bits.TrailingZeros64(diff)>>3
 			return i + (bits.TrailingZeros64(diff) >> 3)
 		}
 		n += 8
 	}
 	checked := (len(a) >> 3) << 3
 	a = a[checked:]
 	b = b[checked:]
 	for i := range a {
 		if a[i] != b[i] {
-			return i + checked
+			break
 		}
 		n++
 	}
-	return len(a) + checked
+	return n
 }
 func load3232(b []byte, i int32) uint32 {
@ -140,10 +136,6 @@ func load6432(b []byte, i int32) uint64 {
 	return binary.LittleEndian.Uint64(b[i:])
 }
 func load64(b []byte, i int) uint64 {
 	return binary.LittleEndian.Uint64(b[i:])
 }
 type byter interface {
 	Bytes() []byte
 	Len() int
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -337,7 +337,7 @@ github.com/jmespath/go-jmespath
 # github.com/jpillora/backoff v1.0.0
 ## explicit; go 1.13
 github.com/jpillora/backoff
-# github.com/klauspost/compress v1.15.12
+# github.com/klauspost/compress v1.15.13
 ## explicit; go 1.17
 github.com/klauspost/compress
 github.com/klauspost/compress/flate