lib/httpserver: use github.com/klauspost/compress/gzhttp for compressing http responses

This allows removing gzip-related code from lib/httpserver.
Aliaksandr Valialkin 2023-02-27 10:16:58 -08:00
parent 27c9446520
commit 1a6f2f07fd
29 changed files with 15004 additions and 8452 deletions
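
For context, a minimal sketch (illustrative, not code from this commit) of what gzhttp provides out of the box; the handler and port below are assumptions:

```go
package main

import (
	"io"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "hello")
	})
	// GzipHandler transparently compresses responses for clients that send
	// Accept-Encoding: gzip; other clients receive the plain response.
	http.ListenAndServe(":8428", gzhttp.GzipHandler(h))
}
```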

go.mod

@@ -23,7 +23,7 @@ require (
github.com/golang/snappy v0.0.4
github.com/googleapis/gax-go/v2 v2.7.0
github.com/influxdata/influxdb v1.11.0
github.com/klauspost/compress v1.15.15
github.com/klauspost/compress v1.16.0
github.com/prometheus/prometheus v0.42.0
github.com/urfave/cli/v2 v2.24.4
github.com/valyala/fastjson v1.6.4

go.sum

@@ -316,8 +316,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4=
github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

lib/httpserver/httpserver.go

@@ -1,7 +1,6 @@
package httpserver
import (
"bufio"
"context"
"crypto/tls"
"errors"
@@ -26,7 +25,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/gzhttp"
"github.com/valyala/fastrand"
)
@@ -74,9 +73,7 @@ type RequestHandler func(w http.ResponseWriter, r *http.Request) bool
// Serve starts an http server on the given addr with the given optional rh.
//
// By default all the responses are transparently compressed, since Google
// charges a lot for the egress traffic. The compression may be disabled
// by calling DisableResponseCompression before writing the first byte to w.
// By default all the responses are transparently compressed, since egress traffic is usually expensive.
//
// The compression is also disabled if -http.disableResponseCompression flag is set.
//
@@ -195,17 +192,23 @@ func Stop(addr string) error {
}
func gzipHandler(s *server, rh RequestHandler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w = maybeGzipResponseWriter(w, r)
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handlerWrapper(s, w, r, rh)
if zrw, ok := w.(*gzipResponseWriter); ok {
if err := zrw.Close(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Close: %s", err)
}
}
})
if *disableResponseCompression {
return h
}
return gzipHandlerWrapper(h)
}
var gzipHandlerWrapper = func() func(http.Handler) http.HandlerFunc {
hw, err := gzhttp.NewWrapper(gzhttp.CompressionLevel(1))
if err != nil {
panic(fmt.Errorf("BUG: cannot initialize gzip http wrapper: %s", err))
}
return hw
}()
var metricsHandlerDuration = metrics.NewHistogram(`vm_http_request_duration_seconds{path="/metrics"}`)
var connTimeoutClosedConns = metrics.NewCounter(`vm_http_conn_timeout_closed_conns_total`)
@@ -326,7 +329,6 @@ func handlerWrapper(s *server, w http.ResponseWriter, r *http.Request, rh Reques
if !CheckAuthFlag(w, r, *pprofAuthKey, "pprofAuthKey") {
return
}
DisableResponseCompression(w)
pprofHandler(r.URL.Path[len("/debug/pprof/"):], w, r)
return
}
@@ -374,179 +376,12 @@ func CheckBasicAuth(w http.ResponseWriter, r *http.Request) bool {
return false
}
func maybeGzipResponseWriter(w http.ResponseWriter, r *http.Request) http.ResponseWriter {
if *disableResponseCompression {
return w
}
if r.Header.Get("Connection") == "Upgrade" {
return w
}
ae := r.Header.Get("Accept-Encoding")
if ae == "" {
return w
}
ae = strings.ToLower(ae)
n := strings.Index(ae, "gzip")
if n < 0 {
// Do not apply gzip encoding to the response.
return w
}
// Apply gzip encoding to the response.
zw := getGzipWriter(w)
bw := getBufioWriter(zw)
zrw := &gzipResponseWriter{
rw: w,
zw: zw,
bw: bw,
}
return zrw
}
// DisableResponseCompression disables response compression on w.
//
// The function must be called before the first w.Write* call.
func DisableResponseCompression(w http.ResponseWriter) {
zrw, ok := w.(*gzipResponseWriter)
if !ok {
return
}
if zrw.firstWriteDone {
logger.Panicf("BUG: DisableResponseCompression must be called before sending the response")
}
zrw.disableCompression = true
}
// EnableCORS enables https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS
// on the response.
func EnableCORS(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
}
func getGzipWriter(w io.Writer) *gzip.Writer {
v := gzipWriterPool.Get()
if v == nil {
zw, err := gzip.NewWriterLevel(w, 1)
if err != nil {
logger.Panicf("BUG: cannot create gzip writer: %s", err)
}
return zw
}
zw := v.(*gzip.Writer)
zw.Reset(w)
return zw
}
func putGzipWriter(zw *gzip.Writer) {
gzipWriterPool.Put(zw)
}
var gzipWriterPool sync.Pool
type gzipResponseWriter struct {
rw http.ResponseWriter
zw *gzip.Writer
bw *bufio.Writer
statusCode int
firstWriteDone bool
disableCompression bool
}
// Implements http.ResponseWriter.Header method.
func (zrw *gzipResponseWriter) Header() http.Header {
return zrw.rw.Header()
}
// Implements http.ResponseWriter.Write method.
func (zrw *gzipResponseWriter) Write(p []byte) (int, error) {
if !zrw.firstWriteDone {
h := zrw.Header()
if zrw.statusCode == http.StatusNoContent {
zrw.disableCompression = true
}
if h.Get("Content-Encoding") != "" {
zrw.disableCompression = true
}
if !zrw.disableCompression {
h.Set("Content-Encoding", "gzip")
h.Del("Content-Length")
if h.Get("Content-Type") == "" {
// Disable auto-detection of content-type, since it
// is incorrectly detected after the compression.
h.Set("Content-Type", "text/html; charset=utf-8")
}
}
zrw.writeHeader()
zrw.firstWriteDone = true
}
if zrw.disableCompression {
return zrw.rw.Write(p)
}
return zrw.bw.Write(p)
}
// Implements http.ResponseWriter.WriteHeader method.
func (zrw *gzipResponseWriter) WriteHeader(statusCode int) {
zrw.statusCode = statusCode
}
func (zrw *gzipResponseWriter) writeHeader() {
if zrw.statusCode == 0 {
zrw.statusCode = http.StatusOK
}
zrw.rw.WriteHeader(zrw.statusCode)
}
// Implements http.Flusher
func (zrw *gzipResponseWriter) Flush() {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
}
if !zrw.disableCompression {
if err := zrw.bw.Flush(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (buffer): %s", err)
}
if err := zrw.zw.Flush(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (gzip): %s", err)
}
}
if fw, ok := zrw.rw.(http.Flusher); ok {
fw.Flush()
}
}
func (zrw *gzipResponseWriter) Close() error {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
}
zrw.Flush()
var err error
if !zrw.disableCompression {
err = zrw.zw.Close()
}
putGzipWriter(zrw.zw)
zrw.zw = nil
putBufioWriter(zrw.bw)
zrw.bw = nil
return err
}
func getBufioWriter(w io.Writer) *bufio.Writer {
v := bufioWriterPool.Get()
if v == nil {
return bufio.NewWriterSize(w, 16*1024)
}
bw := v.(*bufio.Writer)
bw.Reset(w)
return bw
}
func putBufioWriter(bw *bufio.Writer) {
bufioWriterPool.Put(bw)
}
var bufioWriterPool sync.Pool
func pprofHandler(profileName string, w http.ResponseWriter, r *http.Request) {
// This switch has been stolen from init func at https://golang.org/src/net/http/pprof/pprof.go
switch profileName {

vendor/github.com/klauspost/compress/.goreleaser.yml generated vendored

@@ -3,7 +3,7 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@v0.7.2
- go install mvdan.cc/garble@v0.9.3
builds:
-

vendor/github.com/klauspost/compress/README.md generated vendored

@@ -16,6 +16,12 @@ This package provides various compression algorithms.
# changelog
* Jan 21st, 2023 (v1.15.15)
* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
* Jan 3rd, 2023 (v1.15.14)
* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718

vendor/github.com/klauspost/compress/gzhttp/LICENSE generated vendored Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2017 The New York Times Company
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

vendor/github.com/klauspost/compress/gzhttp/README.md generated vendored Normal file

@@ -0,0 +1,222 @@
Gzip Middleware
===============
This Go package wraps HTTP *server* handlers to transparently gzip the
response body, for clients which support it.
For HTTP *clients* we provide a transport wrapper that will do gzip decompression
faster than what the standard library offers.
Both the client and server wrappers are fully compatible with other servers and clients.
This package is forked from the dead [nytimes/gziphandler](https://github.com/nytimes/gziphandler)
and extends its functionality.
## Install
```bash
go get -u github.com/klauspost/compress
```
## Documentation
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/gzhttp.svg)](https://pkg.go.dev/github.com/klauspost/compress/gzhttp)
## Usage
There are 2 main parts, one for http servers and one for http clients.
### Client
The standard library automatically adds gzip compression to most requests
and handles decompression of the responses.
However, by wrapping the transport we are able to override this and provide
our own (faster) decompressor.
Wrapping is done on the Transport of the http client:
```Go
func ExampleTransport() {
// Get an HTTP client.
client := http.Client{
// Wrap the transport:
Transport: gzhttp.Transport(http.DefaultTransport),
}
resp, err := client.Get("https://google.com")
if err != nil {
return
}
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Println("body:", string(body))
}
```
Speed compared to standard library `DefaultTransport` for an approximate 127KB JSON payload:
```
BenchmarkTransport
Single core:
BenchmarkTransport/gzhttp-32 1995 609791 ns/op 214.14 MB/s 10129 B/op 73 allocs/op
BenchmarkTransport/stdlib-32 1567 772161 ns/op 169.11 MB/s 53950 B/op 99 allocs/op
BenchmarkTransport/zstd-32 4579 238503 ns/op 547.51 MB/s 5775 B/op 69 allocs/op
Multi Core:
BenchmarkTransport/gzhttp-par-32 29113 36802 ns/op 3548.27 MB/s 11061 B/op 73 allocs/op
BenchmarkTransport/stdlib-par-32 16114 66442 ns/op 1965.38 MB/s 54971 B/op 99 allocs/op
BenchmarkTransport/zstd-par-32 90177 13110 ns/op 9960.83 MB/s 5361 B/op 67 allocs/op
```
This includes serving the HTTP request, parsing the request and decompressing the body.
### Server
For the simplest usage call `GzipHandler` with any handler (an object which implements the
`http.Handler` interface), and it'll return a new handler which gzips the
response. For example:
```go
package main
import (
"io"
"net/http"
"github.com/klauspost/compress/gzhttp"
)
func main() {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
io.WriteString(w, "Hello, World")
})
http.Handle("/", gzhttp.GzipHandler(handler))
http.ListenAndServe("0.0.0.0:8000", nil)
}
```
This will wrap a handler using the default options.
To specify custom options a reusable wrapper can be created that can be used to wrap
any number of handlers.
```Go
package main
import (
"io"
"log"
"net/http"
"github.com/klauspost/compress/gzhttp"
"github.com/klauspost/compress/gzip"
)
func main() {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
io.WriteString(w, "Hello, World")
})
// Create a reusable wrapper with custom options.
wrapper, err := gzhttp.NewWrapper(gzhttp.MinSize(2000), gzhttp.CompressionLevel(gzip.BestSpeed))
if err != nil {
log.Fatalln(err)
}
http.Handle("/", wrapper(handler))
http.ListenAndServe("0.0.0.0:8000", nil)
}
```
### Performance
Speed compared to [nytimes/gziphandler](https://github.com/nytimes/gziphandler) with default settings, 2KB, 20KB and 100KB:
```
λ benchcmp before.txt after.txt
benchmark old ns/op new ns/op delta
BenchmarkGzipHandler_S2k-32 51302 23679 -53.84%
BenchmarkGzipHandler_S20k-32 301426 156331 -48.14%
BenchmarkGzipHandler_S100k-32 1546203 818981 -47.03%
BenchmarkGzipHandler_P2k-32 3973 1522 -61.69%
BenchmarkGzipHandler_P20k-32 20319 9397 -53.75%
BenchmarkGzipHandler_P100k-32 96079 46361 -51.75%
benchmark old MB/s new MB/s speedup
BenchmarkGzipHandler_S2k-32 39.92 86.49 2.17x
BenchmarkGzipHandler_S20k-32 67.94 131.00 1.93x
BenchmarkGzipHandler_S100k-32 66.23 125.03 1.89x
BenchmarkGzipHandler_P2k-32 515.44 1345.31 2.61x
BenchmarkGzipHandler_P20k-32 1007.92 2179.47 2.16x
BenchmarkGzipHandler_P100k-32 1065.79 2208.75 2.07x
benchmark old allocs new allocs delta
BenchmarkGzipHandler_S2k-32 22 16 -27.27%
BenchmarkGzipHandler_S20k-32 25 19 -24.00%
BenchmarkGzipHandler_S100k-32 28 21 -25.00%
BenchmarkGzipHandler_P2k-32 22 16 -27.27%
BenchmarkGzipHandler_P20k-32 25 19 -24.00%
BenchmarkGzipHandler_P100k-32 27 21 -22.22%
benchmark old bytes new bytes delta
BenchmarkGzipHandler_S2k-32 8836 2980 -66.27%
BenchmarkGzipHandler_S20k-32 69034 20562 -70.21%
BenchmarkGzipHandler_S100k-32 356582 86682 -75.69%
BenchmarkGzipHandler_P2k-32 9062 2971 -67.21%
BenchmarkGzipHandler_P20k-32 67799 20051 -70.43%
BenchmarkGzipHandler_P100k-32 300972 83077 -72.40%
```
### Stateless compression
In cases where you expect to run many thousands of compressors concurrently,
but with very little activity you can use stateless compression.
This is not intended for regular web servers serving individual requests.
Use `CompressionLevel(-3)` or `CompressionLevel(gzip.StatelessCompression)` to enable.
Consider adding a [`bufio.Writer`](https://golang.org/pkg/bufio/#NewWriterSize) with a small buffer.
See [more details on stateless compression](https://github.com/klauspost/compress#stateless-compression).
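
A minimal sketch of enabling it through the wrapper (the handler is illustrative; `gzip.StatelessCompression` comes from this module's gzip package):

```go
package main

import (
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
	"github.com/klauspost/compress/gzip"
)

func main() {
	// Stateless compression keeps no state between Write calls, so many
	// mostly-idle compressors stay cheap on memory.
	wrapper, err := gzhttp.NewWrapper(gzhttp.CompressionLevel(gzip.StatelessCompression))
	if err != nil {
		log.Fatalln(err)
	}
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "Hello, World")
	})
	http.ListenAndServe("0.0.0.0:8000", wrapper(handler))
}
```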
### Migrating from gziphandler
This package removes some of the extra constructors.
When replacing, this can be used to find a replacement.
* `GzipHandler(h)` -> `GzipHandler(h)` (keep as-is)
* `GzipHandlerWithOpts(opts...)` -> `NewWrapper(opts...)`
* `MustNewGzipLevelHandler(n)` -> `NewWrapper(CompressionLevel(n))`
* `NewGzipLevelAndMinSize(n, s)` -> `NewWrapper(CompressionLevel(n), MinSize(s))`
By default, some mime types will now be excluded.
To re-enable compression of all types, use the `ContentTypeFilter(gzhttp.CompressAllContentTypeFilter)` option.
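
As a sketch, migrating a `MustNewGzipLevelHandler(6)` call while restoring the old compress-everything behavior might look like this (level 6 is just an example):

```go
package main

import (
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	wrapper, err := gzhttp.NewWrapper(
		gzhttp.CompressionLevel(6), // was: MustNewGzipLevelHandler(6)
		// Re-enable compression of all content types, as before the fork.
		gzhttp.ContentTypeFilter(gzhttp.CompressAllContentTypeFilter),
	)
	if err != nil {
		log.Fatalln(err)
	}
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "Hello, World")
	})
	http.Handle("/", wrapper(handler))
	http.ListenAndServe("0.0.0.0:8000", nil)
}
```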
### Range Requests
Ranged requests are not well supported with compression.
Therefore any response with a "Content-Range" header is not compressed.
To signal that range requests are not supported, any "Accept-Ranges" header that is set is removed when data is compressed.
If you do not want this behavior use the `KeepAcceptRanges()` option.
### Flushing data
The wrapper supports the [http.Flusher](https://golang.org/pkg/net/http/#Flusher) interface.
The only caveat is that the writer may not yet have received enough bytes to determine if `MinSize`
has been reached. In this case it will assume that the minimum size has been reached.
If nothing has been written to the response writer, nothing will be flushed.
## License
[Apache 2.0](LICENSE)

vendor/github.com/klauspost/compress/gzhttp/compress.go generated vendored Normal file

@@ -0,0 +1,862 @@
package gzhttp
import (
"bufio"
"fmt"
"io"
"mime"
"net"
"net/http"
"strconv"
"strings"
"sync"
"github.com/klauspost/compress/gzhttp/writer"
"github.com/klauspost/compress/gzhttp/writer/gzkp"
"github.com/klauspost/compress/gzip"
)
const (
// HeaderNoCompression can be used to disable compression.
// Any header value will disable compression.
// The Header is always removed from output.
HeaderNoCompression = "No-Gzip-Compression"
vary = "Vary"
acceptEncoding = "Accept-Encoding"
contentEncoding = "Content-Encoding"
contentRange = "Content-Range"
acceptRanges = "Accept-Ranges"
contentType = "Content-Type"
contentLength = "Content-Length"
eTag = "ETag"
)
type codings map[string]float64
const (
// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
// The examples seem to indicate that it is.
DefaultQValue = 1.0
// DefaultMinSize is the default minimum size until we enable gzip compression.
// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
// If you take a file that is 1300 bytes and compress it to 800 bytes, it's still transmitted in that same 1500 byte packet regardless, so you've gained nothing.
// That being the case, you should restrict the gzip compression to files with a size (plus header) greater than a single packet,
// 1024 bytes (1KB) is therefore default.
DefaultMinSize = 1024
)
// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
// bytes before writing them to the underlying response. This doesn't close the
// writers, so don't forget to do that.
// It can be configured to skip responses smaller than minSize.
type GzipResponseWriter struct {
http.ResponseWriter
level int
gwFactory writer.GzipWriterFactory
gw writer.GzipWriter
code int // Saves the WriteHeader value.
minSize int // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
buf []byte // Holds the first part of the write before reaching the minSize or the end of the write.
ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter.
keepAcceptRanges bool // Keep "Accept-Ranges" header.
setContentType bool // Add content type, if missing and detected.
suffixETag string // Suffix to add to ETag header if response is compressed.
dropETag bool // Drop ETag header if response is compressed (supersedes suffixETag).
contentTypeFilter func(ct string) bool // Only compress if the response is one of these content-types. All are accepted if empty.
}
type GzipResponseWriterWithCloseNotify struct {
*GzipResponseWriter
}
func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
}
// Write appends data to the gzip writer.
func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// GZIP responseWriter is initialized. Use the GZIP responseWriter.
if w.gw != nil {
return w.gw.Write(b)
}
// If we have already decided not to use GZIP, immediately passthrough.
if w.ignore {
return w.ResponseWriter.Write(b)
}
// Save the write into a buffer for later use in GZIP responseWriter
// (if content is long enough) or at close with regular responseWriter.
wantBuf := 512
if w.minSize > wantBuf {
wantBuf = w.minSize
}
toAdd := len(b)
if len(w.buf)+toAdd > wantBuf {
toAdd = wantBuf - len(w.buf)
}
w.buf = append(w.buf, b[:toAdd]...)
remain := b[toAdd:]
hdr := w.Header()
// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
if len(hdr[HeaderNoCompression]) == 0 && hdr.Get(contentEncoding) == "" && hdr.Get(contentRange) == "" {
// Check more expensive parts now.
cl, _ := atoi(hdr.Get(contentLength))
ct := hdr.Get(contentType)
if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
if len(w.buf) < w.minSize && cl == 0 {
return len(b), nil
}
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if cl >= w.minSize || len(w.buf) >= w.minSize {
// If a Content-Type wasn't specified, infer it from the current buffer.
if ct == "" {
ct = http.DetectContentType(w.buf)
}
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := hdr[contentType]; w.setContentType && !ok {
hdr.Set(contentType, ct)
}
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
if w.contentTypeFilter(ct) {
if err := w.startGzip(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.gw.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
}
}
}
// If we got here, we should not GZIP this response.
if err := w.startPlain(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.ResponseWriter.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
// startGzip initializes a GZIP writer and writes the buffer.
func (w *GzipResponseWriter) startGzip() error {
// Set the GZIP header.
w.Header().Set(contentEncoding, "gzip")
// if the Content-Length is already set, then calls to Write on gzip
// will fail to set the Content-Length header since it's already set
// See: https://github.com/golang/go/issues/14975.
w.Header().Del(contentLength)
// Delete Accept-Ranges.
if !w.keepAcceptRanges {
w.Header().Del(acceptRanges)
}
// Suffix ETag.
if w.suffixETag != "" && !w.dropETag && w.Header().Get(eTag) != "" {
orig := w.Header().Get(eTag)
insertPoint := strings.LastIndex(orig, `"`)
if insertPoint == -1 {
insertPoint = len(orig)
}
w.Header().Set(eTag, orig[:insertPoint]+w.suffixETag+orig[insertPoint:])
}
// Delete ETag.
if w.dropETag {
w.Header().Del(eTag)
}
// Write the header to gzip response.
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
// Ensure that no other WriteHeader's happen
w.code = 0
}
// Initialize and flush the buffer into the gzip response if there are any bytes.
// If there aren't any, we shouldn't initialize it yet because on Close it will
// write the gzip header even if nothing was ever written.
if len(w.buf) > 0 {
// Initialize the GZIP response.
w.init()
n, err := w.gw.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
w.buf = w.buf[:0]
return err
}
return nil
}
// startPlain writes the buffered bytes to the underlying ResponseWriter without gzip.
func (w *GzipResponseWriter) startPlain() error {
w.Header().Del(HeaderNoCompression)
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
// Ensure that no other WriteHeader's happen
w.code = 0
}
w.ignore = true
// If Write was never called then don't call Write on the underlying ResponseWriter.
if len(w.buf) == 0 {
return nil
}
n, err := w.ResponseWriter.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
w.buf = w.buf[:0]
return err
}
// WriteHeader just saves the response code until close or GZIP effective writes.
func (w *GzipResponseWriter) WriteHeader(code int) {
if w.code == 0 {
w.code = code
}
}
// init grabs a new gzip writer from the gzipWriterPool and writes the correct
// content encoding header.
func (w *GzipResponseWriter) init() {
// Bytes written during ServeHTTP are redirected to this gzip writer
// before being written to the underlying response.
w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
}
// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
func (w *GzipResponseWriter) Close() error {
if w.ignore {
return nil
}
if w.gw == nil {
// GZIP not triggered yet, write out regular response.
err := w.startPlain()
// Returns the error if any at write.
if err != nil {
err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
}
return err
}
err := w.gw.Close()
w.gw = nil
return err
}
// Flush flushes the underlying *gzip.Writer and then the underlying
// http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
// an http.Flusher.
// If not enough bytes have been written to determine if we have reached minimum size,
// this will be ignored.
// If nothing has been written yet, nothing will be flushed.
func (w *GzipResponseWriter) Flush() {
if w.gw == nil && !w.ignore {
if len(w.buf) == 0 {
// Nothing written yet.
return
}
var (
cl, _ = atoi(w.Header().Get(contentLength))
ct = w.Header().Get(contentType)
ce = w.Header().Get(contentEncoding)
cr = w.Header().Get(contentRange)
)
if ct == "" {
ct = http.DetectContentType(w.buf)
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := w.Header()[contentType]; w.setContentType && !ok {
w.Header().Set(contentType, ct)
}
}
if cl == 0 {
// Assume minSize.
cl = w.minSize
}
// See if we should compress...
if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
w.startGzip()
} else {
w.startPlain()
}
}
if w.gw != nil {
w.gw.Flush()
}
if fw, ok := w.ResponseWriter.(http.Flusher); ok {
fw.Flush()
}
}
// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
// Hijacker, its Hijack method is returned. Otherwise an error is returned.
func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
return hj.Hijack()
}
return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
}
// verify Hijacker interface implementation
var _ http.Hijacker = &GzipResponseWriter{}
var onceDefault sync.Once
var defaultWrapper func(http.Handler) http.HandlerFunc
// GzipHandler makes it easy to wrap an http handler with default settings.
func GzipHandler(h http.Handler) http.HandlerFunc {
onceDefault.Do(func() {
var err error
defaultWrapper, err = NewWrapper()
if err != nil {
panic(err)
}
})
return defaultWrapper(h)
}
var grwPool = sync.Pool{New: func() interface{} { return &GzipResponseWriter{} }}
// NewWrapper returns a reusable wrapper with the supplied options.
func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
c := &config{
level: gzip.DefaultCompression,
minSize: DefaultMinSize,
writer: writer.GzipWriterFactory{
Levels: gzkp.Levels,
New: gzkp.NewWriter,
},
contentTypes: DefaultContentTypeFilter,
setContentType: true,
}
for _, o := range opts {
o(c)
}
if err := c.validate(); err != nil {
return nil, err
}
return func(h http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Add(vary, acceptEncoding)
if acceptsGzip(r) {
gw := grwPool.Get().(*GzipResponseWriter)
*gw = GzipResponseWriter{
ResponseWriter: w,
gwFactory: c.writer,
level: c.level,
minSize: c.minSize,
contentTypeFilter: c.contentTypes,
keepAcceptRanges: c.keepAcceptRanges,
dropETag: c.dropETag,
suffixETag: c.suffixETag,
buf: gw.buf,
setContentType: c.setContentType,
}
if len(gw.buf) > 0 {
gw.buf = gw.buf[:0]
}
defer func() {
gw.Close()
gw.ResponseWriter = nil
grwPool.Put(gw)
}()
if _, ok := w.(http.CloseNotifier); ok {
gwcn := GzipResponseWriterWithCloseNotify{gw}
h.ServeHTTP(gwcn, r)
} else {
h.ServeHTTP(gw, r)
}
} else {
h.ServeHTTP(newNoGzipResponseWriter(w), r)
w.Header().Del(HeaderNoCompression)
}
}
}, nil
}
// Parsed representation of one of the inputs to ContentTypes.
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
mediaType string
params map[string]string
}
// equals returns whether this content type matches another content type.
func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
if pct.mediaType != mediaType {
return false
}
// if pct has no params, don't care about other's params
if len(pct.params) == 0 {
return true
}
// if pct has any params, they must be identical to other's.
if len(pct.params) != len(params) {
return false
}
for k, v := range pct.params {
if w, ok := params[k]; !ok || v != w {
return false
}
}
return true
}
// Used for functional configuration.
type config struct {
minSize int
level int
writer writer.GzipWriterFactory
contentTypes func(ct string) bool
keepAcceptRanges bool
setContentType bool
suffixETag string
dropETag bool
}
func (c *config) validate() error {
min, max := c.writer.Levels()
if c.level < min || c.level > max {
return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, min, max)
}
if c.minSize < 0 {
return fmt.Errorf("minimum size must be more than zero")
}
return nil
}
type option func(c *config)
func MinSize(size int) option {
return func(c *config) {
c.minSize = size
}
}
// CompressionLevel sets the compression level
func CompressionLevel(level int) option {
return func(c *config) {
c.level = level
}
}
// SetContentType sets the Content-Type header on responses before they are
// returned, if it is unset and a content type was detected.
// Default: true.
func SetContentType(b bool) option {
return func(c *config) {
c.setContentType = b
}
}
// Implementation changes the implementation of GzipWriter
//
// The default implementation is writer/gzkp/NewWriter,
// which is backed by github.com/klauspost/compress/gzip.
func Implementation(writer writer.GzipWriterFactory) option {
return func(c *config) {
c.writer = writer
}
}
// ContentTypes specifies a list of content types to compare
// the Content-Type header to before compressing. If none
// match, the response will be returned as-is.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive will match a content type
// that has the same MIME type, regardless of that content type's other
// directives. I.e., "text/html" will match both "text/html" and
// "text/html; charset=utf-8".
//
// A MIME type with any other directive will only match a content type
// that has the same MIME type and other directives. I.e.,
// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
//
// By default, common compressed audio, video and archive formats are excluded; see DefaultContentTypeFilter.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypes(types []string) option {
return func(c *config) {
var contentTypes []parsedContentType
for _, v := range types {
mediaType, params, err := mime.ParseMediaType(v)
if err == nil {
contentTypes = append(contentTypes, parsedContentType{mediaType, params})
}
}
c.contentTypes = func(ct string) bool {
return handleContentType(contentTypes, ct)
}
}
}
// ExceptContentTypes specifies a list of content types to compare
// the Content-Type header to before compressing. If none
// match, the response will be compressed.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive will match a content type
// that has the same MIME type, regardless of that content type's other
// directives. I.e., "text/html" will match both "text/html" and
// "text/html; charset=utf-8".
//
// A MIME type with any other directive will only match a content type
// that has the same MIME type and other directives. I.e.,
// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
//
// By default, common compressed audio, video and archive formats are excluded; see DefaultContentTypeFilter.
//
// Setting this will override default and any previous Content Type settings.
func ExceptContentTypes(types []string) option {
return func(c *config) {
var contentTypes []parsedContentType
for _, v := range types {
mediaType, params, err := mime.ParseMediaType(v)
if err == nil {
contentTypes = append(contentTypes, parsedContentType{mediaType, params})
}
}
c.contentTypes = func(ct string) bool {
return !handleContentType(contentTypes, ct)
}
}
}
// KeepAcceptRanges will keep Accept-Ranges header on gzipped responses.
// This will likely break ranged requests since that cannot be transparently
// handled by the filter.
func KeepAcceptRanges() option {
return func(c *config) {
c.keepAcceptRanges = true
}
}
// ContentTypeFilter allows adding a custom content type filter.
//
// The supplied function must return true/false to indicate if content
// should be compressed.
//
// When called no parsing of the content type 'ct' has been done.
// It may have been set or auto-detected.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypeFilter(compress func(ct string) bool) option {
return func(c *config) {
c.contentTypes = compress
}
}
// SuffixETag adds the specified suffix to the ETag header (if it exists) of
// responses which are compressed.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// A suffix such as "-gzip" is sometimes used as a workaround for generating a
// unique new ETag (see https://bz.apache.org/bugzilla/show_bug.cgi?id=39727).
func SuffixETag(suffix string) option {
return func(c *config) {
c.suffixETag = suffix
}
}
// DropETag removes the ETag of responses which are compressed. If DropETag is
// specified in conjunction with SuffixETag, this option will take precedence
// and the ETag will be dropped.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// This workaround eliminates ETag conflicts between the compressed and
// uncompressed versions by removing the ETag from the compressed version.
func DropETag() option {
return func(c *config) {
c.dropETag = true
}
}
// acceptsGzip returns true if the given HTTP request indicates that it will
// accept a gzipped response.
func acceptsGzip(r *http.Request) bool {
// Note that we don't request this for HEAD requests,
// due to a bug in nginx:
// https://trac.nginx.org/nginx/ticket/358
// https://golang.org/issue/5522
return r.Method != http.MethodHead && parseEncodingGzip(r.Header.Get(acceptEncoding)) > 0
}
// returns true if we've been configured to compress the specific content type.
func handleContentType(contentTypes []parsedContentType, ct string) bool {
// If contentTypes is empty we handle all content types.
if len(contentTypes) == 0 {
return true
}
mediaType, params, err := mime.ParseMediaType(ct)
if err != nil {
return false
}
for _, c := range contentTypes {
if c.equals(mediaType, params) {
return true
}
}
return false
}
// parseEncodingGzip returns the qvalue of gzip compression.
func parseEncodingGzip(s string) float64 {
s = strings.TrimSpace(s)
for len(s) > 0 {
stop := strings.IndexByte(s, ',')
if stop < 0 {
stop = len(s)
}
coding, qvalue, _ := parseCoding(s[:stop])
if coding == "gzip" {
return qvalue
}
if stop == len(s) {
break
}
s = s[stop+1:]
}
return 0
}
func parseEncodings(s string) (codings, error) {
split := strings.Split(s, ",")
c := make(codings, len(split))
var e []string
for _, ss := range split {
coding, qvalue, err := parseCoding(ss)
if err != nil {
e = append(e, err.Error())
} else {
c[coding] = qvalue
}
}
// TODO (adammck): Use a proper multi-error struct, so the individual errors
// can be extracted if anyone cares.
if len(e) > 0 {
return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
}
return c, nil
}
// parseCoding parses a single coding (content-coding with an optional qvalue),
// as might appear in an Accept-Encoding header. It attempts to forgive minor
// formatting errors.
func parseCoding(s string) (coding string, qvalue float64, err error) {
for n, part := range strings.Split(s, ";") {
part = strings.TrimSpace(part)
qvalue = DefaultQValue
if n == 0 {
coding = strings.ToLower(part)
} else if strings.HasPrefix(part, "q=") {
qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
if qvalue < 0.0 {
qvalue = 0.0
} else if qvalue > 1.0 {
qvalue = 1.0
}
}
}
if coding == "" {
err = fmt.Errorf("empty content-coding")
}
return
}
// Don't compress any audio/video types.
var excludePrefixDefault = []string{"video/", "audio/", "image/jp"}
// Skip a bunch of compressed types that contain these strings.
// Curated from the supposedly still-active formats on https://en.wikipedia.org/wiki/List_of_archive_formats
var excludeContainsDefault = []string{"compress", "zip", "snappy", "lzma", "xz", "zstd", "brotli", "stuffit"}
// DefaultContentTypeFilter excludes common compressed audio, video and archive formats.
func DefaultContentTypeFilter(ct string) bool {
ct = strings.TrimSpace(strings.ToLower(ct))
if ct == "" {
return true
}
for _, s := range excludeContainsDefault {
if strings.Contains(ct, s) {
return false
}
}
for _, prefix := range excludePrefixDefault {
if strings.HasPrefix(ct, prefix) {
return false
}
}
return true
}
// CompressAllContentTypeFilter will compress all mime types.
func CompressAllContentTypeFilter(ct string) bool {
return true
}
const intSize = 32 << (^uint(0) >> 63)
// atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
func atoi(s string) (int, bool) {
sLen := len(s)
if intSize == 32 && (0 < sLen && sLen < 10) ||
intSize == 64 && (0 < sLen && sLen < 19) {
// Fast path for small integers that fit int type.
s0 := s
if s[0] == '-' || s[0] == '+' {
s = s[1:]
if len(s) < 1 {
return 0, false
}
}
n := 0
for _, ch := range []byte(s) {
ch -= '0'
if ch > 9 {
return 0, false
}
n = n*10 + int(ch)
}
if s0[0] == '-' {
n = -n
}
return n, true
}
// Slow path for invalid, big, or underscored integers.
i64, err := strconv.ParseInt(s, 10, 0)
return int(i64), err == nil
}
// newNoGzipResponseWriter will return a response writer that
// cleans up compression artifacts.
// If the underlying writer supports http.Hijacker, the returned writer will as well.
func newNoGzipResponseWriter(w http.ResponseWriter) http.ResponseWriter {
n := &NoGzipResponseWriter{ResponseWriter: w}
if hj, ok := w.(http.Hijacker); ok {
x := struct {
http.ResponseWriter
http.Hijacker
http.Flusher
}{
ResponseWriter: n,
Hijacker: hj,
Flusher: n,
}
return x
}
return n
}
// NoGzipResponseWriter filters out HeaderNoCompression.
type NoGzipResponseWriter struct {
http.ResponseWriter
hdrCleaned bool
}
func (n *NoGzipResponseWriter) CloseNotify() <-chan bool {
if cn, ok := n.ResponseWriter.(http.CloseNotifier); ok {
return cn.CloseNotify()
}
return nil
}
func (n *NoGzipResponseWriter) Flush() {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
if f, ok := n.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
}
func (n *NoGzipResponseWriter) Header() http.Header {
return n.ResponseWriter.Header()
}
func (n *NoGzipResponseWriter) Write(bytes []byte) (int, error) {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
return n.ResponseWriter.Write(bytes)
}
func (n *NoGzipResponseWriter) WriteHeader(statusCode int) {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
n.ResponseWriter.WriteHeader(statusCode)
}
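
For illustration, a minimal sketch of a handler opting out of compression per response via the HeaderNoCompression constant defined above (the route and payload are hypothetical):

```go
package main

import (
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Any value disables compression; the wrapper strips the header
		// before the response is sent.
		w.Header().Set(gzhttp.HeaderNoCompression, "1")
		w.Write([]byte("already-compressed bytes")) // hypothetical payload
	})
	http.Handle("/blob", gzhttp.GzipHandler(handler))
	http.ListenAndServe(":8000", nil)
}
```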

vendor/github.com/klauspost/compress/gzhttp/transport.go generated vendored Normal file

@@ -0,0 +1,211 @@
// Copyright (c) 2021 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzhttp
import (
"io"
"net/http"
"strings"
"sync"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/zstd"
)
// Transport will wrap a transport with a custom handler
// that will request gzip and automatically decompress it.
// Using this is significantly faster than using the default transport.
func Transport(parent http.RoundTripper, opts ...transportOption) http.RoundTripper {
g := gzRoundtripper{parent: parent, withZstd: true, withGzip: true}
for _, o := range opts {
o(&g)
}
var ae []string
if g.withZstd {
ae = append(ae, "zstd")
}
if g.withGzip {
ae = append(ae, "gzip")
}
g.acceptEncoding = strings.Join(ae, ",")
return &g
}
type transportOption func(c *gzRoundtripper)
// TransportEnableZstd will send Zstandard as a compression option to the server.
// Enabled by default, but may be disabled if future problems arise.
func TransportEnableZstd(b bool) transportOption {
return func(c *gzRoundtripper) {
c.withZstd = b
}
}
// TransportEnableGzip will send Gzip as a compression option to the server.
// Enabled by default.
func TransportEnableGzip(b bool) transportOption {
return func(c *gzRoundtripper) {
c.withGzip = b
}
}
type gzRoundtripper struct {
parent http.RoundTripper
acceptEncoding string
withZstd, withGzip bool
}
func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) {
var requestedComp bool
if req.Header.Get("Accept-Encoding") == "" &&
req.Header.Get("Range") == "" &&
req.Method != "HEAD" {
// Request gzip only, not deflate. Deflate is ambiguous and
// not as universally supported anyway.
// See: https://zlib.net/zlib_faq.html#faq39
//
// Note that we don't request this for HEAD requests,
// due to a bug in nginx:
// https://trac.nginx.org/nginx/ticket/358
// https://golang.org/issue/5522
//
// We don't request gzip if the request is for a range, since
// auto-decoding a portion of a gzipped document will just fail
// anyway. See https://golang.org/issue/8923
requestedComp = len(g.acceptEncoding) > 0
req.Header.Set("Accept-Encoding", g.acceptEncoding)
}
resp, err := g.parent.RoundTrip(req)
if err != nil || !requestedComp {
return resp, err
}
// Decompress
if g.withGzip && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
resp.Body = &gzipReader{body: resp.Body}
resp.Header.Del("Content-Encoding")
resp.Header.Del("Content-Length")
resp.ContentLength = -1
resp.Uncompressed = true
}
if g.withZstd && asciiEqualFold(resp.Header.Get("Content-Encoding"), "zstd") {
resp.Body = &zstdReader{body: resp.Body}
resp.Header.Del("Content-Encoding")
resp.Header.Del("Content-Length")
resp.ContentLength = -1
resp.Uncompressed = true
}
return resp, nil
}
var gzReaderPool sync.Pool
// gzipReader wraps a response body so it can lazily
// call gzip.NewReader on the first call to Read
type gzipReader struct {
body io.ReadCloser // underlying HTTP/1 response body framing
zr *gzip.Reader // lazily-initialized gzip reader
zerr error // any error from gzip.NewReader; sticky
}
func (gz *gzipReader) Read(p []byte) (n int, err error) {
if gz.zr == nil {
if gz.zerr == nil {
zr, ok := gzReaderPool.Get().(*gzip.Reader)
if ok {
gz.zr, gz.zerr = zr, zr.Reset(gz.body)
} else {
gz.zr, gz.zerr = gzip.NewReader(gz.body)
}
}
if gz.zerr != nil {
return 0, gz.zerr
}
}
return gz.zr.Read(p)
}
func (gz *gzipReader) Close() error {
if gz.zr != nil {
gzReaderPool.Put(gz.zr)
gz.zr = nil
}
return gz.body.Close()
}
// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
// are equal, ASCII-case-insensitively.
func asciiEqualFold(s, t string) bool {
if len(s) != len(t) {
return false
}
for i := 0; i < len(s); i++ {
if lower(s[i]) != lower(t[i]) {
return false
}
}
return true
}
// lower returns the ASCII lowercase version of b.
func lower(b byte) byte {
if 'A' <= b && b <= 'Z' {
return b + ('a' - 'A')
}
return b
}
// zstdReaderPool pools zstd decoders.
var zstdReaderPool sync.Pool
// zstdReader wraps a response body so it can lazily
// call zstd.NewReader on the first call to Read
type zstdReader struct {
body io.ReadCloser // underlying HTTP/1 response body framing
zr *zstd.Decoder // lazily-initialized zstd reader
zerr error // any error from zstd.NewReader; sticky
}
func (zr *zstdReader) Read(p []byte) (n int, err error) {
if zr.zerr != nil {
return 0, zr.zerr
}
if zr.zr == nil {
if zr.zerr == nil {
reader, ok := zstdReaderPool.Get().(*zstd.Decoder)
if ok {
zr.zerr = reader.Reset(zr.body)
zr.zr = reader
} else {
zr.zr, zr.zerr = zstd.NewReader(zr.body, zstd.WithDecoderLowmem(true), zstd.WithDecoderMaxWindow(32<<20), zstd.WithDecoderConcurrency(1))
}
}
if zr.zerr != nil {
return 0, zr.zerr
}
}
n, err = zr.zr.Read(p)
if err != nil {
// Usually this will be io.EOF,
// stash the decoder and keep the error.
zr.zr.Reset(nil)
zstdReaderPool.Put(zr.zr)
zr.zr = nil
zr.zerr = err
}
return
}
func (zr *zstdReader) Close() error {
if zr.zr != nil {
zr.zr.Reset(nil)
zstdReaderPool.Put(zr.zr)
zr.zr = nil
}
return zr.body.Close()
}
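
A small usage sketch for the transport wrapper above (the target URL is illustrative); TransportEnableZstd(false) limits Accept-Encoding to gzip:

```go
package main

import (
	"fmt"
	"io"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	client := http.Client{
		// Wrap the default transport; matching responses are decompressed
		// transparently on Read.
		Transport: gzhttp.Transport(http.DefaultTransport, gzhttp.TransportEnableZstd(false)),
	}
	resp, err := client.Get("https://example.com")
	if err != nil {
		return
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println("read", len(body), "bytes")
}
```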

vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go generated vendored Normal file

@@ -0,0 +1,70 @@
// Package gzkp provides gzip compression through github.com/klauspost/compress/gzip.
package gzkp
import (
"io"
"sync"
"github.com/klauspost/compress/gzhttp/writer"
"github.com/klauspost/compress/gzip"
)
// gzipWriterPools stores a sync.Pool for each compression level for reuse of
// gzip.Writers. Use poolIndex to convert a compression level to an index into
// gzipWriterPools.
var gzipWriterPools [gzip.BestCompression - gzip.StatelessCompression + 1]*sync.Pool
func init() {
for i := gzip.StatelessCompression; i <= gzip.BestCompression; i++ {
addLevelPool(i)
}
}
// poolIndex maps a compression level to its index into gzipWriterPools. It
// assumes that level is a valid gzip compression level.
func poolIndex(level int) int {
return level - gzip.StatelessCompression
}
func addLevelPool(level int) {
gzipWriterPools[poolIndex(level)] = &sync.Pool{
New: func() interface{} {
// NewWriterLevel only returns error on a bad level, we are guaranteeing
// that this will be a valid level so it is okay to ignore the returned
// error.
w, _ := gzip.NewWriterLevel(nil, level)
return w
},
}
}
type pooledWriter struct {
*gzip.Writer
index int
}
func (pw *pooledWriter) Close() error {
err := pw.Writer.Close()
gzipWriterPools[pw.index].Put(pw.Writer)
pw.Writer = nil
return err
}
func NewWriter(w io.Writer, level int) writer.GzipWriter {
index := poolIndex(level)
gzw := gzipWriterPools[index].Get().(*gzip.Writer)
gzw.Reset(w)
return &pooledWriter{
Writer: gzw,
index: index,
}
}
func Levels() (min, max int) {
return gzip.StatelessCompression, gzip.BestCompression
}
func ImplementationInfo() string {
return "klauspost/compress/gzip"
}

vendor/github.com/klauspost/compress/gzhttp/writer/interface.go generated vendored Normal file

@@ -0,0 +1,20 @@
package writer
import "io"
// GzipWriter implements the functions needed for compressing content.
type GzipWriter interface {
Write(p []byte) (int, error)
Close() error
Flush() error
}
// GzipWriterFactory contains the information needed for custom gzip implementations.
type GzipWriterFactory struct {
// Must return the minimum and maximum supported level.
Levels func() (min, max int)
// New must return a new GzipWriter.
// level will always be within the return limits above.
New func(writer io.Writer, level int) GzipWriter
}

View file

@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
b, err := fse.Decompress(in[:iSize], s.fse)
s.fse.Out = nil
if err != nil {
return s, nil, err
return s, nil, fmt.Errorf("fse decompress returned: %w", err)
}
if len(b) > 255 {
return s, nil, errors.New("corrupt input: output table too large")

View file

@ -103,6 +103,28 @@ func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}
// EncodeBlockInto exposes encodeBlock but checks dst size.
func EncodeBlockInto(dst, src []byte) (d int) {
if MaxEncodedLen(len(src)) > len(dst) {
return 0
}
// encodeBlock breaks on too big blocks, so split.
for len(src) > 0 {
p := src
src = nil
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
if len(p) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], p)
} else {
d += encodeBlock(dst[d:], p)
}
}
return d
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.

View file

@ -20,11 +20,12 @@ This is important, so you don't have to worry about spending CPU cycles on alrea
* Concurrent stream compression
* Faster decompression, even for Snappy compatible content
* Concurrent Snappy/S2 stream decompression
* Ability to quickly skip forward in compressed stream
* Skip forward in compressed stream
* Random seeking with indexes
* Compatible with reading Snappy compressed content
* Smaller block size overhead on incompressible blocks
* Block concatenation
* Block Dictionary support
* Uncompressed stream mode
* Automatic stream size padding
* Snappy compatible block compression
@ -594,6 +595,123 @@ Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s
Decompression speed should be around the same as using the 'better' compression mode.
## Dictionaries
*Note: S2 dictionary compression is currently at an early implementation stage, with no assembly for
either encoding or decoding. Performance improvements can be expected in the future.*
Adding dictionaries allows providing a custom dictionary that will serve as a lookup at the beginning of blocks.
The same dictionary *must* be used for both encoding and decoding.
S2 does not keep track of whether the same dictionary is used,
and using the wrong dictionary will most often not result in an error when decompressing.
Blocks encoded *without* dictionaries can be decompressed seamlessly *with* a dictionary.
This means it is possible to switch from an encoding without dictionaries to an encoding with dictionaries
and treat the blocks similarly.
Similar to [zStandard dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression),
the same usage scenario applies to S2 dictionaries.
> Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no universal dictionary). Hence, deploying one dictionary per type of data will provide the greatest benefits. Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file.
S2 further limits the dictionary to only be enabled on the first 64KB of a block.
This will remove any negative (speed) impacts of the dictionaries on bigger blocks.
### Compression
Using the [github_users_sample_set](https://github.com/facebook/zstd/releases/download/v1.1.3/github_users_sample_set.tar.zst)
and a 64KB dictionary trained with zStandard the following sizes can be achieved.
| | Default | Better | Best |
|--------------------|------------------|------------------|-----------------------|
| Without Dictionary | 3362023 (44.92%) | 3083163 (41.19%) | 3057944 (40.86%) |
| With Dictionary | 921524 (12.31%) | 873154 (11.67%) | 785503 (10.49%) |
So for highly repetitive content, this case provides an almost 3x reduction in size.
For less uniform data we will use the Go source code tree.
Compressing the first 64KB of all `.go` files in `go/src`, Go 1.19.5, 8912 files, 51253563 bytes input:
| | Default | Better | Best |
|--------------------|-------------------|-------------------|-------------------|
| Without Dictionary | 22955767 (44.79%) | 20189613 (39.39%) | 19482828 (38.01%) |
| With Dictionary | 19654568 (38.35%) | 16289357 (31.78%) | 15184589 (29.63%) |
| Saving/file | 362 bytes | 428 bytes | 472 bytes |
### Creating Dictionaries
There are no tools to create dictionaries in S2.
However, there are multiple ways to create a useful dictionary:
#### Using a Sample File
If your input is very uniform, you can just use a sample file as the dictionary.
For example in the `github_users_sample_set` above, the average compression only goes up from
10.49% to 11.48% by using the first file as dictionary compared to using a dedicated dictionary.
```Go
// Read a sample
sample, err := os.ReadFile("sample.json")
// Create a dictionary.
dict := s2.MakeDict(sample, nil)
// b := dict.Bytes() will provide a dictionary that can be saved
// and reloaded with s2.NewDict(b).
// To encode:
encoded := dict.Encode(nil, file)
// To decode:
decoded, err := dict.Decode(nil, file)
```
#### Using Zstandard
Zstandard dictionaries can easily be converted to S2 dictionaries.
This can be helpful to generate dictionaries for files that don't have a fixed structure.
Example, with training set files placed in `./training-set`:
`λ zstd -r --train-fastcover training-set/* --maxdict=65536 -o name.dict`
This will create a dictionary of 64KB, that can be converted to a dictionary like this:
```Go
// Decode the Zstandard dictionary.
insp, err := zstd.InspectDictionary(zdict)
if err != nil {
panic(err)
}
// We are only interested in the contents.
// Assume that files start with "// Copyright (c) 2023".
// Search for the longest match for that.
// This may save a few bytes.
dict := s2.MakeDict(insp.Content(), []byte("// Copyright (c) 2023"))
// b := dict.Bytes() will provide a dictionary that can be saved
// and reloaded with s2.NewDict(b).
// We can now encode using this dictionary
encodedWithDict := dict.Encode(nil, payload)
// To decode content:
decoded, err := dict.Decode(nil, encodedWithDict)
```
It is recommended to save the dictionary returned by `b := dict.Bytes()`, since that will contain only the S2 dictionary.
This dictionary can later be loaded using `s2.NewDict(b)`. The dictionary then no longer requires `zstd` to be initialized.
Also note how `s2.MakeDict` allows you to search for a common starting sequence of your files.
This can be omitted, at the expense of a few bytes.
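A short sketch of that save/reload round trip, reusing `dict` and `payload` from the snippets above (the file name is illustrative):

```Go
// Persist only the S2 dictionary.
b := dict.Bytes()
if err := os.WriteFile("s2.dict", b, 0o644); err != nil {
	panic(err)
}

// Later: reload and use it without zstd being involved.
saved, err := os.ReadFile("s2.dict")
if err != nil {
	panic(err)
}
reloaded := s2.NewDict(saved) // returns nil on invalid input
encodedWithDict := reloaded.Encode(nil, payload)
```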
# Snappy Compatibility
S2 now offers full compatibility with Snappy.
@ -929,6 +1047,72 @@ The first copy of a block cannot be a repeat offset and the offset is reset on e
Default streaming block size is 1MB.
# Dictionary Encoding
Adding dictionaries allows providing a custom dictionary that will serve as a lookup at the beginning of blocks.
A dictionary provides an initial repeat value that can be used to point to a common header.
Other than that the dictionary contains values that can be used as back-references.
Often used data should be placed at the *end* of the dictionary since offsets < 2048 bytes will be smaller.
## Format
Dictionary *content* must be at least 16 bytes and less than or equal to 64KiB (65536 bytes).
Encoding: `[repeat value (uvarint)][dictionary content...]`
Before the dictionary content, an unsigned base-128 (uvarint) encoded value specifying the initial repeat offset.
This value is an offset into the dictionary content and not a back-reference offset,
so setting this to 0 will make the repeat value point to the first value of the dictionary.
The value must be less than the dictionary length minus 8, as sketched below.
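A minimal Go sketch of producing this layout (values illustrative; `s2.NewDict` parses exactly this serialization):

```Go
content := []byte("Yesterday 25 bananas were added to Benjamins brown bag")
repeat := uint64(10) // offset into content; must be < len(content)-8

// [repeat value (uvarint)][dictionary content...]
hdr := make([]byte, binary.MaxVarintLen16)
n := binary.PutUvarint(hdr, repeat)
serialized := append(hdr[:n], content...)

d := s2.NewDict(serialized) // nil if the layout or sizes are invalid
```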
## Encoding
From the decoder point of view the dictionary content is seen as preceding the encoded content.
`[dictionary content][decoded output]`
Backreferences to the dictionary are encoded as ordinary backreferences that have an offset before the start of the decoded block.
Matches copying from the dictionary are **not** allowed to cross from the dictionary into the decoded data.
However, if a copy ends at the end of the dictionary the next repeat will point to the start of the decoded buffer, which is allowed.
The first match can be a repeat value, which will use the repeat offset stored in the dictionary.
When 64KB (65536 bytes) has been en/decoded, it is no longer allowed to reference the dictionary,
neither by copy nor by repeat operations.
If the boundary is crossed while copying from the dictionary, the operation should complete,
but the next instruction is not allowed to reference the dictionary.
Valid blocks encoded *without* a dictionary can be decoded with any dictionary.
There are no checks whether the supplied dictionary is the correct one for a block.
Because of this there is no overhead from using a dictionary.
## Example
This is the dictionary content. Elements are separated by `[]`.
Dictionary: `[0x0a][Yesterday 25 bananas were added to Benjamins brown bag]`.
Initial repeat offset is set at 10, which is the letter `2`.
Encoded `[LIT "10"][REPEAT len=10][LIT "hich"][MATCH off=50 len=6][MATCH off=31 len=6][MATCH off=61 len=10]`
Decoded: `[10][ bananas w][hich][ were ][brown ][were added]`
Output: `10 bananas which were brown were added`
## Streams
For streams each block can use the dictionary.
The dictionary cannot currently be provided on the stream.
# LICENSE
This code is based on the [Snappy-Go](https://github.com/golang/snappy) implementation.

View file

@ -13,6 +13,7 @@ import (
"io/ioutil"
"math"
"runtime"
"strconv"
"sync"
)
@ -880,15 +881,20 @@ func (r *Reader) Skip(n int64) error {
// See Reader.ReadSeeker
type ReadSeeker struct {
*Reader
readAtMu sync.Mutex
}
// ReadSeeker will return an io.ReadSeeker compatible version of the reader.
// ReadSeeker will return an io.ReadSeeker and io.ReaderAt
// compatible version of the reader.
// If 'random' is specified the returned io.Seeker can be used for
// random seeking, otherwise only forward seeking is supported.
// Enabling random seeking requires the original input to support
// the io.Seeker interface.
// A custom index can be specified which will be used if supplied.
// When using a custom index, it will not be read from the input stream.
// The ReadAt position will affect regular reads and the current position of Seek.
// So using Read after ReadAt will continue from where the ReadAt stopped.
// No functions should be used concurrently.
// The returned ReadSeeker contains a shallow reference to the existing Reader,
// meaning changes performed to one are reflected in the other.
func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
@ -958,42 +964,55 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
// Reset on EOF
r.err = nil
}
if offset == 0 && whence == io.SeekCurrent {
return r.blockStart + int64(r.i), nil
}
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
if whence == io.SeekCurrent && offset >= 0 {
err := r.Skip(offset)
return r.blockStart + int64(r.i), err
}
if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) {
err := r.Skip(offset - r.blockStart - int64(r.i))
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// Calculate absolute offset.
absOffset := offset
switch whence {
case io.SeekStart:
case io.SeekCurrent:
offset += r.blockStart + int64(r.i)
absOffset = r.blockStart + int64(r.i) + offset
case io.SeekEnd:
if offset > 0 {
return 0, errors.New("seek after end of file")
if r.index == nil {
return 0, ErrUnsupported
}
offset = r.index.TotalUncompressed + offset
absOffset = r.index.TotalUncompressed + offset
default:
r.err = ErrUnsupported
return 0, r.err
}
if offset < 0 {
if absOffset < 0 {
return 0, errors.New("seek before start of file")
}
c, u, err := r.index.Find(offset)
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
if r.err != nil {
return 0, r.err
}
}
// If we are inside current block no need to seek.
// This includes no offset changes.
if absOffset >= r.blockStart && absOffset < r.blockStart+int64(r.j) {
r.i = int(absOffset - r.blockStart)
return r.blockStart + int64(r.i), nil
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
currOffset := r.blockStart + int64(r.i)
if absOffset >= currOffset {
err := r.Skip(absOffset - currOffset)
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// We can seek and we have an index.
c, u, err := r.index.Find(absOffset)
if err != nil {
return r.blockStart + int64(r.i), err
}
@ -1005,11 +1024,56 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
}
r.i = r.j // Remove rest of current block.
if u < offset {
r.blockStart = u - int64(r.j) // Adjust current block start for accounting.
if u < absOffset {
// Forward inside block
return offset, r.Skip(offset - u)
return absOffset, r.Skip(absOffset - u)
}
return offset, nil
if u > absOffset {
return 0, fmt.Errorf("s2 seek: (internal error) u (%d) > absOffset (%d)", u, absOffset)
}
return absOffset, nil
}
// ReadAt reads len(p) bytes into p starting at offset off in the
// underlying input source. It returns the number of bytes
// read (0 <= n <= len(p)) and any error encountered.
//
// When ReadAt returns n < len(p), it returns a non-nil error
// explaining why more bytes were not returned. In this respect,
// ReadAt is stricter than Read.
//
// Even if ReadAt returns n < len(p), it may use all of p as scratch
// space during the call. If some data is available but not len(p) bytes,
// ReadAt blocks until either all the data is available or an error occurs.
// In this respect ReadAt is different from Read.
//
// If the n = len(p) bytes returned by ReadAt are at the end of the
// input source, ReadAt may return either err == EOF or err == nil.
//
// If ReadAt is reading from an input source with a seek offset,
// ReadAt should not affect nor be affected by the underlying
// seek offset.
//
// Clients of ReadAt can execute parallel ReadAt calls on the
// same input source. This is however not recommended.
func (r *ReadSeeker) ReadAt(p []byte, offset int64) (int, error) {
r.readAtMu.Lock()
defer r.readAtMu.Unlock()
_, err := r.Seek(offset, io.SeekStart)
if err != nil {
return 0, err
}
n := 0
for n < len(p) {
n2, err := r.Read(p[n:])
if err != nil {
// This will include io.EOF
return n + n2, err
}
n += n2
}
return n, nil
}
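A hedged usage sketch of the ReadSeeker/ReadAt combination above (the file name is illustrative; assumes the stream was written with an index and the input supports io.Seeker, as os.File does):

```Go
f, err := os.Open("data.s2")
if err != nil {
	panic(err)
}
defer f.Close()

r := s2.NewReader(f)
rs, err := r.ReadSeeker(true, nil) // random seeking; the index is read from the stream
if err != nil {
	panic(err)
}
// Read 128 bytes of uncompressed data starting at offset 1MiB.
buf := make([]byte, 128)
if _, err := rs.ReadAt(buf, 1<<20); err != nil && err != io.EOF {
	panic(err)
}
```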
// ReadByte satisfies the io.ByteReader interface.
@ -1048,3 +1112,370 @@ func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error {
r.skippableCB[id] = fn
return nil
}
// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func s2DecodeDict(dst, src []byte, dict *Dict) int {
if dict == nil {
return s2Decode(dst, src)
}
const debug = false
const debugErrs = debug
if debug {
fmt.Println("Starting decode, dst len:", len(dst))
}
var d, s, length int
offset := len(dict.dict) - dict.repeat
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER when doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
}
length = int(x) + 1
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
length = int(src[s]) + 4
s += 1
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
startOff := len(dict.dict) - offset + d
if startOff < 0 || startOff+length > len(dict.dict) {
if debugErrs {
fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
}
copy(dst[d:d+length], dict.dict[startOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// Remaining with extra checks...
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
rOff := len(dict.dict) - (offset - d)
if debug {
fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
}
if rOff+length > len(dict.dict) {
if debugErrs {
fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
if rOff < 0 {
if debugErrs {
fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
copy(dst[d:d+length], dict.dict[rOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
if debugErrs {
fmt.Println("wanted length", len(dst), "got", d)
}
return decodeErrCodeCorrupt
}
return 0
}

View file

@ -57,6 +57,9 @@ func s2Decode(dst, src []byte) int {
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
@ -109,6 +112,10 @@ func s2Decode(dst, src []byte) int {
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}
@ -175,6 +182,9 @@ func s2Decode(dst, src []byte) int {
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
@ -241,6 +251,9 @@ func s2Decode(dst, src []byte) int {
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}

331
vendor/github.com/klauspost/compress/s2/dict.go generated vendored Normal file
View file

@ -0,0 +1,331 @@
// Copyright (c) 2022+ Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"bytes"
"encoding/binary"
"sync"
)
const (
// MinDictSize is the minimum dictionary size when repeat has been read.
MinDictSize = 16
// MaxDictSize is the maximum dictionary size when repeat has been read.
MaxDictSize = 65536
// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
MaxDictSrcOffset = 65535
)
// Dict contains a dictionary that can be used for encoding and decoding s2
type Dict struct {
dict []byte
repeat int // Repeat as index of dict
fast, better, best sync.Once
fastTable *[1 << 14]uint16
betterTableShort *[1 << 14]uint16
betterTableLong *[1 << 17]uint16
bestTableShort *[1 << 16]uint32
bestTableLong *[1 << 19]uint32
}
// NewDict will read a dictionary.
// It will return nil if the dictionary is invalid.
func NewDict(dict []byte) *Dict {
if len(dict) == 0 {
return nil
}
var d Dict
// Repeat is the first value of the dict
r, n := binary.Uvarint(dict)
if n <= 0 {
return nil
}
dict = dict[n:]
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize || len(dict) > MaxDictSize {
return nil
}
d.repeat = int(r)
if d.repeat > len(dict) {
return nil
}
return &d
}
// Bytes will return a serialized version of the dictionary.
// The output can be sent to NewDict.
func (d *Dict) Bytes() []byte {
dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
}
// MakeDict will create a dictionary.
// 'data' must be at least MinDictSize.
// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.
// If searchStart is set the start repeat value will be set to the last
// match of this content.
// If no matches are found, it will attempt to find shorter matches.
// This content should match the typical start of a block.
// If at least 4 bytes cannot be matched, repeat is set to start of block.
func MakeDict(data []byte, searchStart []byte) *Dict {
if len(data) == 0 {
return nil
}
if len(data) > MaxDictSize {
data = data[len(data)-MaxDictSize:]
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize {
return nil
}
// Find the longest match possible, last entry if multiple.
for s := len(searchStart); s > 4; s-- {
if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
d.repeat = idx
break
}
}
return &d
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockDictGo(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBetterDict(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBest(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= cap(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
if s2DecodeDict(dst, src[s:], d) != 0 {
return nil, ErrCorrupt
}
return dst, nil
}
func (d *Dict) initFast() {
d.fast.Do(func() {
const (
tableBits = 14
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8-2; i += 3 {
x0 := load64(d.dict, i)
h0 := hash6(x0, tableBits)
h1 := hash6(x0>>8, tableBits)
h2 := hash6(x0>>16, tableBits)
table[h0] = uint16(i)
table[h1] = uint16(i + 1)
table[h2] = uint16(i + 2)
}
d.fastTable = &table
})
}
func (d *Dict) initBetter() {
d.better.Do(func() {
const (
// Long hash matches.
lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint16
var sTable [maxSTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
lTable[hash7(cv, lTableBits)] = uint16(i)
sTable[hash4(cv, sTableBits)] = uint16(i)
}
d.betterTableShort = &sTable
d.betterTableLong = &lTable
})
}
func (d *Dict) initBest() {
d.best.Do(func() {
const (
// Long hash matches.
lTableBits = 19
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 16
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
candidateS := sTable[hashS]
lTable[hashL] = uint32(i) | candidateL<<16
sTable[hashS] = uint32(i) | candidateS<<16
}
d.bestTableShort = &sTable
d.bestTableLong = &lTable
})
}

View file

@ -58,6 +58,32 @@ func Encode(dst, src []byte) []byte {
return dst[:d]
}
// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {
d = calcBlockSizeSmall(src)
} else {
d = calcBlockSize(src)
}
if d == 0 {
return -1
}
// Size of the varint encoded block size.
d += (bits.Len64(uint64(len(src))) + 7) / 7
if d >= len(src) {
return -1
}
return d
}
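A sketch of using this estimate to skip pointless compression; `store` is a hypothetical helper, not part of the library:

```Go
// Store compressed output only when the fast estimate predicts a gain.
if est := s2.EstimateBlockSize(block); est < 0 {
	store(block) // incompressible: keep the block as-is
} else {
	store(s2.Encode(nil, block)) // real compression often beats the estimate
}
```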
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
@ -132,7 +158,7 @@ func EncodeBest(dst, src []byte) []byte {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBest(dst[d:], src)
n := encodeBlockBest(dst[d:], src, nil)
if n > 0 {
d += n
return dst[:d]
@ -408,6 +434,7 @@ type Writer struct {
randSrc io.Reader
writerWg sync.WaitGroup
index Index
customEnc func(dst, src []byte) int
// wroteStreamHeader is whether we have written the stream header.
wroteStreamHeader bool
@ -773,6 +800,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
}
func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
if w.customEnc != nil {
return w.customEnc(obuf, uncompressed)
}
if w.snappy {
switch w.level {
case levelFast:
@ -790,7 +820,7 @@ func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
case levelBetter:
return encodeBlockBetter(obuf, uncompressed)
case levelBest:
return encodeBlockBest(obuf, uncompressed)
return encodeBlockBest(obuf, uncompressed, nil)
}
return 0
}
@ -1339,3 +1369,15 @@ func WriterFlushOnWrite() WriterOption {
return nil
}
}
// WriterCustomEncoder allows overriding the encoder for blocks on the stream.
// The function must compress 'src' into 'dst' and return the number of bytes used in dst.
// The block size (initial varint) should not be added by the encoder.
// Returning 0 indicates the block could not be compressed.
// The function should expect to be called concurrently.
func WriterCustomEncoder(fn func(dst, src []byte) int) WriterOption {
return func(w *Writer) error {
w.customEnc = fn
return nil
}
}
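A minimal sketch of plugging in a custom encoder. This one simply disables block compression, which is valid because returning 0 stores the block uncompressed; real encoders must be safe for concurrent calls:

```Go
w := s2.NewWriter(out, s2.WriterCustomEncoder(func(dst, src []byte) int {
	return 0 // never compress; every block is stored uncompressed
}))
```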

View file

@ -8,6 +8,7 @@ package s2
import (
"bytes"
"encoding/binary"
"fmt"
"math/bits"
)
@ -455,3 +456,594 @@ emitRemainder:
}
return d
}
// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
// Initialize the hash table.
const (
tableBits = 14
maxTableSize = 1 << tableBits
maxAhead = 8 // maximum bytes ahead without checking sLimit
debug = false
)
dict.initFast()
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form can start with a dict entry (copy or repeat).
s := 0
// Convert dict repeat to offset
repeat := len(dict.dict) - dict.repeat
cv := load64(src, 0)
// While in dict
searchDict:
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
if nextS > sLimit {
if debug {
fmt.Println("slimit reached", s, nextS)
}
break searchDict
}
candidateDict := int(dict.fastTable[hash0])
candidateDict2 := int(dict.fastTable[hash1])
candidate2 := int(table[hash1])
candidate := int(table[hash0])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if repeat > s {
candidate := len(dict.dict) - repeat + s
if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
// Extend back
base := s
for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
s += 4
candidate += 4
for candidate < len(dict.dict)-8 && s <= len(src)-8 {
if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitRepeat(dst[d:], repeat, s-base)
if debug {
fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
cv = load64(src, s)
continue
}
} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if nextEmit > 0 {
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if debug {
fmt.Println("emitted reg repeat", s-base, "s:", s)
}
cv = load64(src, s)
continue searchDict
}
if s == 0 {
cv = load64(src, nextS)
s = nextS
continue searchDict
}
// Start with table. These matches will always be closer.
if uint32(cv) == load32(src, candidate) {
goto emitMatch
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
goto emitMatch
}
// Check dict. Dicts have longer offsets, so we want longer matches.
if cv == load64(dict.dict, candidateDict) {
table[hash2] = uint32(s + 2)
goto emitDict
}
candidateDict = int(dict.fastTable[hash2])
// Check if upper 7 bytes match
if candidateDict2 >= 1 {
if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
table[hash2] = uint32(s + 2)
candidateDict = candidateDict2
s++
goto emitDict
}
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
goto emitMatch
}
if candidateDict >= 2 {
// Check if upper 6 bytes match
if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
s += 2
goto emitDict
}
}
cv = load64(src, nextS)
s = nextS
continue searchDict
emitDict:
{
if debug {
if load32(dict.dict, candidateDict) != load32(src, s) {
panic("dict emit mismatch")
}
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
candidateDict--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
{
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = s + (len(dict.dict)) - candidateDict
// Extend the 4-byte match as long as possible.
s += 4
candidateDict += 4
for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateDict += 8
}
// Matches longer than 64 are split.
if s <= sLimit || s-base < 8 {
d += emitCopy(dst[d:], repeat, s-base)
} else {
// Split to ensure we don't start a copy within next block
d += emitCopy(dst[d:], repeat, 4)
d += emitRepeat(dst[d:], repeat, s-base-4)
}
if false {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := dict.dict[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Index and continue loop to try new candidate.
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>8, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s - 1)
cv = load64(src, s)
}
continue
}
emitMatch:
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopy(dst[d:], repeat, s-base)
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if debug && s == candidate {
panic("s == candidate")
}
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
// Search without dict:
if repeat > s {
repeat = 0
}
// No more dict
sLimit = len(src) - inputMargin
if s >= sLimit {
goto emitRemainder
}
if debug {
fmt.Println("non-dict matching at", s, "repeat:", repeat)
}
cv = load64(src, s)
if debug {
fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
}
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if nextEmit > 0 {
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
if debug {
fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopy(dst[d:], repeat, s-base)
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if debug && s == candidate {
panic("s == candidate")
}
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
if debug && nextEmit != s {
fmt.Println("emitted ", len(src)-nextEmit, "literals")
}
}
return d
}

View file

@ -3,6 +3,8 @@
package s2
const hasAmd64Asm = true
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.

View file

@ -7,6 +7,7 @@ package s2
import (
"fmt"
"math"
"math/bits"
)
@ -18,7 +19,7 @@ import (
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) {
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
@ -30,6 +31,8 @@ func encodeBlockBest(dst, src []byte) (d int) {
maxSTableSize = 1 << sTableBits
inputMargin = 8 + 2
debug = false
)
// sLimit is when to stop looking for offset/length copies. The inputMargin
@ -39,6 +42,10 @@ func encodeBlockBest(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
return 0
}
sLimitDict := len(src) - inputMargin
if sLimitDict > MaxDictSrcOffset-inputMargin {
sLimitDict = MaxDictSrcOffset - inputMargin
}
var lTable [maxLTableSize]uint64
var sTable [maxSTableSize]uint64
@ -52,10 +59,15 @@ func encodeBlockBest(dst, src []byte) (d int) {
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
repeat := 1
if dict != nil {
dict.initBest()
s = 0
repeat = len(dict.dict) - dict.repeat
}
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
const lowbitMask = 0xffffffff
getCur := func(x uint64) int {
return int(x & lowbitMask)
@ -71,7 +83,7 @@ func encodeBlockBest(dst, src []byte) (d int) {
s int
length int
score int
rep bool
rep, dict bool
}
var best match
for {
@ -85,6 +97,12 @@ func encodeBlockBest(dst, src []byte) (d int) {
if nextS > sLimit {
goto emitRemainder
}
if dict != nil && s >= MaxDictSrcOffset {
dict = nil
if repeat > s {
repeat = math.MinInt32
}
}
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
@ -114,7 +132,15 @@ func encodeBlockBest(dst, src []byte) (d int) {
}
m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
s += 4
for s <= sLimit {
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
@ -130,6 +156,62 @@ func encodeBlockBest(dst, src []byte) (d int) {
}
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {
// Don't retest if we have the same offset.
return match{offset: offset, s: s}
}
if load32(dict.dict, candidate) != first {
return match{offset: offset, s: s}
}
m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
s += 4
if !rep {
for s < sLimitDict && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
} else {
for s < len(src) && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
}
m.length -= candidate
m.score = score(m)
if m.score <= -m.s {
// Eliminate if no savings, we might find a better one.
m.length = 0
}
return m
}
bestOf := func(a, b match) match {
if b.length == 0 {
@ -146,35 +228,82 @@ func encodeBlockBest(dst, src []byte) (d int) {
return b
}
if s > 0 {
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
}
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
}
{
if (dict == nil || repeat <= s) && repeat > 0 {
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
} else if s-repeat < -4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
candidate++
best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
}
if best.length > 0 {
hashS := hash4(cv>>8, sTableBits)
// s+1
nextShort := sTable[hash4(cv>>8, sTableBits)]
nextShort := sTable[hashS]
s := s + 1
cv := load64(src, s)
nextLong := lTable[hash8(cv, lTableBits)]
hashL := hash8(cv, lTableBits)
nextLong := lTable[hashL]
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
// Dict at + 1
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
// s+2
if true {
nextShort = sTable[hash4(cv>>8, sTableBits)]
hashS := hash4(cv>>8, sTableBits)
nextShort = sTable[hashS]
s++
cv = load64(src, s)
nextLong = lTable[hash8(cv, lTableBits)]
hashL := hash8(cv, lTableBits)
nextLong = lTable[hashL]
if (dict == nil || repeat <= s) && repeat > 0 {
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
} else if repeat-s > 4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
}
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Dict at +2
// Very small gain
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
}
// Search for a match at best match end, see if that is better.
// Allow some bytes at the beginning to mismatch.
@ -227,7 +356,7 @@ func encodeBlockBest(dst, src []byte) (d int) {
// Extend backwards, not needed for repeats...
s = best.s
if !best.rep {
if !best.rep && !best.dict {
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
best.offset--
best.length++
@ -244,7 +373,6 @@ func encodeBlockBest(dst, src []byte) (d int) {
base := s
offset := s - best.offset
s += best.length
if offset > 65535 && s-base <= 5 && !best.rep {
@ -256,16 +384,28 @@ func encodeBlockBest(dst, src []byte) (d int) {
cv = load64(src, s)
continue
}
if debug && nextEmit != base {
fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if best.rep {
if nextEmit > 0 {
if nextEmit > 0 || best.dict {
if debug {
fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], offset, best.length)
} else {
// First match, cannot be repeat.
// First match without dict cannot be a repeat.
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
} else {
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
repeat = offset
@ -296,6 +436,9 @@ emitRemainder:
if d+len(src)-nextEmit > dstLimit {
return 0
}
if debug && nextEmit != s {
fmt.Println("emitted ", len(src)-nextEmit, "literals")
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
@ -642,7 +785,6 @@ func emitRepeatSize(offset, length int) int {
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
if left > 0 {
return 5 + emitRepeatSize(offset, left)
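
Aside: the match-extension loops in these encoders (seen above in matchDict and again below) all rely on the same primitive: compare 8 bytes at a time with load64 and, on the first difference, recover the exact match length from the trailing zero count of the XOR. A minimal, self-contained sketch of that primitive; matchLenSketch and the surrounding program are illustrative, not the library's exact helpers:

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// load64 reads 8 little-endian bytes starting at index i.
func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// matchLenSketch returns the length of the common prefix of a and b.
func matchLenSketch(a, b []byte) int {
	n := 0
	for len(a)-n >= 8 && len(b)-n >= 8 {
		if diff := load64(a, n) ^ load64(b, n); diff != 0 {
			// With little-endian loads, the index of the first mismatching
			// byte is TrailingZeros64(diff)/8.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	// Fewer than 8 bytes left: fall back to byte-by-byte comparison.
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLenSketch([]byte("abcdefgh12"), []byte("abcdefgh1X"))) // 9
}
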

View file

@ -6,6 +6,8 @@
package s2
import (
"bytes"
"fmt"
"math/bits"
)
@ -476,3 +478,623 @@ emitRemainder:
}
return d
}
// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
// Initialize the hash tables.
const (
// Long hash matches.
lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
maxAhead = 8 // maximum bytes ahead without checking sLimit
debug = false
)
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
if len(src) < minNonLiteralBlockSize {
return 0
}
dict.initBetter()
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// With a dictionary there is history to copy from, so we start looking
// for hash matches at s == 0.
s := 0
cv := load64(src, s)
// Initialize repeat from the dictionary's default repeat offset.
repeat := len(dict.dict) - dict.repeat
// While in dict
searchDict:
for {
candidateL := 0
nextS := 0
for {
// Next src position to check
nextS = s + (s-nextEmit)>>7 + 1
if nextS > sLimit {
break searchDict
}
hashL := hash7(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL = int(lTable[hashL])
candidateS := int(sTable[hashS])
dictL := int(dict.betterTableLong[hashL])
dictS := int(dict.betterTableShort[hashS])
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
valLong := load64(src, candidateL)
valShort := load64(src, candidateS)
// If long matches at least 8 bytes, use that.
if s != 0 {
if cv == valLong {
goto emitMatch
}
if cv == valShort {
candidateL = candidateS
goto emitMatch
}
}
// Check dict repeat.
if repeat >= s+4 {
candidate := len(dict.dict) - repeat + s
if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
// Extend back
base := s
for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
s += 4
candidate += 4
for candidate < len(dict.dict)-8 && s <= len(src)-8 {
if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitRepeat(dst[d:], repeat, s-base)
if debug {
fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
cv = load64(src, s)
// Index in-between
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 2
index1 -= 2
}
continue
}
}
// Don't try to find match at s==0
if s == 0 {
cv = load64(src, nextS)
s = nextS
continue
}
// A long-table match likely covers at least 7 bytes, so take that.
if uint32(cv) == uint32(valLong) {
goto emitMatch
}
// Long dict...
if uint32(cv) == load32(dict.dict, dictL) {
candidateL = dictL
goto emitDict
}
// Check our short candidate
if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
goto emitMatch
}
// Use our short candidate.
candidateL = candidateS
goto emitMatch
}
if uint32(cv) == load32(dict.dict, dictS) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
goto emitMatch
}
candidateL = dictS
goto emitDict
}
cv = load64(src, nextS)
s = nextS
}
emitDict:
{
if debug {
if load32(dict.dict, candidateL) != load32(src, s) {
panic("dict emit mismatch")
}
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
{
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
offset := s + (len(dict.dict)) - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if repeat == offset {
if debug {
fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
}
d += emitRepeat(dst[d:], offset, s-base)
} else {
if debug {
fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
}
// Matches that run past sLimit are split.
if s <= sLimit || s-base < 8 {
d += emitCopy(dst[d:], offset, s-base)
} else {
// Split to ensure we don't start a copy within next block.
d += emitCopy(dst[d:], offset, 4)
d += emitRepeat(dst[d:], offset, s-base-4)
}
repeat = offset
}
if false {
// Validate match.
if s <= candidateL {
panic("s <= candidate")
}
a := src[base:s]
b := dict.dict[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
continue
}
emitMatch:
// Extend backwards
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := base - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidateL] {
s++
candidateL++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if offset > 65535 && s-base <= 5 && repeat != offset {
// Bail if the match is equal to or worse than the encoding.
s = nextS + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
if repeat == offset {
if debug {
fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
}
d += emitRepeat(dst[d:], offset, s-base)
} else {
if debug {
fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
}
d += emitCopy(dst[d:], offset, s-base)
repeat = offset
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
// Search without dict:
if repeat > s {
repeat = 0
}
// No more dict
sLimit = len(src) - inputMargin
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
if debug {
fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
}
for {
candidateL := 0
nextS := 0
for {
// Next src position to check
nextS = s + (s-nextEmit)>>7 + 1
if nextS > sLimit {
goto emitRemainder
}
hashL := hash7(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL = int(lTable[hashL])
candidateS := int(sTable[hashS])
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
valLong := load64(src, candidateL)
valShort := load64(src, candidateS)
// If long matches at least 8 bytes, use that.
if cv == valLong {
break
}
if cv == valShort {
candidateL = candidateS
break
}
// Check repeat at offset checkRep.
const checkRep = 1
// Minimum length of a repeat. Tested with various values;
// while 4-5 offers improvements in some cases, 6 reduces
// regressions significantly.
const wantRepeatBytes = 6
const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
// Extend forward
candidate := s - repeat + wantRepeatBytes + checkRep
s += wantRepeatBytes + checkRep
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidate] {
s++
candidate++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// Index in-between
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 2
index1 -= 2
}
cv = load64(src, s)
continue
}
// A long-table match likely covers at least 7 bytes, so take that.
if uint32(cv) == uint32(valLong) {
break
}
// Check our short candidate
if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
break
}
// Use our short candidate.
candidateL = candidateS
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := base - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidateL] {
s++
candidateL++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if offset > 65535 && s-base <= 5 && repeat != offset {
// Bail if the match is equal to or worse than the encoding.
s = nextS + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if repeat == offset {
d += emitRepeat(dst[d:], offset, s-base)
} else {
d += emitCopy(dst[d:], offset, s-base)
repeat = offset
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
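
The "index every second long in between" loops above deserve a note: after a long match, the encoder back-fills the hash tables for positions it skipped, walking inward from both ends of the matched region and indexing every second position. A minimal sketch of that strategy, assuming an illustrative multiplicative hash (hash7Sketch is not the library's exact function or constant):

package main

import "encoding/binary"

const lTableBits = 17 // matches the long-table size used above

func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// hash7Sketch hashes the low 7 bytes of u into tableBits bits.
// The constant is an illustrative 64-bit mixer, not s2's exact prime.
func hash7Sketch(u uint64, tableBits uint) uint32 {
	const prime = 0x9E3779B185EBCA87
	return uint32(((u << 8) * prime) >> (64 - tableBits))
}

// indexBetween back-fills lTable for every second position in (base, s),
// walking inward from both ends, as the encoder above does after a match.
// The caller must guarantee s-2+8 <= len(src).
func indexBetween(lTable []uint32, src []byte, base, s int) {
	index0, index1 := base+1, s-2
	for index0 < index1 {
		lTable[hash7Sketch(load64(src, index0), lTableBits)] = uint32(index0)
		lTable[hash7Sketch(load64(src, index1), lTableBits)] = uint32(index1)
		index0 += 2
		index1 -= 2
	}
}

func main() {
	lTable := make([]uint32, 1<<lTableBits)
	src := make([]byte, 64)
	indexBetween(lTable, src, 0, 32)
}
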

View file

@ -4,9 +4,12 @@
package s2
import (
"bytes"
"math/bits"
)
const hasAmd64Asm = false
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
@ -312,3 +315,405 @@ func matchLen(a []byte, b []byte) int {
}
return len(a) + checked
}
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 13
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Calculate the size an emitCopy would add, and then see if another
// copy could be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last copy.
//
// If we exit this loop normally then we need to account for a literal next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 9
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Calculate the size an emitCopy would add, and then see if another
// copy could be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last copy.
//
// If we exit this loop normally then we need to account for a literal next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
// emitLiteralSize returns the number of bytes needed to encode a literal chunk.
//
// It assumes that:
//
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralSize(lit []byte) int {
if len(lit) == 0 {
return 0
}
switch {
case len(lit) <= 60:
return len(lit) + 1
case len(lit) <= 1<<8:
return len(lit) + 2
case len(lit) <= 1<<16:
return len(lit) + 3
case len(lit) <= 1<<24:
return len(lit) + 4
default:
return len(lit) + 5
}
}
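
Since calcBlockSize and calcBlockSizeSmall are driven entirely by these size helpers, the tiers above are easy to sanity-check: lengths up to 60 are packed into the tag byte, and each further tier spends one more length byte. A hypothetical table-driven test (assuming access to the unexported emitLiteralSize; not part of this diff):

package s2

import "testing"

// TestEmitLiteralSizeTiers is a hypothetical check of the size tiers above.
func TestEmitLiteralSizeTiers(t *testing.T) {
	cases := []struct{ n, want int }{
		{10, 11},       // <= 60: length packed into the tag byte
		{200, 202},     // <= 1<<8: one extra length byte
		{70000, 70004}, // <= 1<<24: three extra length bytes
	}
	for _, tc := range cases {
		if got := emitLiteralSize(make([]byte, tc.n)); got != tc.want {
			t.Errorf("n=%d: got %d, want %d", tc.n, got, tc.want)
		}
	}
}
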
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockAsm should be unreachable")
}
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}

View file

@ -146,6 +146,20 @@ func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
//go:noescape
func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// calcBlockSize returns the size of the encoded form of a non-empty src.
// Maximum input 4294967295 bytes.
// The varint-encoded length of the decompressed bytes is not included.
//
//go:noescape
func calcBlockSize(src []byte) int
// calcBlockSizeSmall returns the size of the encoded form of a non-empty src.
// Maximum input 1024 bytes.
// The varint-encoded length of the decompressed bytes is not included.
//
//go:noescape
func calcBlockSizeSmall(src []byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
@ -192,3 +206,13 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
//go:noescape
func matchLen(a []byte, b []byte) int
// cvtLZ4BlockAsm converts an LZ4 block to S2
//
//go:noescape
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4BlockSnappyAsm converts an LZ4 block to Snappy
//
//go:noescape
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)

File diff suppressed because it is too large

585
vendor/github.com/klauspost/compress/s2/lz4convert.go generated vendored Normal file
View file

@ -0,0 +1,585 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"errors"
"fmt"
)
// LZ4Converter provides conversion from LZ4 blocks as defined here:
// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
type LZ4Converter struct {
}
// ErrDstTooSmall is returned when provided destination is too small.
var ErrDstTooSmall = errors.New("s2: destination too small")
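
A hypothetical usage sketch for the converter (not part of this diff): ConvertBlock appends to dst starting at len(dst) and needs enough capacity for the whole converted block, so a caller can retry with a larger buffer on ErrDstTooSmall. The initial sizing below is an assumption, not a documented bound:

package main

import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

// convertLZ4 converts a bare LZ4 block (no frame headers) to an S2 block.
func convertLZ4(lz4Block []byte) ([]byte, int, error) {
	var c s2.LZ4Converter
	// Start with a guessed capacity and grow on ErrDstTooSmall.
	capacity := 2*len(lz4Block) + 64
	for {
		out, uncompressed, err := c.ConvertBlock(make([]byte, 0, capacity), lz4Block)
		if err == s2.ErrDstTooSmall {
			capacity *= 2
			continue
		}
		return out, uncompressed, err
	}
}

func main() {
	// Supply a real LZ4 block here; an empty input converts to an empty block.
	out, n, err := convertLZ4(nil)
	fmt.Println(len(out), n, err)
}
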
// ConvertBlock will convert an LZ4 block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4 block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// emitRepeat16 writes a repeat chunk and returns the number of bytes written.
// Length must be at least 4 and < 1<<24
func emitRepeat16(dst []byte, offset uint16, length int) int {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
return 2
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
return 2
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
return 3
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
return 4
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
return 5 + emitRepeat16(dst[5:], offset, left)
}
return 5
}
// emitCopy16 writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint16
// 4 <= length && length <= math.MaxUint32
func emitCopy16(dst []byte, offset uint16, length int) int {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return off + emitRepeat16(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
return 2
}
// emitLiteralGo writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralGo(dst, lit []byte) int {
if len(lit) == 0 {
return 0
}
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[1] = uint8(n)
dst[0] = 60<<2 | tagLiteral
i = 2
case n < 1<<16:
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 61<<2 | tagLiteral
i = 3
case n < 1<<24:
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 62<<2 | tagLiteral
i = 4
default:
dst[4] = uint8(n >> 24)
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 63<<2 | tagLiteral
i = 5
}
return i + copy(dst[i:], lit)
}
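
To make the copy encodings above concrete, here is a worked example of the 2-byte tagCopy1 form, using the standard Snappy tag values (tagLiteral = 0, tagCopy1 = 1, tagCopy2 = 2):

// Worked example of the 2-byte tagCopy1 encoding (offset < 2048, 4 <= length < 12):
//
//	offset = 10, length = 6
//	dst[0] = uint8(10>>8)<<5 | uint8(6-4)<<2 | tagCopy1
//	       = 0<<5 | 2<<2 | 1 = 0b0000_1001
//	dst[1] = uint8(10) = 0b0000_1010
//
// Bits 0-1 of dst[0] carry the tag, bits 2-4 carry length-4 (lengths 4-11),
// and bits 5-7 carry the top three bits of the 11-bit offset.
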

View file

@ -32,14 +32,38 @@ func (d *dict) ID() uint32 {
return d.id
}
// DictContentSize returns the dictionary content size or 0 if d is nil.
func (d *dict) DictContentSize() int {
// ContentSize returns the dictionary content size or 0 if d is nil.
func (d *dict) ContentSize() int {
if d == nil {
return 0
}
return len(d.content)
}
// Content returns the dictionary content.
func (d *dict) Content() []byte {
if d == nil {
return nil
}
return d.content
}
// Offsets returns the initial offsets.
func (d *dict) Offsets() [3]int {
if d == nil {
return [3]int{}
}
return d.offsets
}
// LitEncoder returns the literal encoder.
func (d *dict) LitEncoder() *huff0.Scratch {
if d == nil {
return nil
}
return d.litEnc
}
// Load a dictionary as described in
// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
func loadDict(b []byte) (*dict, error) {
@ -64,7 +88,7 @@ func loadDict(b []byte) (*dict, error) {
var err error
d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("loading literal table: %w", err)
}
d.litEnc.Reuse = huff0.ReusePolicyMust
@ -122,3 +146,16 @@ func loadDict(b []byte) (*dict, error) {
return &d, nil
}
// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
func InspectDictionary(b []byte) (interface {
ID() uint32
ContentSize() int
Content() []byte
Offsets() [3]int
LitEncoder() *huff0.Scratch
}, error) {
initPredefined()
d, err := loadDict(b)
return d, err
}
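
A hypothetical caller of the new inspection helper (not part of this diff), assuming the usual github.com/klauspost/compress/zstd import path:

package main

import (
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	b, err := os.ReadFile("dictionary.bin") // path is illustrative
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	d, err := zstd.InspectDictionary(b)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("dict id:", d.ID())
	fmt.Println("content size:", d.ContentSize())
	fmt.Println("initial offsets:", d.Offsets())
}
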

View file

@ -149,7 +149,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
if singleBlock {
e.lowMem = true
}
e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
e.lowMem = low
}

7
vendor/modules.txt vendored
View file

@ -338,11 +338,14 @@ github.com/jpillora/backoff
# github.com/json-iterator/go v1.1.12
## explicit; go 1.12
github.com/json-iterator/go
# github.com/klauspost/compress v1.15.15
## explicit; go 1.17
# github.com/klauspost/compress v1.16.0
## explicit; go 1.18
github.com/klauspost/compress
github.com/klauspost/compress/flate
github.com/klauspost/compress/fse
github.com/klauspost/compress/gzhttp
github.com/klauspost/compress/gzhttp/writer
github.com/klauspost/compress/gzhttp/writer/gzkp
github.com/klauspost/compress/gzip
github.com/klauspost/compress/huff0
github.com/klauspost/compress/internal/cpuinfo