lib/httpserver: use github.com/klauspost/compress/gzhttp for compressing http responses

This allows removing gzip-related code from lib/httpserver.
Aliaksandr Valialkin 2023-02-27 10:16:58 -08:00
parent 27c9446520
commit 1a6f2f07fd
29 changed files with 15004 additions and 8452 deletions
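
For context, a minimal sketch (illustrative, not code from this commit) of what gzhttp provides out of the box; the handler and port below are assumptions:

```go
package main

import (
	"io"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "hello")
	})
	// GzipHandler transparently compresses responses for clients that send
	// Accept-Encoding: gzip; other clients receive the plain response.
	http.ListenAndServe(":8428", gzhttp.GzipHandler(h))
}
```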

go.mod

@@ -23,7 +23,7 @@ require (
github.com/golang/snappy v0.0.4
github.com/googleapis/gax-go/v2 v2.7.0
github.com/influxdata/influxdb v1.11.0
github.com/klauspost/compress v1.15.15
github.com/klauspost/compress v1.16.0
github.com/prometheus/prometheus v0.42.0
github.com/urfave/cli/v2 v2.24.4
github.com/valyala/fastjson v1.6.4

go.sum

@@ -316,8 +316,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/klauspost/compress v1.16.0 h1:iULayQNOReoYUe+1qtKOqw9CwJv3aNQu8ivo7lw1HU4=
github.com/klauspost/compress v1.16.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=

lib/httpserver/httpserver.go

@@ -1,7 +1,6 @@
package httpserver
import (
"bufio"
"context"
"crypto/tls"
"errors"
@@ -26,7 +25,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/gzhttp"
"github.com/valyala/fastrand"
)
@@ -74,9 +73,7 @@ type RequestHandler func(w http.ResponseWriter, r *http.Request) bool
// Serve starts an http server on the given addr with the given optional rh.
//
// By default all the responses are transparently compressed, since Google
// charges a lot for the egress traffic. The compression may be disabled
// by calling DisableResponseCompression before writing the first byte to w.
// By default all the responses are transparently compressed, since egress traffic is usually expensive.
//
// The compression is also disabled if -http.disableResponseCompression flag is set.
//
@@ -195,17 +192,23 @@ func Stop(addr string) error {
}
func gzipHandler(s *server, rh RequestHandler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w = maybeGzipResponseWriter(w, r)
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handlerWrapper(s, w, r, rh)
if zrw, ok := w.(*gzipResponseWriter); ok {
if err := zrw.Close(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Close: %s", err)
}
}
})
if *disableResponseCompression {
return h
}
return gzipHandlerWrapper(h)
}
var gzipHandlerWrapper = func() func(http.Handler) http.HandlerFunc {
hw, err := gzhttp.NewWrapper(gzhttp.CompressionLevel(1))
if err != nil {
panic(fmt.Errorf("BUG: cannot initialize gzip http wrapper: %s", err))
}
return hw
}()
var metricsHandlerDuration = metrics.NewHistogram(`vm_http_request_duration_seconds{path="/metrics"}`)
var connTimeoutClosedConns = metrics.NewCounter(`vm_http_conn_timeout_closed_conns_total`)
@@ -326,7 +329,6 @@ func handlerWrapper(s *server, w http.ResponseWriter, r *http.Request, rh Reques
if !CheckAuthFlag(w, r, *pprofAuthKey, "pprofAuthKey") {
return
}
DisableResponseCompression(w)
pprofHandler(r.URL.Path[len("/debug/pprof/"):], w, r)
return
}
@@ -374,179 +376,12 @@ func CheckBasicAuth(w http.ResponseWriter, r *http.Request) bool {
return false
}
func maybeGzipResponseWriter(w http.ResponseWriter, r *http.Request) http.ResponseWriter {
if *disableResponseCompression {
return w
}
if r.Header.Get("Connection") == "Upgrade" {
return w
}
ae := r.Header.Get("Accept-Encoding")
if ae == "" {
return w
}
ae = strings.ToLower(ae)
n := strings.Index(ae, "gzip")
if n < 0 {
// Do not apply gzip encoding to the response.
return w
}
// Apply gzip encoding to the response.
zw := getGzipWriter(w)
bw := getBufioWriter(zw)
zrw := &gzipResponseWriter{
rw: w,
zw: zw,
bw: bw,
}
return zrw
}
// DisableResponseCompression disables response compression on w.
//
// The function must be called before the first w.Write* call.
func DisableResponseCompression(w http.ResponseWriter) {
zrw, ok := w.(*gzipResponseWriter)
if !ok {
return
}
if zrw.firstWriteDone {
logger.Panicf("BUG: DisableResponseCompression must be called before sending the response")
}
zrw.disableCompression = true
}
// EnableCORS enables https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS
// on the response.
func EnableCORS(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
}
func getGzipWriter(w io.Writer) *gzip.Writer {
v := gzipWriterPool.Get()
if v == nil {
zw, err := gzip.NewWriterLevel(w, 1)
if err != nil {
logger.Panicf("BUG: cannot create gzip writer: %s", err)
}
return zw
}
zw := v.(*gzip.Writer)
zw.Reset(w)
return zw
}
func putGzipWriter(zw *gzip.Writer) {
gzipWriterPool.Put(zw)
}
var gzipWriterPool sync.Pool
type gzipResponseWriter struct {
rw http.ResponseWriter
zw *gzip.Writer
bw *bufio.Writer
statusCode int
firstWriteDone bool
disableCompression bool
}
// Implements http.ResponseWriter.Header method.
func (zrw *gzipResponseWriter) Header() http.Header {
return zrw.rw.Header()
}
// Implements http.ResponseWriter.Write method.
func (zrw *gzipResponseWriter) Write(p []byte) (int, error) {
if !zrw.firstWriteDone {
h := zrw.Header()
if zrw.statusCode == http.StatusNoContent {
zrw.disableCompression = true
}
if h.Get("Content-Encoding") != "" {
zrw.disableCompression = true
}
if !zrw.disableCompression {
h.Set("Content-Encoding", "gzip")
h.Del("Content-Length")
if h.Get("Content-Type") == "" {
// Disable auto-detection of content-type, since it
// is incorrectly detected after the compression.
h.Set("Content-Type", "text/html; charset=utf-8")
}
}
zrw.writeHeader()
zrw.firstWriteDone = true
}
if zrw.disableCompression {
return zrw.rw.Write(p)
}
return zrw.bw.Write(p)
}
// Implements http.ResponseWriter.WriteHeader method.
func (zrw *gzipResponseWriter) WriteHeader(statusCode int) {
zrw.statusCode = statusCode
}
func (zrw *gzipResponseWriter) writeHeader() {
if zrw.statusCode == 0 {
zrw.statusCode = http.StatusOK
}
zrw.rw.WriteHeader(zrw.statusCode)
}
// Implements http.Flusher
func (zrw *gzipResponseWriter) Flush() {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
}
if !zrw.disableCompression {
if err := zrw.bw.Flush(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (buffer): %s", err)
}
if err := zrw.zw.Flush(); err != nil && !netutil.IsTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (gzip): %s", err)
}
}
if fw, ok := zrw.rw.(http.Flusher); ok {
fw.Flush()
}
}
func (zrw *gzipResponseWriter) Close() error {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
}
zrw.Flush()
var err error
if !zrw.disableCompression {
err = zrw.zw.Close()
}
putGzipWriter(zrw.zw)
zrw.zw = nil
putBufioWriter(zrw.bw)
zrw.bw = nil
return err
}
func getBufioWriter(w io.Writer) *bufio.Writer {
v := bufioWriterPool.Get()
if v == nil {
return bufio.NewWriterSize(w, 16*1024)
}
bw := v.(*bufio.Writer)
bw.Reset(w)
return bw
}
func putBufioWriter(bw *bufio.Writer) {
bufioWriterPool.Put(bw)
}
var bufioWriterPool sync.Pool
func pprofHandler(profileName string, w http.ResponseWriter, r *http.Request) {
// This switch has been stolen from init func at https://golang.org/src/net/http/pprof/pprof.go
switch profileName {

vendor/github.com/klauspost/compress/.goreleaser.yml generated vendored

@@ -3,7 +3,7 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@v0.7.2
- go install mvdan.cc/garble@v0.9.3
builds:
-

vendor/github.com/klauspost/compress/README.md generated vendored

@@ -16,6 +16,12 @@ This package provides various compression algorithms.
# changelog
* Jan 21st, 2023 (v1.15.15)
* deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
* zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
* zstd: Various speed improvements by @greatroar https://github.com/klauspost/compress/pull/741 https://github.com/klauspost/compress/pull/734 https://github.com/klauspost/compress/pull/736 https://github.com/klauspost/compress/pull/744 https://github.com/klauspost/compress/pull/743 https://github.com/klauspost/compress/pull/745
* gzhttp: Add SuffixETag() and DropETag() options to prevent ETag collisions on compressed responses by @willbicks in https://github.com/klauspost/compress/pull/740
* Jan 3rd, 2023 (v1.15.14)
* flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718

vendor/github.com/klauspost/compress/gzhttp/LICENSE generated vendored Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2017 The New York Times Company
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

vendor/github.com/klauspost/compress/gzhttp/README.md generated vendored Normal file

@@ -0,0 +1,222 @@
Gzip Middleware
===============
This Go package wraps HTTP *server* handlers to transparently gzip the
response body, for clients which support it.
For HTTP *clients* we provide a transport wrapper that will do gzip decompression
faster than what the standard library offers.
Both the client and server wrappers are fully compatible with other servers and clients.
This package is forked from the dead [nytimes/gziphandler](https://github.com/nytimes/gziphandler)
and extends its functionality.
## Install
```bash
go get -u github.com/klauspost/compress
```
## Documentation
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/gzhttp.svg)](https://pkg.go.dev/github.com/klauspost/compress/gzhttp)
## Usage
There are 2 main parts, one for http servers and one for http clients.
### Client
The standard library automatically adds gzip compression to most requests
and handles decompression of the responses.
However, by wrapping the transport we are able to override this and provide
our own (faster) decompressor.
Wrapping is done on the Transport of the http client:
```Go
func ExampleTransport() {
// Get an HTTP client.
client := http.Client{
// Wrap the transport:
Transport: gzhttp.Transport(http.DefaultTransport),
}
resp, err := client.Get("https://google.com")
if err != nil {
return
}
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Println("body:", string(body))
}
```
Speed compared to standard library `DefaultTransport` for an approximate 127KB JSON payload:
```
BenchmarkTransport
Single core:
BenchmarkTransport/gzhttp-32 1995 609791 ns/op 214.14 MB/s 10129 B/op 73 allocs/op
BenchmarkTransport/stdlib-32 1567 772161 ns/op 169.11 MB/s 53950 B/op 99 allocs/op
BenchmarkTransport/zstd-32 4579 238503 ns/op 547.51 MB/s 5775 B/op 69 allocs/op
Multi Core:
BenchmarkTransport/gzhttp-par-32 29113 36802 ns/op 3548.27 MB/s 11061 B/op 73 allocs/op
BenchmarkTransport/stdlib-par-32 16114 66442 ns/op 1965.38 MB/s 54971 B/op 99 allocs/op
BenchmarkTransport/zstd-par-32 90177 13110 ns/op 9960.83 MB/s 5361 B/op 67 allocs/op
```
This includes serving the HTTP request, parsing the request and decompressing the body.
### Server
For the simplest usage call `GzipHandler` with any handler (an object which implements the
`http.Handler` interface), and it'll return a new handler which gzips the
response. For example:
```go
package main
import (
"io"
"net/http"
"github.com/klauspost/compress/gzhttp"
)
func main() {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
io.WriteString(w, "Hello, World")
})
http.Handle("/", gzhttp.GzipHandler(handler))
http.ListenAndServe("0.0.0.0:8000", nil)
}
```
This will wrap a handler using the default options.
To specify custom options a reusable wrapper can be created that can be used to wrap
any number of handlers.
```Go
package main
import (
"io"
"log"
"net/http"
"github.com/klauspost/compress/gzhttp"
"github.com/klauspost/compress/gzip"
)
func main() {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
io.WriteString(w, "Hello, World")
})
// Create a reusable wrapper with custom options.
wrapper, err := gzhttp.NewWrapper(gzhttp.MinSize(2000), gzhttp.CompressionLevel(gzip.BestSpeed))
if err != nil {
log.Fatalln(err)
}
http.Handle("/", wrapper(handler))
http.ListenAndServe("0.0.0.0:8000", nil)
}
```
### Performance
Speed compared to [nytimes/gziphandler](https://github.com/nytimes/gziphandler) with default settings, 2KB, 20KB and 100KB:
```
λ benchcmp before.txt after.txt
benchmark old ns/op new ns/op delta
BenchmarkGzipHandler_S2k-32 51302 23679 -53.84%
BenchmarkGzipHandler_S20k-32 301426 156331 -48.14%
BenchmarkGzipHandler_S100k-32 1546203 818981 -47.03%
BenchmarkGzipHandler_P2k-32 3973 1522 -61.69%
BenchmarkGzipHandler_P20k-32 20319 9397 -53.75%
BenchmarkGzipHandler_P100k-32 96079 46361 -51.75%
benchmark old MB/s new MB/s speedup
BenchmarkGzipHandler_S2k-32 39.92 86.49 2.17x
BenchmarkGzipHandler_S20k-32 67.94 131.00 1.93x
BenchmarkGzipHandler_S100k-32 66.23 125.03 1.89x
BenchmarkGzipHandler_P2k-32 515.44 1345.31 2.61x
BenchmarkGzipHandler_P20k-32 1007.92 2179.47 2.16x
BenchmarkGzipHandler_P100k-32 1065.79 2208.75 2.07x
benchmark old allocs new allocs delta
BenchmarkGzipHandler_S2k-32 22 16 -27.27%
BenchmarkGzipHandler_S20k-32 25 19 -24.00%
BenchmarkGzipHandler_S100k-32 28 21 -25.00%
BenchmarkGzipHandler_P2k-32 22 16 -27.27%
BenchmarkGzipHandler_P20k-32 25 19 -24.00%
BenchmarkGzipHandler_P100k-32 27 21 -22.22%
benchmark old bytes new bytes delta
BenchmarkGzipHandler_S2k-32 8836 2980 -66.27%
BenchmarkGzipHandler_S20k-32 69034 20562 -70.21%
BenchmarkGzipHandler_S100k-32 356582 86682 -75.69%
BenchmarkGzipHandler_P2k-32 9062 2971 -67.21%
BenchmarkGzipHandler_P20k-32 67799 20051 -70.43%
BenchmarkGzipHandler_P100k-32 300972 83077 -72.40%
```
### Stateless compression
In cases where you expect to run many thousands of compressors concurrently,
but with very little activity you can use stateless compression.
This is not intended for regular web servers serving individual requests.
Use `CompressionLevel(-3)` or `CompressionLevel(gzip.StatelessCompression)` to enable.
Consider adding a [`bufio.Writer`](https://golang.org/pkg/bufio/#NewWriterSize) with a small buffer.
See [more details on stateless compression](https://github.com/klauspost/compress#stateless-compression).
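
A minimal sketch of enabling it through the wrapper (the handler is illustrative; `gzip.StatelessCompression` comes from this module's gzip package):

```go
package main

import (
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
	"github.com/klauspost/compress/gzip"
)

func main() {
	// Stateless compression keeps no state between Write calls, so many
	// mostly-idle compressors stay cheap on memory.
	wrapper, err := gzhttp.NewWrapper(gzhttp.CompressionLevel(gzip.StatelessCompression))
	if err != nil {
		log.Fatalln(err)
	}
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "Hello, World")
	})
	http.ListenAndServe("0.0.0.0:8000", wrapper(handler))
}
```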
### Migrating from gziphandler
This package removes some of the extra constructors.
When replacing, this can be used to find a replacement.
* `GzipHandler(h)` -> `GzipHandler(h)` (keep as-is)
* `GzipHandlerWithOpts(opts...)` -> `NewWrapper(opts...)`
* `MustNewGzipLevelHandler(n)` -> `NewWrapper(CompressionLevel(n))`
* `NewGzipLevelAndMinSize(n, s)` -> `NewWrapper(CompressionLevel(n), MinSize(s))`
By default, some mime types will now be excluded.
To re-enable compression of all types, use the `ContentTypeFilter(gzhttp.CompressAllContentTypeFilter)` option.
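
As a sketch, migrating a `MustNewGzipLevelHandler(6)` call while restoring the old compress-everything behavior might look like this (level 6 is just an example):

```go
package main

import (
	"io"
	"log"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	wrapper, err := gzhttp.NewWrapper(
		gzhttp.CompressionLevel(6), // was: MustNewGzipLevelHandler(6)
		// Re-enable compression of all content types, as before the fork.
		gzhttp.ContentTypeFilter(gzhttp.CompressAllContentTypeFilter),
	)
	if err != nil {
		log.Fatalln(err)
	}
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, "Hello, World")
	})
	http.Handle("/", wrapper(handler))
	http.ListenAndServe("0.0.0.0:8000", nil)
}
```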
### Range Requests
Ranged requests are not well supported with compression.
Therefore any response with a "Content-Range" header is not compressed.
To signal that range requests are not supported, any "Accept-Ranges" header that is set is removed when data is compressed.
If you do not want this behavior use the `KeepAcceptRanges()` option.
### Flushing data
The wrapper supports the [http.Flusher](https://golang.org/pkg/net/http/#Flusher) interface.
The only caveat is that the writer may not yet have received enough bytes to determine if `MinSize`
has been reached. In this case it will assume that the minimum size has been reached.
If nothing has been written to the response writer, nothing will be flushed.
## License
[Apache 2.0](LICENSE)

vendor/github.com/klauspost/compress/gzhttp/compress.go generated vendored Normal file

@@ -0,0 +1,862 @@
package gzhttp
import (
"bufio"
"fmt"
"io"
"mime"
"net"
"net/http"
"strconv"
"strings"
"sync"
"github.com/klauspost/compress/gzhttp/writer"
"github.com/klauspost/compress/gzhttp/writer/gzkp"
"github.com/klauspost/compress/gzip"
)
const (
// HeaderNoCompression can be used to disable compression.
// Any header value will disable compression.
// The Header is always removed from output.
HeaderNoCompression = "No-Gzip-Compression"
vary = "Vary"
acceptEncoding = "Accept-Encoding"
contentEncoding = "Content-Encoding"
contentRange = "Content-Range"
acceptRanges = "Accept-Ranges"
contentType = "Content-Type"
contentLength = "Content-Length"
eTag = "ETag"
)
type codings map[string]float64
const (
// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
// The examples seem to indicate that it is.
DefaultQValue = 1.0
// DefaultMinSize is the default minimum size until we enable gzip compression.
// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
// If you take a file that is 1300 bytes and compress it to 800 bytes, it's still transmitted in that same 1500 byte packet regardless, so you've gained nothing.
// That being the case, you should restrict the gzip compression to files with a size (plus header) greater than a single packet,
// 1024 bytes (1KB) is therefore default.
DefaultMinSize = 1024
)
// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
// bytes before writing them to the underlying response. This doesn't close the
// writers, so don't forget to do that.
// It can be configured to skip responses smaller than minSize.
type GzipResponseWriter struct {
http.ResponseWriter
level int
gwFactory writer.GzipWriterFactory
gw writer.GzipWriter
code int // Saves the WriteHeader value.
minSize int // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
buf []byte // Holds the first part of the write before reaching the minSize or the end of the write.
ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter.
keepAcceptRanges bool // Keep "Accept-Ranges" header.
setContentType bool // Add content type, if missing and detected.
suffixETag string // Suffix to add to ETag header if response is compressed.
dropETag bool // Drop ETag header if response is compressed (supersedes suffixETag).
contentTypeFilter func(ct string) bool // Only compress if the response is one of these content-types. All are accepted if empty.
}
type GzipResponseWriterWithCloseNotify struct {
*GzipResponseWriter
}
func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
}
// Write appends data to the gzip writer.
func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// GZIP responseWriter is initialized. Use the GZIP responseWriter.
if w.gw != nil {
return w.gw.Write(b)
}
// If we have already decided not to use GZIP, immediately passthrough.
if w.ignore {
return w.ResponseWriter.Write(b)
}
// Save the write into a buffer for later use in GZIP responseWriter
// (if content is long enough) or at close with regular responseWriter.
wantBuf := 512
if w.minSize > wantBuf {
wantBuf = w.minSize
}
toAdd := len(b)
if len(w.buf)+toAdd > wantBuf {
toAdd = wantBuf - len(w.buf)
}
w.buf = append(w.buf, b[:toAdd]...)
remain := b[toAdd:]
hdr := w.Header()
// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
if len(hdr[HeaderNoCompression]) == 0 && hdr.Get(contentEncoding) == "" && hdr.Get(contentRange) == "" {
// Check more expensive parts now.
cl, _ := atoi(hdr.Get(contentLength))
ct := hdr.Get(contentType)
if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
if len(w.buf) < w.minSize && cl == 0 {
return len(b), nil
}
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if cl >= w.minSize || len(w.buf) >= w.minSize {
// If a Content-Type wasn't specified, infer it from the current buffer.
if ct == "" {
ct = http.DetectContentType(w.buf)
}
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := hdr[contentType]; w.setContentType && !ok {
hdr.Set(contentType, ct)
}
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
if w.contentTypeFilter(ct) {
if err := w.startGzip(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.gw.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
}
}
}
// If we got here, we should not GZIP this response.
if err := w.startPlain(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.ResponseWriter.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
// startGzip initializes a GZIP writer and writes the buffer.
func (w *GzipResponseWriter) startGzip() error {
// Set the GZIP header.
w.Header().Set(contentEncoding, "gzip")
// if the Content-Length is already set, then calls to Write on gzip
// will fail to set the Content-Length header since it's already set
// See: https://github.com/golang/go/issues/14975.
w.Header().Del(contentLength)
// Delete Accept-Ranges.
if !w.keepAcceptRanges {
w.Header().Del(acceptRanges)
}
// Suffix ETag.
if w.suffixETag != "" && !w.dropETag && w.Header().Get(eTag) != "" {
orig := w.Header().Get(eTag)
insertPoint := strings.LastIndex(orig, `"`)
if insertPoint == -1 {
insertPoint = len(orig)
}
w.Header().Set(eTag, orig[:insertPoint]+w.suffixETag+orig[insertPoint:])
}
// Delete ETag.
if w.dropETag {
w.Header().Del(eTag)
}
// Write the header to gzip response.
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
// Ensure that no other WriteHeader's happen
w.code = 0
}
// Initialize and flush the buffer into the gzip response if there are any bytes.
// If there aren't any, we shouldn't initialize it yet because on Close it will
// write the gzip header even if nothing was ever written.
if len(w.buf) > 0 {
// Initialize the GZIP response.
w.init()
n, err := w.gw.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
w.buf = w.buf[:0]
return err
}
return nil
}
// startPlain writes the buffered bytes to the underlying ResponseWriter without gzip.
func (w *GzipResponseWriter) startPlain() error {
w.Header().Del(HeaderNoCompression)
if w.code != 0 {
w.ResponseWriter.WriteHeader(w.code)
// Ensure that no other WriteHeader's happen
w.code = 0
}
w.ignore = true
// If Write was never called then don't call Write on the underlying ResponseWriter.
if len(w.buf) == 0 {
return nil
}
n, err := w.ResponseWriter.Write(w.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
w.buf = w.buf[:0]
return err
}
// WriteHeader just saves the response code until close or GZIP effective writes.
func (w *GzipResponseWriter) WriteHeader(code int) {
if w.code == 0 {
w.code = code
}
}
// init grabs a new gzip writer from the gzipWriterPool and writes the correct
// content encoding header.
func (w *GzipResponseWriter) init() {
// Bytes written during ServeHTTP are redirected to this gzip writer
// before being written to the underlying response.
w.gw = w.gwFactory.New(w.ResponseWriter, w.level)
}
// Close will close the gzip.Writer and will put it back in the gzipWriterPool.
func (w *GzipResponseWriter) Close() error {
if w.ignore {
return nil
}
if w.gw == nil {
// GZIP not triggered yet, write out regular response.
err := w.startPlain()
// Returns the error if any at write.
if err != nil {
err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
}
return err
}
err := w.gw.Close()
w.gw = nil
return err
}
// Flush flushes the underlying *gzip.Writer and then the underlying
// http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
// an http.Flusher.
// If not enough bytes have been written to determine if we have reached minimum size,
// this will be ignored.
// If nothing has been written yet, nothing will be flushed.
func (w *GzipResponseWriter) Flush() {
if w.gw == nil && !w.ignore {
if len(w.buf) == 0 {
// Nothing written yet.
return
}
var (
cl, _ = atoi(w.Header().Get(contentLength))
ct = w.Header().Get(contentType)
ce = w.Header().Get(contentEncoding)
cr = w.Header().Get(contentRange)
)
if ct == "" {
ct = http.DetectContentType(w.buf)
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := w.Header()[contentType]; w.setContentType && !ok {
w.Header().Set(contentType, ct)
}
}
if cl == 0 {
// Assume minSize.
cl = w.minSize
}
// See if we should compress...
if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
w.startGzip()
} else {
w.startPlain()
}
}
if w.gw != nil {
w.gw.Flush()
}
if fw, ok := w.ResponseWriter.(http.Flusher); ok {
fw.Flush()
}
}
// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
// Hijacker, its Hijack method is returned. Otherwise an error is returned.
func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
return hj.Hijack()
}
return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
}
// verify Hijacker interface implementation
var _ http.Hijacker = &GzipResponseWriter{}
var onceDefault sync.Once
var defaultWrapper func(http.Handler) http.HandlerFunc
// GzipHandler makes it easy to wrap an http handler with default settings.
func GzipHandler(h http.Handler) http.HandlerFunc {
onceDefault.Do(func() {
var err error
defaultWrapper, err = NewWrapper()
if err != nil {
panic(err)
}
})
return defaultWrapper(h)
}
var grwPool = sync.Pool{New: func() interface{} { return &GzipResponseWriter{} }}
// NewWrapper returns a reusable wrapper with the supplied options.
func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
c := &config{
level: gzip.DefaultCompression,
minSize: DefaultMinSize,
writer: writer.GzipWriterFactory{
Levels: gzkp.Levels,
New: gzkp.NewWriter,
},
contentTypes: DefaultContentTypeFilter,
setContentType: true,
}
for _, o := range opts {
o(c)
}
if err := c.validate(); err != nil {
return nil, err
}
return func(h http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Add(vary, acceptEncoding)
if acceptsGzip(r) {
gw := grwPool.Get().(*GzipResponseWriter)
*gw = GzipResponseWriter{
ResponseWriter: w,
gwFactory: c.writer,
level: c.level,
minSize: c.minSize,
contentTypeFilter: c.contentTypes,
keepAcceptRanges: c.keepAcceptRanges,
dropETag: c.dropETag,
suffixETag: c.suffixETag,
buf: gw.buf,
setContentType: c.setContentType,
}
if len(gw.buf) > 0 {
gw.buf = gw.buf[:0]
}
defer func() {
gw.Close()
gw.ResponseWriter = nil
grwPool.Put(gw)
}()
if _, ok := w.(http.CloseNotifier); ok {
gwcn := GzipResponseWriterWithCloseNotify{gw}
h.ServeHTTP(gwcn, r)
} else {
h.ServeHTTP(gw, r)
}
} else {
h.ServeHTTP(newNoGzipResponseWriter(w), r)
w.Header().Del(HeaderNoCompression)
}
}
}, nil
}
// Parsed representation of one of the inputs to ContentTypes.
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
mediaType string
params map[string]string
}
// equals returns whether this content type matches another content type.
func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
if pct.mediaType != mediaType {
return false
}
// if pct has no params, don't care about other's params
if len(pct.params) == 0 {
return true
}
// if pct has any params, they must be identical to other's.
if len(pct.params) != len(params) {
return false
}
for k, v := range pct.params {
if w, ok := params[k]; !ok || v != w {
return false
}
}
return true
}
// Used for functional configuration.
type config struct {
minSize int
level int
writer writer.GzipWriterFactory
contentTypes func(ct string) bool
keepAcceptRanges bool
setContentType bool
suffixETag string
dropETag bool
}
func (c *config) validate() error {
min, max := c.writer.Levels()
if c.level < min || c.level > max {
return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, min, max)
}
if c.minSize < 0 {
return fmt.Errorf("minimum size must be more than zero")
}
return nil
}
type option func(c *config)
func MinSize(size int) option {
return func(c *config) {
c.minSize = size
}
}
// CompressionLevel sets the compression level
func CompressionLevel(level int) option {
return func(c *config) {
c.level = level
}
}
// SetContentType sets the Content-Type header on responses before they are
// returned, if it is unset and a content type was detected.
// Default: true.
func SetContentType(b bool) option {
return func(c *config) {
c.setContentType = b
}
}
// Implementation changes the implementation of GzipWriter
//
// The default implementation is writer/gzkp/NewWriter,
// which is backed by github.com/klauspost/compress/gzip.
func Implementation(writer writer.GzipWriterFactory) option {
return func(c *config) {
c.writer = writer
}
}
// ContentTypes specifies a list of content types to compare
// the Content-Type header to before compressing. If none
// match, the response will be returned as-is.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive will match a content type
// that has the same MIME type, regardless of that content type's other
// directives. I.e., "text/html" will match both "text/html" and
// "text/html; charset=utf-8".
//
// A MIME type with any other directive will only match a content type
// that has the same MIME type and other directives. I.e.,
// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
//
// By default, common compressed audio, video and archive formats are excluded; see DefaultContentTypeFilter.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypes(types []string) option {
return func(c *config) {
var contentTypes []parsedContentType
for _, v := range types {
mediaType, params, err := mime.ParseMediaType(v)
if err == nil {
contentTypes = append(contentTypes, parsedContentType{mediaType, params})
}
}
c.contentTypes = func(ct string) bool {
return handleContentType(contentTypes, ct)
}
}
}
// ExceptContentTypes specifies a list of content types to compare
// the Content-Type header to before compressing. If none
// match, the response will be compressed.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive will match a content type
// that has the same MIME type, regardless of that content type's other
// directives. I.e., "text/html" will match both "text/html" and
// "text/html; charset=utf-8".
//
// A MIME type with any other directive will only match a content type
// that has the same MIME type and other directives. I.e.,
// "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
//
// By default, common compressed audio, video and archive formats are excluded; see DefaultContentTypeFilter.
//
// Setting this will override default and any previous Content Type settings.
func ExceptContentTypes(types []string) option {
return func(c *config) {
var contentTypes []parsedContentType
for _, v := range types {
mediaType, params, err := mime.ParseMediaType(v)
if err == nil {
contentTypes = append(contentTypes, parsedContentType{mediaType, params})
}
}
c.contentTypes = func(ct string) bool {
return !handleContentType(contentTypes, ct)
}
}
}
// KeepAcceptRanges will keep Accept-Ranges header on gzipped responses.
// This will likely break ranged requests since that cannot be transparently
// handled by the filter.
func KeepAcceptRanges() option {
return func(c *config) {
c.keepAcceptRanges = true
}
}
// ContentTypeFilter allows adding a custom content type filter.
//
// The supplied function must return true/false to indicate if content
// should be compressed.
//
// When called no parsing of the content type 'ct' has been done.
// It may have been set or auto-detected.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypeFilter(compress func(ct string) bool) option {
return func(c *config) {
c.contentTypes = compress
}
}
// SuffixETag adds the specified suffix to the ETag header (if it exists) of
// responses which are compressed.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// A suffix such as "-gzip" is sometimes used as a workaround for generating a
// unique new ETag (see https://bz.apache.org/bugzilla/show_bug.cgi?id=39727).
func SuffixETag(suffix string) option {
return func(c *config) {
c.suffixETag = suffix
}
}
// DropETag removes the ETag of responses which are compressed. If DropETag is
// specified in conjunction with SuffixETag, this option will take precedence
// and the ETag will be dropped.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// This workaround eliminates ETag conflicts between the compressed and
// uncompressed versions by removing the ETag from the compressed version.
func DropETag() option {
return func(c *config) {
c.dropETag = true
}
}
// acceptsGzip returns true if the given HTTP request indicates that it will
// accept a gzipped response.
func acceptsGzip(r *http.Request) bool {
// Note that we don't request this for HEAD requests,
// due to a bug in nginx:
// https://trac.nginx.org/nginx/ticket/358
// https://golang.org/issue/5522
return r.Method != http.MethodHead && parseEncodingGzip(r.Header.Get(acceptEncoding)) > 0
}
// returns true if we've been configured to compress the specific content type.
func handleContentType(contentTypes []parsedContentType, ct string) bool {
// If contentTypes is empty we handle all content types.
if len(contentTypes) == 0 {
return true
}
mediaType, params, err := mime.ParseMediaType(ct)
if err != nil {
return false
}
for _, c := range contentTypes {
if c.equals(mediaType, params) {
return true
}
}
return false
}
// parseEncodingGzip returns the qvalue of gzip compression.
func parseEncodingGzip(s string) float64 {
s = strings.TrimSpace(s)
for len(s) > 0 {
stop := strings.IndexByte(s, ',')
if stop < 0 {
stop = len(s)
}
coding, qvalue, _ := parseCoding(s[:stop])
if coding == "gzip" {
return qvalue
}
if stop == len(s) {
break
}
s = s[stop+1:]
}
return 0
}
func parseEncodings(s string) (codings, error) {
split := strings.Split(s, ",")
c := make(codings, len(split))
var e []string
for _, ss := range split {
coding, qvalue, err := parseCoding(ss)
if err != nil {
e = append(e, err.Error())
} else {
c[coding] = qvalue
}
}
// TODO (adammck): Use a proper multi-error struct, so the individual errors
// can be extracted if anyone cares.
if len(e) > 0 {
return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
}
return c, nil
}
// parseCoding parses a single coding (content-coding with an optional qvalue),
// as might appear in an Accept-Encoding header. It attempts to forgive minor
// formatting errors.
func parseCoding(s string) (coding string, qvalue float64, err error) {
for n, part := range strings.Split(s, ";") {
part = strings.TrimSpace(part)
qvalue = DefaultQValue
if n == 0 {
coding = strings.ToLower(part)
} else if strings.HasPrefix(part, "q=") {
qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
if qvalue < 0.0 {
qvalue = 0.0
} else if qvalue > 1.0 {
qvalue = 1.0
}
}
}
if coding == "" {
err = fmt.Errorf("empty content-coding")
}
return
}
// Don't compress any audio/video types.
var excludePrefixDefault = []string{"video/", "audio/", "image/jp"}
// Skip a bunch of compressed types that contain these strings.
// Curated from the supposedly still-active formats on https://en.wikipedia.org/wiki/List_of_archive_formats
var excludeContainsDefault = []string{"compress", "zip", "snappy", "lzma", "xz", "zstd", "brotli", "stuffit"}
// DefaultContentTypeFilter excludes common compressed audio, video and archive formats.
func DefaultContentTypeFilter(ct string) bool {
ct = strings.TrimSpace(strings.ToLower(ct))
if ct == "" {
return true
}
for _, s := range excludeContainsDefault {
if strings.Contains(ct, s) {
return false
}
}
for _, prefix := range excludePrefixDefault {
if strings.HasPrefix(ct, prefix) {
return false
}
}
return true
}
// CompressAllContentTypeFilter will compress all mime types.
func CompressAllContentTypeFilter(ct string) bool {
return true
}
const intSize = 32 << (^uint(0) >> 63)
// atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
func atoi(s string) (int, bool) {
sLen := len(s)
if intSize == 32 && (0 < sLen && sLen < 10) ||
intSize == 64 && (0 < sLen && sLen < 19) {
// Fast path for small integers that fit int type.
s0 := s
if s[0] == '-' || s[0] == '+' {
s = s[1:]
if len(s) < 1 {
return 0, false
}
}
n := 0
for _, ch := range []byte(s) {
ch -= '0'
if ch > 9 {
return 0, false
}
n = n*10 + int(ch)
}
if s0[0] == '-' {
n = -n
}
return n, true
}
// Slow path for invalid, big, or underscored integers.
i64, err := strconv.ParseInt(s, 10, 0)
return int(i64), err == nil
}
// newNoGzipResponseWriter will return a response writer that
// cleans up compression artifacts.
// If the underlying writer supports http.Hijacker, the returned writer will as well.
func newNoGzipResponseWriter(w http.ResponseWriter) http.ResponseWriter {
n := &NoGzipResponseWriter{ResponseWriter: w}
if hj, ok := w.(http.Hijacker); ok {
x := struct {
http.ResponseWriter
http.Hijacker
http.Flusher
}{
ResponseWriter: n,
Hijacker: hj,
Flusher: n,
}
return x
}
return n
}
// NoGzipResponseWriter filters out HeaderNoCompression.
type NoGzipResponseWriter struct {
http.ResponseWriter
hdrCleaned bool
}
func (n *NoGzipResponseWriter) CloseNotify() <-chan bool {
if cn, ok := n.ResponseWriter.(http.CloseNotifier); ok {
return cn.CloseNotify()
}
return nil
}
func (n *NoGzipResponseWriter) Flush() {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
if f, ok := n.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
}
func (n *NoGzipResponseWriter) Header() http.Header {
return n.ResponseWriter.Header()
}
func (n *NoGzipResponseWriter) Write(bytes []byte) (int, error) {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
return n.ResponseWriter.Write(bytes)
}
func (n *NoGzipResponseWriter) WriteHeader(statusCode int) {
if !n.hdrCleaned {
n.ResponseWriter.Header().Del(HeaderNoCompression)
n.hdrCleaned = true
}
n.ResponseWriter.WriteHeader(statusCode)
}
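
For illustration, a minimal sketch of a handler opting out of compression per response via the HeaderNoCompression constant defined above (the route and payload are hypothetical):

```go
package main

import (
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Any value disables compression; the wrapper strips the header
		// before the response is sent.
		w.Header().Set(gzhttp.HeaderNoCompression, "1")
		w.Write([]byte("already-compressed bytes")) // hypothetical payload
	})
	http.Handle("/blob", gzhttp.GzipHandler(handler))
	http.ListenAndServe(":8000", nil)
}
```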

vendor/github.com/klauspost/compress/gzhttp/transport.go generated vendored Normal file

@@ -0,0 +1,211 @@
// Copyright (c) 2021 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gzhttp
import (
"io"
"net/http"
"strings"
"sync"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/compress/zstd"
)
// Transport will wrap a transport with a custom handler
// that will request gzip and automatically decompress it.
// Using this is significantly faster than using the default transport.
func Transport(parent http.RoundTripper, opts ...transportOption) http.RoundTripper {
g := gzRoundtripper{parent: parent, withZstd: true, withGzip: true}
for _, o := range opts {
o(&g)
}
var ae []string
if g.withZstd {
ae = append(ae, "zstd")
}
if g.withGzip {
ae = append(ae, "gzip")
}
g.acceptEncoding = strings.Join(ae, ",")
return &g
}
type transportOption func(c *gzRoundtripper)
// TransportEnableZstd will send Zstandard as a compression option to the server.
// Enabled by default, but may be disabled if future problems arise.
func TransportEnableZstd(b bool) transportOption {
return func(c *gzRoundtripper) {
c.withZstd = b
}
}
// TransportEnableGzip will send Gzip as a compression option to the server.
// Enabled by default.
func TransportEnableGzip(b bool) transportOption {
return func(c *gzRoundtripper) {
c.withGzip = b
}
}
type gzRoundtripper struct {
parent http.RoundTripper
acceptEncoding string
withZstd, withGzip bool
}
func (g *gzRoundtripper) RoundTrip(req *http.Request) (*http.Response, error) {
var requestedComp bool
if req.Header.Get("Accept-Encoding") == "" &&
req.Header.Get("Range") == "" &&
req.Method != "HEAD" {
// Request gzip only, not deflate. Deflate is ambiguous and
// not as universally supported anyway.
// See: https://zlib.net/zlib_faq.html#faq39
//
// Note that we don't request this for HEAD requests,
// due to a bug in nginx:
// https://trac.nginx.org/nginx/ticket/358
// https://golang.org/issue/5522
//
// We don't request gzip if the request is for a range, since
// auto-decoding a portion of a gzipped document will just fail
// anyway. See https://golang.org/issue/8923
requestedComp = len(g.acceptEncoding) > 0
req.Header.Set("Accept-Encoding", g.acceptEncoding)
}
resp, err := g.parent.RoundTrip(req)
if err != nil || !requestedComp {
return resp, err
}
// Decompress
if g.withGzip && asciiEqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
resp.Body = &gzipReader{body: resp.Body}
resp.Header.Del("Content-Encoding")
resp.Header.Del("Content-Length")
resp.ContentLength = -1
resp.Uncompressed = true
}
if g.withZstd && asciiEqualFold(resp.Header.Get("Content-Encoding"), "zstd") {
resp.Body = &zstdReader{body: resp.Body}
resp.Header.Del("Content-Encoding")
resp.Header.Del("Content-Length")
resp.ContentLength = -1
resp.Uncompressed = true
}
return resp, nil
}
var gzReaderPool sync.Pool
// gzipReader wraps a response body so it can lazily
// call gzip.NewReader on the first call to Read
type gzipReader struct {
body io.ReadCloser // underlying HTTP/1 response body framing
zr *gzip.Reader // lazily-initialized gzip reader
zerr error // any error from gzip.NewReader; sticky
}
func (gz *gzipReader) Read(p []byte) (n int, err error) {
if gz.zr == nil {
if gz.zerr == nil {
zr, ok := gzReaderPool.Get().(*gzip.Reader)
if ok {
gz.zr, gz.zerr = zr, zr.Reset(gz.body)
} else {
gz.zr, gz.zerr = gzip.NewReader(gz.body)
}
}
if gz.zerr != nil {
return 0, gz.zerr
}
}
return gz.zr.Read(p)
}
func (gz *gzipReader) Close() error {
if gz.zr != nil {
gzReaderPool.Put(gz.zr)
gz.zr = nil
}
return gz.body.Close()
}
// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
// are equal, ASCII-case-insensitively.
func asciiEqualFold(s, t string) bool {
if len(s) != len(t) {
return false
}
for i := 0; i < len(s); i++ {
if lower(s[i]) != lower(t[i]) {
return false
}
}
return true
}
// lower returns the ASCII lowercase version of b.
func lower(b byte) byte {
if 'A' <= b && b <= 'Z' {
return b + ('a' - 'A')
}
return b
}
// zstdReaderPool pools zstd decoders.
var zstdReaderPool sync.Pool
// zstdReader wraps a response body so it can lazily
// call zstd.NewReader on the first call to Read
type zstdReader struct {
body io.ReadCloser // underlying HTTP/1 response body framing
zr *zstd.Decoder // lazily-initialized zstd reader
zerr error // any error from zstd.NewReader; sticky
}
func (zr *zstdReader) Read(p []byte) (n int, err error) {
if zr.zerr != nil {
return 0, zr.zerr
}
if zr.zr == nil {
if zr.zerr == nil {
reader, ok := zstdReaderPool.Get().(*zstd.Decoder)
if ok {
zr.zerr = reader.Reset(zr.body)
zr.zr = reader
} else {
zr.zr, zr.zerr = zstd.NewReader(zr.body, zstd.WithDecoderLowmem(true), zstd.WithDecoderMaxWindow(32<<20), zstd.WithDecoderConcurrency(1))
}
}
if zr.zerr != nil {
return 0, zr.zerr
}
}
n, err = zr.zr.Read(p)
if err != nil {
// Usually this will be io.EOF,
// stash the decoder and keep the error.
zr.zr.Reset(nil)
zstdReaderPool.Put(zr.zr)
zr.zr = nil
zr.zerr = err
}
return
}
func (zr *zstdReader) Close() error {
if zr.zr != nil {
zr.zr.Reset(nil)
zstdReaderPool.Put(zr.zr)
zr.zr = nil
}
return zr.body.Close()
}
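
A small usage sketch for the transport wrapper above (the target URL is illustrative); TransportEnableZstd(false) limits Accept-Encoding to gzip:

```go
package main

import (
	"fmt"
	"io"
	"net/http"

	"github.com/klauspost/compress/gzhttp"
)

func main() {
	client := http.Client{
		// Wrap the default transport; matching responses are decompressed
		// transparently on Read.
		Transport: gzhttp.Transport(http.DefaultTransport, gzhttp.TransportEnableZstd(false)),
	}
	resp, err := client.Get("https://example.com")
	if err != nil {
		return
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println("read", len(body), "bytes")
}
```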

vendor/github.com/klauspost/compress/gzhttp/writer/gzkp/gzkp.go generated vendored Normal file

@@ -0,0 +1,70 @@
// Package gzkp provides gzip compression through github.com/klauspost/compress/gzip.
package gzkp
import (
"io"
"sync"
"github.com/klauspost/compress/gzhttp/writer"
"github.com/klauspost/compress/gzip"
)
// gzipWriterPools stores a sync.Pool for each compression level for reuse of
// gzip.Writers. Use poolIndex to convert a compression level to an index into
// gzipWriterPools.
var gzipWriterPools [gzip.BestCompression - gzip.StatelessCompression + 1]*sync.Pool
func init() {
for i := gzip.StatelessCompression; i <= gzip.BestCompression; i++ {
addLevelPool(i)
}
}
// poolIndex maps a compression level to its index into gzipWriterPools. It
// assumes that level is a valid gzip compression level.
func poolIndex(level int) int {
return level - gzip.StatelessCompression
}
func addLevelPool(level int) {
gzipWriterPools[poolIndex(level)] = &sync.Pool{
New: func() interface{} {
// NewWriterLevel only returns error on a bad level, we are guaranteeing
// that this will be a valid level so it is okay to ignore the returned
// error.
w, _ := gzip.NewWriterLevel(nil, level)
return w
},
}
}
type pooledWriter struct {
*gzip.Writer
index int
}
func (pw *pooledWriter) Close() error {
err := pw.Writer.Close()
gzipWriterPools[pw.index].Put(pw.Writer)
pw.Writer = nil
return err
}
func NewWriter(w io.Writer, level int) writer.GzipWriter {
index := poolIndex(level)
gzw := gzipWriterPools[index].Get().(*gzip.Writer)
gzw.Reset(w)
return &pooledWriter{
Writer: gzw,
index: index,
}
}
func Levels() (min, max int) {
return gzip.StatelessCompression, gzip.BestCompression
}
func ImplementationInfo() string {
return "klauspost/compress/gzip"
}

vendor/github.com/klauspost/compress/gzhttp/writer/interface.go generated vendored Normal file

@@ -0,0 +1,20 @@
package writer
import "io"
// GzipWriter implements the functions needed for compressing content.
type GzipWriter interface {
Write(p []byte) (int, error)
Close() error
Flush() error
}
// GzipWriterFactory contains the information needed for custom gzip implementations.
type GzipWriterFactory struct {
// Must return the minimum and maximum supported level.
Levels func() (min, max int)
// New must return a new GzipWriter.
// level will always be within the return limits above.
New func(writer io.Writer, level int) GzipWriter
}

View file

@ -61,7 +61,7 @@ func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
b, err := fse.Decompress(in[:iSize], s.fse)
s.fse.Out = nil
if err != nil {
return s, nil, err
return s, nil, fmt.Errorf("fse decompress returned: %w", err)
}
if len(b) > 255 {
return s, nil, errors.New("corrupt input: output table too large")

View file

@ -103,6 +103,28 @@ func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}
// EncodeBlockInto exposes encodeBlock but checks dst size.
func EncodeBlockInto(dst, src []byte) (d int) {
if MaxEncodedLen(len(src)) > len(dst) {
return 0
}
// encodeBlock breaks on too big blocks, so split.
for len(src) > 0 {
p := src
src = nil
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
if len(p) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], p)
} else {
d += encodeBlock(dst[d:], p)
}
}
return d
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.

View file

@ -20,11 +20,12 @@ This is important, so you don't have to worry about spending CPU cycles on alrea
* Concurrent stream compression
* Faster decompression, even for Snappy compatible content
* Concurrent Snappy/S2 stream decompression
* Ability to quickly skip forward in compressed stream
* Skip forward in compressed stream
* Random seeking with indexes
* Compatible with reading Snappy compressed content
* Smaller block size overhead on incompressible blocks
* Block concatenation
* Block Dictionary support
* Uncompressed stream mode
* Automatic stream size padding
* Snappy compatible block compression
@ -594,6 +595,123 @@ Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s
Decompression speed should be around the same as using the 'better' compression mode.
## Dictionaries
*Note: S2 dictionary compression is currently at an early implementation stage, with no assembly for
either encoding or decoding. Performance improvements can be expected in the future.*
Adding dictionaries allows providing a custom dictionary that will serve as a lookup at the beginning of blocks.
The same dictionary *must* be used for both encoding and decoding.
S2 does not keep track of whether the same dictionary is used,
and using the wrong dictionary will most often not result in an error when decompressing.
Blocks encoded *without* dictionaries can be decompressed seamlessly *with* a dictionary.
This means it is possible to switch from an encoding without dictionaries to an encoding with dictionaries
and treat the blocks similarly.
Similar to [zStandard dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression),
the same usage scenario applies to S2 dictionaries.
> Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no universal dictionary). Hence, deploying one dictionary per type of data will provide the greatest benefits. Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file.
S2 further limits the dictionary to only be enabled on the first 64KB of a block.
This will remove any negative (speed) impacts of the dictionaries on bigger blocks.
### Compression
Using the [github_users_sample_set](https://github.com/facebook/zstd/releases/download/v1.1.3/github_users_sample_set.tar.zst)
and a 64KB dictionary trained with zStandard the following sizes can be achieved.
| | Default | Better | Best |
|--------------------|------------------|------------------|-----------------------|
| Without Dictionary | 3362023 (44.92%) | 3083163 (41.19%) | 3057944 (40.86%) |
| With Dictionary | 921524 (12.31%) | 873154 (11.67%) | 785503 (10.49%) |
So for highly repetitive content, this case provides an almost 3x reduction in size.
For less uniform data we will use the Go source code tree.
Compressing the first 64KB of all `.go` files in `go/src`, Go 1.19.5, 8912 files, 51253563 bytes input:
| | Default | Better | Best |
|--------------------|-------------------|-------------------|-------------------|
| Without Dictionary | 22955767 (44.79%) | 20189613 (39.39%) | 19482828 (38.01%) |
| With Dictionary | 19654568 (38.35%) | 16289357 (31.78%) | 15184589 (29.63%) |
| Saving/file | 362 bytes | 428 bytes | 472 bytes |
### Creating Dictionaries
There are no tools to create dictionaries in S2.
However, there are multiple ways to create a useful dictionary:
#### Using a Sample File
If your input is very uniform, you can just use a sample file as the dictionary.
For example in the `github_users_sample_set` above, the average compression only goes up from
10.49% to 11.48% by using the first file as dictionary compared to using a dedicated dictionary.
```Go
// Read a sample
sample, err := os.ReadFile("sample.json")
// Create a dictionary.
dict := s2.MakeDict(sample, nil)
// b := dict.Bytes() will provide a dictionary that can be saved
// and reloaded with s2.NewDict(b).
// To encode:
encoded := dict.Encode(nil, file)
// To decode:
decoded, err := dict.Decode(nil, file)
```
#### Using Zstandard
Zstandard dictionaries can easily be converted to S2 dictionaries.
This can be helpful to generate dictionaries for files that don't have a fixed structure.
Example, with training set files placed in `./training-set`:
`λ zstd -r --train-fastcover training-set/* --maxdict=65536 -o name.dict`
This will create a dictionary of 64KB, that can be converted to a dictionary like this:
```Go
// Decode the Zstandard dictionary.
insp, err := zstd.InspectDictionary(zdict)
if err != nil {
panic(err)
}
// We are only interested in the contents.
// Assume that files start with "// Copyright (c) 2023".
// Search for the longest match for that.
// This may save a few bytes.
dict := s2.MakeDict(insp.Content(), []byte("// Copyright (c) 2023"))
// b := dict.Bytes() will provide a dictionary that can be saved
// and reloaded with s2.NewDict(b).
// We can now encode using this dictionary
encodedWithDict := dict.Encode(nil, payload)
// To decode content:
decoded, err := dict.Decode(nil, encodedWithDict)
```
It is recommended to save the dictionary returned by `b := dict.Bytes()`, since that will contain only the S2 dictionary.
This dictionary can later be loaded using `s2.NewDict(b)`. The dictionary then no longer requires `zstd` to be initialized.
Also note how `s2.MakeDict` allows you to search for a common starting sequence of your files.
This can be omitted, at the expense of a few bytes.
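A short sketch of that save/reload round trip, reusing `dict` and `payload` from the snippets above (the file name is illustrative):

```Go
// Persist only the S2 dictionary.
b := dict.Bytes()
if err := os.WriteFile("s2.dict", b, 0o644); err != nil {
	panic(err)
}

// Later: reload and use it without zstd being involved.
saved, err := os.ReadFile("s2.dict")
if err != nil {
	panic(err)
}
reloaded := s2.NewDict(saved) // returns nil on invalid input
encodedWithDict := reloaded.Encode(nil, payload)
```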
# Snappy Compatibility
S2 now offers full compatibility with Snappy.
@ -929,6 +1047,72 @@ The first copy of a block cannot be a repeat offset and the offset is reset on e
Default streaming block size is 1MB.
# Dictionary Encoding
Adding dictionaries allows providing a custom dictionary that will serve as a lookup at the beginning of blocks.
A dictionary provides an initial repeat value that can be used to point to a common header.
Other than that the dictionary contains values that can be used as back-references.
Often used data should be placed at the *end* of the dictionary since offsets < 2048 bytes will be smaller.
## Format
Dictionary *content* must be at least 16 bytes and less than or equal to 64KiB (65536 bytes).
Encoding: `[repeat value (uvarint)][dictionary content...]`
Before the dictionary content, an unsigned base-128 (uvarint) encoded value specifying the initial repeat offset.
This value is an offset into the dictionary content and not a back-reference offset,
so setting this to 0 will make the repeat value point to the first value of the dictionary.
The value must be less than the dictionary length minus 8, as sketched below.
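A minimal Go sketch of producing this layout (values illustrative; `s2.NewDict` parses exactly this serialization):

```Go
content := []byte("Yesterday 25 bananas were added to Benjamins brown bag")
repeat := uint64(10) // offset into content; must be < len(content)-8

// [repeat value (uvarint)][dictionary content...]
hdr := make([]byte, binary.MaxVarintLen16)
n := binary.PutUvarint(hdr, repeat)
serialized := append(hdr[:n], content...)

d := s2.NewDict(serialized) // nil if the layout or sizes are invalid
```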
## Encoding
From the decoder point of view the dictionary content is seen as preceding the encoded content.
`[dictionary content][decoded output]`
Backreferences to the dictionary are encoded as ordinary backreferences that have an offset before the start of the decoded block.
Matches copying from the dictionary are **not** allowed to cross from the dictionary into the decoded data.
However, if a copy ends at the end of the dictionary the next repeat will point to the start of the decoded buffer, which is allowed.
The first match can be a repeat value, which will use the repeat offset stored in the dictionary.
When 64KB (65536 bytes) has been en/decoded, it is no longer allowed to reference the dictionary,
neither by copy nor by repeat operations.
If the boundary is crossed while copying from the dictionary, the operation should complete,
but the next instruction is not allowed to reference the dictionary.
Valid blocks encoded *without* a dictionary can be decoded with any dictionary.
There are no checks whether the supplied dictionary is the correct one for a block.
Because of this there is no overhead from using a dictionary.
## Example
This is the dictionary content. Elements are separated by `[]`.
Dictionary: `[0x0a][Yesterday 25 bananas were added to Benjamins brown bag]`.
Initial repeat offset is set at 10, which is the letter `2`.
Encoded `[LIT "10"][REPEAT len=10][LIT "hich"][MATCH off=50 len=6][MATCH off=31 len=6][MATCH off=61 len=10]`
Decoded: `[10][ bananas w][hich][ were ][brown ][were added]`
Output: `10 bananas which were brown were added`
## Streams
For streams each block can use the dictionary.
The dictionary cannot currently be provided on the stream.
# LICENSE
This code is based on the [Snappy-Go](https://github.com/golang/snappy) implementation.

View file

@ -13,6 +13,7 @@ import (
"io/ioutil"
"math"
"runtime"
"strconv"
"sync"
)
@ -880,15 +881,20 @@ func (r *Reader) Skip(n int64) error {
// See Reader.ReadSeeker
type ReadSeeker struct {
*Reader
readAtMu sync.Mutex
}
// ReadSeeker will return an io.ReadSeeker compatible version of the reader.
// ReadSeeker will return an io.ReadSeeker and io.ReaderAt
// compatible version of the reader.
// If 'random' is specified the returned io.Seeker can be used for
// random seeking, otherwise only forward seeking is supported.
// Enabling random seeking requires the original input to support
// the io.Seeker interface.
// A custom index can be specified which will be used if supplied.
// When using a custom index, it will not be read from the input stream.
// The ReadAt position will affect regular reads and the current position of Seek.
// So using Read after ReadAt will continue from where the ReadAt stopped.
// No functions should be used concurrently.
// The returned ReadSeeker contains a shallow reference to the existing Reader,
// meaning changes performed to one are reflected in the other.
func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
@ -958,42 +964,55 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
// Reset on EOF
r.err = nil
}
if offset == 0 && whence == io.SeekCurrent {
return r.blockStart + int64(r.i), nil
}
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
if whence == io.SeekCurrent && offset >= 0 {
err := r.Skip(offset)
return r.blockStart + int64(r.i), err
}
if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) {
err := r.Skip(offset - r.blockStart - int64(r.i))
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// Calculate absolute offset.
absOffset := offset
switch whence {
case io.SeekStart:
case io.SeekCurrent:
offset += r.blockStart + int64(r.i)
absOffset = r.blockStart + int64(r.i) + offset
case io.SeekEnd:
if offset > 0 {
return 0, errors.New("seek after end of file")
if r.index == nil {
return 0, ErrUnsupported
}
offset = r.index.TotalUncompressed + offset
absOffset = r.index.TotalUncompressed + offset
default:
r.err = ErrUnsupported
return 0, r.err
}
if offset < 0 {
if absOffset < 0 {
return 0, errors.New("seek before start of file")
}
c, u, err := r.index.Find(offset)
if !r.readHeader {
// Make sure we read the header.
_, r.err = r.Read([]byte{})
if r.err != nil {
return 0, r.err
}
}
// If we are inside current block no need to seek.
// This includes no offset changes.
if absOffset >= r.blockStart && absOffset < r.blockStart+int64(r.j) {
r.i = int(absOffset - r.blockStart)
return r.blockStart + int64(r.i), nil
}
rs, ok := r.r.(io.ReadSeeker)
if r.index == nil || !ok {
currOffset := r.blockStart + int64(r.i)
if absOffset >= currOffset {
err := r.Skip(absOffset - currOffset)
return r.blockStart + int64(r.i), err
}
return 0, ErrUnsupported
}
// We can seek and we have an index.
c, u, err := r.index.Find(absOffset)
if err != nil {
return r.blockStart + int64(r.i), err
}
@ -1005,11 +1024,56 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
}
r.i = r.j // Remove rest of current block.
if u < offset {
r.blockStart = u - int64(r.j) // Adjust current block start for accounting.
if u < absOffset {
// Forward inside block
return offset, r.Skip(offset - u)
return absOffset, r.Skip(absOffset - u)
}
return offset, nil
if u > absOffset {
return 0, fmt.Errorf("s2 seek: (internal error) u (%d) > absOffset (%d)", u, absOffset)
}
return absOffset, nil
}
// ReadAt reads len(p) bytes into p starting at offset off in the
// underlying input source. It returns the number of bytes
// read (0 <= n <= len(p)) and any error encountered.
//
// When ReadAt returns n < len(p), it returns a non-nil error
// explaining why more bytes were not returned. In this respect,
// ReadAt is stricter than Read.
//
// Even if ReadAt returns n < len(p), it may use all of p as scratch
// space during the call. If some data is available but not len(p) bytes,
// ReadAt blocks until either all the data is available or an error occurs.
// In this respect ReadAt is different from Read.
//
// If the n = len(p) bytes returned by ReadAt are at the end of the
// input source, ReadAt may return either err == EOF or err == nil.
//
// If ReadAt is reading from an input source with a seek offset,
// ReadAt should not affect nor be affected by the underlying
// seek offset.
//
// Clients of ReadAt can execute parallel ReadAt calls on the
// same input source. This is however not recommended.
func (r *ReadSeeker) ReadAt(p []byte, offset int64) (int, error) {
r.readAtMu.Lock()
defer r.readAtMu.Unlock()
_, err := r.Seek(offset, io.SeekStart)
if err != nil {
return 0, err
}
n := 0
for n < len(p) {
n2, err := r.Read(p[n:])
if err != nil {
// This will include io.EOF
return n + n2, err
}
n += n2
}
return n, nil
}
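A hedged usage sketch of the ReadSeeker/ReadAt combination above (the file name is illustrative; assumes the stream was written with an index and the input supports io.Seeker, as os.File does):

```Go
f, err := os.Open("data.s2")
if err != nil {
	panic(err)
}
defer f.Close()

r := s2.NewReader(f)
rs, err := r.ReadSeeker(true, nil) // random seeking; the index is read from the stream
if err != nil {
	panic(err)
}
// Read 128 bytes of uncompressed data starting at offset 1MiB.
buf := make([]byte, 128)
if _, err := rs.ReadAt(buf, 1<<20); err != nil && err != io.EOF {
	panic(err)
}
```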
// ReadByte satisfies the io.ByteReader interface.
@ -1048,3 +1112,370 @@ func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error {
r.skippableCB[id] = fn
return nil
}
// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func s2DecodeDict(dst, src []byte, dict *Dict) int {
if dict == nil {
return s2Decode(dst, src)
}
const debug = false
const debugErrs = debug
if debug {
fmt.Println("Starting decode, dst len:", len(dst))
}
var d, s, length int
offset := len(dict.dict) - dict.repeat
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER when doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
}
length = int(x) + 1
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
length = int(src[s]) + 4
s += 1
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
startOff := len(dict.dict) - offset + d
if startOff < 0 || startOff+length > len(dict.dict) {
if debugErrs {
fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
}
copy(dst[d:d+length], dict.dict[startOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// Remaining with extra checks...
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
rOff := len(dict.dict) - (offset - d)
if debug {
fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
}
if rOff+length > len(dict.dict) {
if debugErrs {
fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
if rOff < 0 {
if debugErrs {
fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
copy(dst[d:d+length], dict.dict[rOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
if debugErrs {
fmt.Println("wanted length", len(dst), "got", d)
}
return decodeErrCodeCorrupt
}
return 0
}

View file

@ -57,6 +57,9 @@ func s2Decode(dst, src []byte) int {
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
@ -109,6 +112,10 @@ func s2Decode(dst, src []byte) int {
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}
@ -175,6 +182,9 @@ func s2Decode(dst, src []byte) int {
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
@ -241,6 +251,9 @@ func s2Decode(dst, src []byte) int {
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}

331
vendor/github.com/klauspost/compress/s2/dict.go generated vendored Normal file
View file

@ -0,0 +1,331 @@
// Copyright (c) 2022+ Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"bytes"
"encoding/binary"
"sync"
)
const (
// MinDictSize is the minimum dictionary size when repeat has been read.
MinDictSize = 16
// MaxDictSize is the maximum dictionary size when repeat has been read.
MaxDictSize = 65536
// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
MaxDictSrcOffset = 65535
)
// Dict contains a dictionary that can be used for encoding and decoding s2
type Dict struct {
dict []byte
repeat int // Repeat as index of dict
fast, better, best sync.Once
fastTable *[1 << 14]uint16
betterTableShort *[1 << 14]uint16
betterTableLong *[1 << 17]uint16
bestTableShort *[1 << 16]uint32
bestTableLong *[1 << 19]uint32
}
// NewDict will read a dictionary.
// It will return nil if the dictionary is invalid.
func NewDict(dict []byte) *Dict {
if len(dict) == 0 {
return nil
}
var d Dict
// Repeat is the first value of the dict
r, n := binary.Uvarint(dict)
if n <= 0 {
return nil
}
dict = dict[n:]
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize || len(dict) > MaxDictSize {
return nil
}
d.repeat = int(r)
if d.repeat > len(dict) {
return nil
}
return &d
}
// Bytes will return a serialized version of the dictionary.
// The output can be sent to NewDict.
func (d *Dict) Bytes() []byte {
dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
}
// MakeDict will create a dictionary.
// 'data' must be at least MinDictSize.
// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.
// If searchStart is set the start repeat value will be set to the last
// match of this content.
// If no matches are found, it will attempt to find shorter matches.
// This content should match the typical start of a block.
// If at least 4 bytes cannot be matched, repeat is set to start of block.
func MakeDict(data []byte, searchStart []byte) *Dict {
if len(data) == 0 {
return nil
}
if len(data) > MaxDictSize {
data = data[len(data)-MaxDictSize:]
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize {
return nil
}
// Find the longest match possible, last entry if multiple.
for s := len(searchStart); s > 4; s-- {
if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
d.repeat = idx
break
}
}
return &d
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockDictGo(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBetterDict(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as was used for encoding,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBest(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= cap(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
if s2DecodeDict(dst, src[s:], d) != 0 {
return nil, ErrCorrupt
}
return dst, nil
}
func (d *Dict) initFast() {
d.fast.Do(func() {
const (
tableBits = 14
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8-2; i += 3 {
x0 := load64(d.dict, i)
h0 := hash6(x0, tableBits)
h1 := hash6(x0>>8, tableBits)
h2 := hash6(x0>>16, tableBits)
table[h0] = uint16(i)
table[h1] = uint16(i + 1)
table[h2] = uint16(i + 2)
}
d.fastTable = &table
})
}
func (d *Dict) initBetter() {
d.better.Do(func() {
const (
// Long hash matches.
lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint16
var sTable [maxSTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
lTable[hash7(cv, lTableBits)] = uint16(i)
sTable[hash4(cv, sTableBits)] = uint16(i)
}
d.betterTableShort = &sTable
d.betterTableLong = &lTable
})
}
func (d *Dict) initBest() {
d.best.Do(func() {
const (
// Long hash matches.
lTableBits = 19
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 16
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
candidateS := sTable[hashS]
lTable[hashL] = uint32(i) | candidateL<<16
sTable[hashS] = uint32(i) | candidateS<<16
}
d.bestTableShort = &sTable
d.bestTableLong = &lTable
})
}

View file

@ -58,6 +58,32 @@ func Encode(dst, src []byte) []byte {
return dst[:d]
}
// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {
d = calcBlockSizeSmall(src)
} else {
d = calcBlockSize(src)
}
if d == 0 {
return -1
}
// Size of the varint encoded block size.
d += (bits.Len64(uint64(len(src))) + 7) / 7
if d >= len(src) {
return -1
}
return d
}
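A sketch of using this estimate to skip pointless compression; `store` is a hypothetical helper, not part of the library:

```Go
// Store compressed output only when the fast estimate predicts a gain.
if est := s2.EstimateBlockSize(block); est < 0 {
	store(block) // incompressible: keep the block as-is
} else {
	store(s2.Encode(nil, block)) // real compression often beats the estimate
}
```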
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
@ -132,7 +158,7 @@ func EncodeBest(dst, src []byte) []byte {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBest(dst[d:], src)
n := encodeBlockBest(dst[d:], src, nil)
if n > 0 {
d += n
return dst[:d]
@ -408,6 +434,7 @@ type Writer struct {
randSrc io.Reader
writerWg sync.WaitGroup
index Index
customEnc func(dst, src []byte) int
// wroteStreamHeader is whether we have written the stream header.
wroteStreamHeader bool
@ -773,6 +800,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
}
func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
if w.customEnc != nil {
return w.customEnc(obuf, uncompressed)
}
if w.snappy {
switch w.level {
case levelFast:
@ -790,7 +820,7 @@ func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
case levelBetter:
return encodeBlockBetter(obuf, uncompressed)
case levelBest:
return encodeBlockBest(obuf, uncompressed)
return encodeBlockBest(obuf, uncompressed, nil)
}
return 0
}
@ -1339,3 +1369,15 @@ func WriterFlushOnWrite() WriterOption {
return nil
}
}
// WriterCustomEncoder allows overriding the encoder for blocks on the stream.
// The function must compress 'src' into 'dst' and return the number of bytes used in dst.
// The block size (initial varint) should not be added by the encoder.
// Returning 0 indicates the block could not be compressed.
// The function should expect to be called concurrently.
func WriterCustomEncoder(fn func(dst, src []byte) int) WriterOption {
return func(w *Writer) error {
w.customEnc = fn
return nil
}
}
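A minimal sketch of plugging in a custom encoder. This one simply disables block compression, which is valid because returning 0 stores the block uncompressed; real encoders must be safe for concurrent calls:

```Go
w := s2.NewWriter(out, s2.WriterCustomEncoder(func(dst, src []byte) int {
	return 0 // never compress; every block is stored uncompressed
}))
```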

View file

@ -8,6 +8,7 @@ package s2
import (
"bytes"
"encoding/binary"
"fmt"
"math/bits"
)
@ -455,3 +456,594 @@ emitRemainder:
}
return d
}
// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
// Initialize the hash table.
const (
tableBits = 14
maxTableSize = 1 << tableBits
maxAhead = 8 // maximum bytes ahead without checking sLimit
debug = false
)
dict.initFast()
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form can start with a dict entry (copy or repeat).
s := 0
// Convert dict repeat to offset
repeat := len(dict.dict) - dict.repeat
cv := load64(src, 0)
// While in dict
searchDict:
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
if nextS > sLimit {
if debug {
fmt.Println("slimit reached", s, nextS)
}
break searchDict
}
candidateDict := int(dict.fastTable[hash0])
candidateDict2 := int(dict.fastTable[hash1])
candidate2 := int(table[hash1])
candidate := int(table[hash0])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if repeat > s {
candidate := len(dict.dict) - repeat + s
if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
// Extend back
base := s
for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
s += 4
candidate += 4
for candidate < len(dict.dict)-8 && s <= len(src)-8 {
if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitRepeat(dst[d:], repeat, s-base)
if debug {
fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
cv = load64(src, s)
continue
}
} else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if nextEmit > 0 {
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if debug {
fmt.Println("emitted reg repeat", s-base, "s:", s)
}
cv = load64(src, s)
continue searchDict
}
if s == 0 {
cv = load64(src, nextS)
s = nextS
continue searchDict
}
// Start with table. These matches will always be closer.
if uint32(cv) == load32(src, candidate) {
goto emitMatch
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
goto emitMatch
}
// Check dict. Dicts have longer offsets, so we want longer matches.
if cv == load64(dict.dict, candidateDict) {
table[hash2] = uint32(s + 2)
goto emitDict
}
candidateDict = int(dict.fastTable[hash2])
// Check if upper 7 bytes match
if candidateDict2 >= 1 {
if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
table[hash2] = uint32(s + 2)
candidateDict = candidateDict2
s++
goto emitDict
}
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
goto emitMatch
}
if candidateDict >= 2 {
// Check if upper 6 bytes match
if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
s += 2
goto emitDict
}
}
cv = load64(src, nextS)
s = nextS
continue searchDict
emitDict:
{
if debug {
if load32(dict.dict, candidateDict) != load32(src, s) {
panic("dict emit mismatch")
}
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
candidateDict--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
{
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = s + (len(dict.dict)) - candidateDict
// Extend the 4-byte match as long as possible.
s += 4
candidateDict += 4
for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateDict += 8
}
// Matches longer than 64 are split.
if s <= sLimit || s-base < 8 {
d += emitCopy(dst[d:], repeat, s-base)
} else {
// Split to ensure we don't start a copy within next block
d += emitCopy(dst[d:], repeat, 4)
d += emitRepeat(dst[d:], repeat, s-base-4)
}
if false {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := dict.dict[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Index and continue loop to try new candidate.
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>8, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s - 1)
cv = load64(src, s)
}
continue
}
emitMatch:
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopy(dst[d:], repeat, s-base)
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if debug && s == candidate {
panic("s == candidate")
}
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
// Search without dict:
if repeat > s {
repeat = 0
}
// No more dict
sLimit = len(src) - inputMargin
if s >= sLimit {
goto emitRemainder
}
if debug {
fmt.Println("non-dict matching at", s, "repeat:", repeat)
}
cv = load64(src, s)
if debug {
fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
}
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if nextEmit > 0 {
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
if debug {
fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopy(dst[d:], repeat, s-base)
if debug {
// Validate match.
if s <= candidate {
panic("s <= candidate")
}
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
if debug {
fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if debug && s == candidate {
panic("s == candidate")
}
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
if debug && nextEmit != s {
fmt.Println("emitted ", len(src)-nextEmit, "literals")
}
}
return d
}

View file

@ -3,6 +3,8 @@
package s2
const hasAmd64Asm = true
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.

View file

@ -7,6 +7,7 @@ package s2
import (
"fmt"
"math"
"math/bits"
)
@ -18,7 +19,7 @@ import (
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) {
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
@ -30,6 +31,8 @@ func encodeBlockBest(dst, src []byte) (d int) {
maxSTableSize = 1 << sTableBits
inputMargin = 8 + 2
debug = false
)
// sLimit is when to stop looking for offset/length copies. The inputMargin
@ -39,6 +42,10 @@ func encodeBlockBest(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
return 0
}
sLimitDict := len(src) - inputMargin
if sLimitDict > MaxDictSrcOffset-inputMargin {
sLimitDict = MaxDictSrcOffset - inputMargin
}
var lTable [maxLTableSize]uint64
var sTable [maxSTableSize]uint64
@ -52,10 +59,15 @@ func encodeBlockBest(dst, src []byte) (d int) {
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
repeat := 1
if dict != nil {
dict.initBest()
s = 0
repeat = len(dict.dict) - dict.repeat
}
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
const lowbitMask = 0xffffffff
getCur := func(x uint64) int {
return int(x & lowbitMask)
@ -71,7 +83,7 @@ func encodeBlockBest(dst, src []byte) (d int) {
s int
length int
score int
rep bool
rep, dict bool
}
var best match
for {
@ -85,6 +97,12 @@ func encodeBlockBest(dst, src []byte) (d int) {
if nextS > sLimit {
goto emitRemainder
}
if dict != nil && s >= MaxDictSrcOffset {
dict = nil
if repeat > s {
repeat = math.MinInt32
}
}
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
@ -114,7 +132,15 @@ func encodeBlockBest(dst, src []byte) (d int) {
}
m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
s += 4
for s <= sLimit {
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
@ -130,6 +156,62 @@ func encodeBlockBest(dst, src []byte) (d int) {
}
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {
// Don't retest if we have the same offset.
return match{offset: offset, s: s}
}
if load32(dict.dict, candidate) != first {
return match{offset: offset, s: s}
}
m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
s += 4
if !rep {
for s < sLimitDict && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
} else {
for s < len(src) && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
}
m.length -= candidate
m.score = score(m)
if m.score <= -m.s {
// Eliminate if no savings, we might find a better one.
m.length = 0
}
return m
}
bestOf := func(a, b match) match {
if b.length == 0 {
@ -146,35 +228,82 @@ func encodeBlockBest(dst, src []byte) (d int) {
return b
}
if s > 0 {
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
}
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
}
{
if (dict == nil || repeat <= s) && repeat > 0 {
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
} else if s-repeat < -4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
candidate++
best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
}
if best.length > 0 {
hashS := hash4(cv>>8, sTableBits)
// s+1
nextShort := sTable[hash4(cv>>8, sTableBits)]
nextShort := sTable[hashS]
s := s + 1
cv := load64(src, s)
nextLong := lTable[hash8(cv, lTableBits)]
hashL := hash8(cv, lTableBits)
nextLong := lTable[hashL]
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
// Dict at + 1
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
// s+2
if true {
nextShort = sTable[hash4(cv>>8, sTableBits)]
hashS := hash4(cv>>8, sTableBits)
nextShort = sTable[hashS]
s++
cv = load64(src, s)
nextLong = lTable[hash8(cv, lTableBits)]
hashL := hash8(cv, lTableBits)
nextLong = lTable[hashL]
if (dict == nil || repeat <= s) && repeat > 0 {
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
} else if repeat-s > 4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
}
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Dict at +2
// Very small gain
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
}
// Search for a match at best match end, see if that is better.
// Allow some bytes at the beginning to mismatch.
@ -227,7 +356,7 @@ func encodeBlockBest(dst, src []byte) (d int) {
// Extend backwards, not needed for repeats...
s = best.s
if !best.rep {
if !best.rep && !best.dict {
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
best.offset--
best.length++
@ -244,7 +373,6 @@ func encodeBlockBest(dst, src []byte) (d int) {
base := s
offset := s - best.offset
s += best.length
if offset > 65535 && s-base <= 5 && !best.rep {
@ -256,16 +384,28 @@ func encodeBlockBest(dst, src []byte) (d int) {
cv = load64(src, s)
continue
}
if debug && nextEmit != base {
fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if best.rep {
if nextEmit > 0 {
if nextEmit > 0 || best.dict {
if debug {
fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], offset, best.length)
} else {
// First match, cannot be repeat.
// First match without dict cannot be a repeat.
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
} else {
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
repeat = offset
@ -296,6 +436,9 @@ emitRemainder:
if d+len(src)-nextEmit > dstLimit {
return 0
}
if debug && nextEmit != s {
fmt.Println("emitted ", len(src)-nextEmit, "literals")
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
@ -642,7 +785,6 @@ func emitRepeatSize(offset, length int) int {
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
if left > 0 {
return 5 + emitRepeatSize(offset, left)
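
Aside: the match-extension loops in these encoders (seen above in matchDict and again below) all rely on the same primitive: compare 8 bytes at a time with load64 and, on the first difference, recover the exact match length from the trailing zero count of the XOR. A minimal, self-contained sketch of that primitive; matchLenSketch and the surrounding program are illustrative, not the library's exact helpers:

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// load64 reads 8 little-endian bytes starting at index i.
func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// matchLenSketch returns the length of the common prefix of a and b.
func matchLenSketch(a, b []byte) int {
	n := 0
	for len(a)-n >= 8 && len(b)-n >= 8 {
		if diff := load64(a, n) ^ load64(b, n); diff != 0 {
			// With little-endian loads, the index of the first mismatching
			// byte is TrailingZeros64(diff)/8.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	// Fewer than 8 bytes left: fall back to byte-by-byte comparison.
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLenSketch([]byte("abcdefgh12"), []byte("abcdefgh1X"))) // 9
}
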

View file

@ -6,6 +6,8 @@
package s2
import (
"bytes"
"fmt"
"math/bits"
)
@ -476,3 +478,623 @@ emitRemainder:
}
return d
}
// encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
// Initialize the hash tables.
const (
// Long hash matches.
lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
maxAhead = 8 // maximum bytes ahead without checking sLimit
debug = false
)
sLimit := len(src) - inputMargin
if sLimit > MaxDictSrcOffset-maxAhead {
sLimit = MaxDictSrcOffset - maxAhead
}
if len(src) < minNonLiteralBlockSize {
return 0
}
dict.initBetter()
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 6
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// With a dictionary there is history to copy from, so we start looking
// for hash matches at s == 0.
s := 0
cv := load64(src, s)
// Initialize repeat from the dictionary's default repeat offset.
repeat := len(dict.dict) - dict.repeat
// While in dict
searchDict:
for {
candidateL := 0
nextS := 0
for {
// Next src position to check
nextS = s + (s-nextEmit)>>7 + 1
if nextS > sLimit {
break searchDict
}
hashL := hash7(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL = int(lTable[hashL])
candidateS := int(sTable[hashS])
dictL := int(dict.betterTableLong[hashL])
dictS := int(dict.betterTableShort[hashS])
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
valLong := load64(src, candidateL)
valShort := load64(src, candidateS)
// If long matches at least 8 bytes, use that.
if s != 0 {
if cv == valLong {
goto emitMatch
}
if cv == valShort {
candidateL = candidateS
goto emitMatch
}
}
// Check dict repeat.
if repeat >= s+4 {
candidate := len(dict.dict) - repeat + s
if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
// Extend back
base := s
for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != base {
fmt.Println("emitted ", base-nextEmit, "literals")
}
s += 4
candidate += 4
for candidate < len(dict.dict)-8 && s <= len(src)-8 {
if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitRepeat(dst[d:], repeat, s-base)
if debug {
fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
}
nextEmit = s
if s >= sLimit {
break searchDict
}
cv = load64(src, s)
// Index in-between
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 2
index1 -= 2
}
continue
}
}
// Don't try to find match at s==0
if s == 0 {
cv = load64(src, nextS)
s = nextS
continue
}
// A long-table match likely covers at least 7 bytes, so take that.
if uint32(cv) == uint32(valLong) {
goto emitMatch
}
// Long dict...
if uint32(cv) == load32(dict.dict, dictL) {
candidateL = dictL
goto emitDict
}
// Check our short candidate
if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
goto emitMatch
}
// Use our short candidate.
candidateL = candidateS
goto emitMatch
}
if uint32(cv) == load32(dict.dict, dictS) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
goto emitMatch
}
candidateL = dictS
goto emitDict
}
cv = load64(src, nextS)
s = nextS
}
emitDict:
{
if debug {
if load32(dict.dict, candidateL) != load32(src, s) {
panic("dict emit mismatch")
}
}
// Extend backwards.
// The top bytes will be rechecked to get the full match.
for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
{
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
offset := s + (len(dict.dict)) - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if repeat == offset {
if debug {
fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
}
d += emitRepeat(dst[d:], offset, s-base)
} else {
if debug {
fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
}
// Matches that run past sLimit are split.
if s <= sLimit || s-base < 8 {
d += emitCopy(dst[d:], offset, s-base)
} else {
// Split to ensure we don't start a copy within next block.
d += emitCopy(dst[d:], offset, 4)
d += emitRepeat(dst[d:], offset, s-base-4)
}
repeat = offset
}
if false {
// Validate match.
if s <= candidateL {
panic("s <= candidate")
}
a := src[base:s]
b := dict.dict[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
break searchDict
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
continue
}
emitMatch:
// Extend backwards
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := base - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidateL] {
s++
candidateL++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if offset > 65535 && s-base <= 5 && repeat != offset {
// Bail if the match is equal to or worse than the encoding.
s = nextS + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if debug && nextEmit != s {
fmt.Println("emitted ", s-nextEmit, "literals")
}
if repeat == offset {
if debug {
fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
}
d += emitRepeat(dst[d:], offset, s-base)
} else {
if debug {
fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
}
d += emitCopy(dst[d:], offset, s-base)
repeat = offset
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
// Search without dict:
if repeat > s {
repeat = 0
}
// No more dict
sLimit = len(src) - inputMargin
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
if debug {
fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
}
for {
candidateL := 0
nextS := 0
for {
// Next src position to check
nextS = s + (s-nextEmit)>>7 + 1
if nextS > sLimit {
goto emitRemainder
}
hashL := hash7(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL = int(lTable[hashL])
candidateS := int(sTable[hashS])
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
valLong := load64(src, candidateL)
valShort := load64(src, candidateS)
// If long matches at least 8 bytes, use that.
if cv == valLong {
break
}
if cv == valShort {
candidateL = candidateS
break
}
// Check repeat at offset checkRep.
const checkRep = 1
// Minimum length of a repeat. Tested with various values;
// while 4-5 offers improvements in some cases, 6 reduces
// regressions significantly.
const wantRepeatBytes = 6
const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
// Extend forward
candidate := s - repeat + wantRepeatBytes + checkRep
s += wantRepeatBytes + checkRep
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidate] {
s++
candidate++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// Index in-between
index0 := base + 1
index1 := s - 2
cv = load64(src, s)
for index0 < index1 {
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 2
index1 -= 2
}
cv = load64(src, s)
continue
}
// A long-table match likely covers at least 7 bytes, so take that.
if uint32(cv) == uint32(valLong) {
break
}
// Check our short candidate
if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
break
}
// Use our short candidate.
candidateL = candidateS
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := base - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidateL] {
s++
candidateL++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if offset > 65535 && s-base <= 5 && repeat != offset {
// Bail if the match is equal to or worse than the encoding.
s = nextS + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if repeat == offset {
d += emitRepeat(dst[d:], offset, s-base)
} else {
d += emitCopy(dst[d:], offset, s-base)
repeat = offset
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Index short & long
index0 := base + 1
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
index0 += 1
index1 -= 1
cv = load64(src, s)
// index every second long in between.
for index0 < index1 {
lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
index0 += 2
index1 -= 2
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
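
The "index every second long in between" loops above deserve a note: after a long match, the encoder back-fills the hash tables for positions it skipped, walking inward from both ends of the matched region and indexing every second position. A minimal sketch of that strategy, assuming an illustrative multiplicative hash (hash7Sketch is not the library's exact function or constant):

package main

import "encoding/binary"

const lTableBits = 17 // matches the long-table size used above

func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}

// hash7Sketch hashes the low 7 bytes of u into tableBits bits.
// The constant is an illustrative 64-bit mixer, not s2's exact prime.
func hash7Sketch(u uint64, tableBits uint) uint32 {
	const prime = 0x9E3779B185EBCA87
	return uint32(((u << 8) * prime) >> (64 - tableBits))
}

// indexBetween back-fills lTable for every second position in (base, s),
// walking inward from both ends, as the encoder above does after a match.
// The caller must guarantee s-2+8 <= len(src).
func indexBetween(lTable []uint32, src []byte, base, s int) {
	index0, index1 := base+1, s-2
	for index0 < index1 {
		lTable[hash7Sketch(load64(src, index0), lTableBits)] = uint32(index0)
		lTable[hash7Sketch(load64(src, index1), lTableBits)] = uint32(index1)
		index0 += 2
		index1 -= 2
	}
}

func main() {
	lTable := make([]uint32, 1<<lTableBits)
	src := make([]byte, 64)
	indexBetween(lTable, src, 0, 32)
}
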

View file

@ -4,9 +4,12 @@
package s2
import (
"bytes"
"math/bits"
)
const hasAmd64Asm = false
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
@ -312,3 +315,405 @@ func matchLen(a []byte, b []byte) int {
}
return len(a) + checked
}
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 13
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Calculate the size an emitCopy would add, and then see if another
// copy could be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last copy.
//
// If we exit this loop normally then we need to account for a literal next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 9
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Calculate the size an emitCopy would add, and then see if another
// copy could be our next move. Repeat until we find no match for the
// input immediately after what was consumed by the last copy.
//
// If we exit this loop normally then we need to account for a literal next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Bail if we don't have space for more.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
// emitLiteralSize returns the number of bytes needed to encode a literal chunk.
//
// It assumes that:
//
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralSize(lit []byte) int {
if len(lit) == 0 {
return 0
}
switch {
case len(lit) <= 60:
return len(lit) + 1
case len(lit) <= 1<<8:
return len(lit) + 2
case len(lit) <= 1<<16:
return len(lit) + 3
case len(lit) <= 1<<24:
return len(lit) + 4
default:
return len(lit) + 5
}
}
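
Since calcBlockSize and calcBlockSizeSmall are driven entirely by these size helpers, the tiers above are easy to sanity-check: lengths up to 60 are packed into the tag byte, and each further tier spends one more length byte. A hypothetical table-driven test (assuming access to the unexported emitLiteralSize; not part of this diff):

package s2

import "testing"

// TestEmitLiteralSizeTiers is a hypothetical check of the size tiers above.
func TestEmitLiteralSizeTiers(t *testing.T) {
	cases := []struct{ n, want int }{
		{10, 11},       // <= 60: length packed into the tag byte
		{200, 202},     // <= 1<<8: one extra length byte
		{70000, 70004}, // <= 1<<24: three extra length bytes
	}
	for _, tc := range cases {
		if got := emitLiteralSize(make([]byte, tc.n)); got != tc.want {
			t.Errorf("n=%d: got %d, want %d", tc.n, got, tc.want)
		}
	}
}
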
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockAsm should be unreachable")
}
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}

View file

@ -146,6 +146,20 @@ func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
//go:noescape
func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// calcBlockSize returns the size of the encoded form of a non-empty src.
// Maximum input 4294967295 bytes.
// The varint-encoded length of the decompressed bytes is not included.
//
//go:noescape
func calcBlockSize(src []byte) int
// calcBlockSizeSmall returns the size of the encoded form of a non-empty src.
// Maximum input 1024 bytes.
// The varint-encoded length of the decompressed bytes is not included.
//
//go:noescape
func calcBlockSizeSmall(src []byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
@ -192,3 +206,13 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
//go:noescape
func matchLen(a []byte, b []byte) int
// cvtLZ4BlockAsm converts an LZ4 block to S2
//
//go:noescape
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4BlockSnappyAsm converts an LZ4 block to Snappy
//
//go:noescape
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)

File diff suppressed because it is too large

585
vendor/github.com/klauspost/compress/s2/lz4convert.go generated vendored Normal file
View file

@ -0,0 +1,585 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"errors"
"fmt"
)
// LZ4Converter provides conversion from LZ4 blocks as defined here:
// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
type LZ4Converter struct {
}
// ErrDstTooSmall is returned when provided destination is too small.
var ErrDstTooSmall = errors.New("s2: destination too small")
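
A hypothetical usage sketch for the converter (not part of this diff): ConvertBlock appends to dst starting at len(dst) and needs enough capacity for the whole converted block, so a caller can retry with a larger buffer on ErrDstTooSmall. The initial sizing below is an assumption, not a documented bound:

package main

import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

// convertLZ4 converts a bare LZ4 block (no frame headers) to an S2 block.
func convertLZ4(lz4Block []byte) ([]byte, int, error) {
	var c s2.LZ4Converter
	// Start with a guessed capacity and grow on ErrDstTooSmall.
	capacity := 2*len(lz4Block) + 64
	for {
		out, uncompressed, err := c.ConvertBlock(make([]byte, 0, capacity), lz4Block)
		if err == s2.ErrDstTooSmall {
			capacity *= 2
			continue
		}
		return out, uncompressed, err
	}
}

func main() {
	// Supply a real LZ4 block here; an empty input converts to an empty block.
	out, n, err := convertLZ4(nil)
	fmt.Println(len(out), n, err)
}
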
// ConvertBlock will convert an LZ4 block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4 block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// emitRepeat16 writes a repeat chunk and returns the number of bytes written.
// Length must be at least 4 and < 1<<24
func emitRepeat16(dst []byte, offset uint16, length int) int {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
return 2
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
return 2
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
return 3
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
return 4
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
return 5 + emitRepeat16(dst[5:], offset, left)
}
return 5
}
// emitCopy16 writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint16
// 4 <= length && length <= math.MaxUint32
func emitCopy16(dst []byte, offset uint16, length int) int {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return off + emitRepeat16(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
return 2
}
// emitLiteralGo writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralGo(dst, lit []byte) int {
if len(lit) == 0 {
return 0
}
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[1] = uint8(n)
dst[0] = 60<<2 | tagLiteral
i = 2
case n < 1<<16:
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 61<<2 | tagLiteral
i = 3
case n < 1<<24:
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 62<<2 | tagLiteral
i = 4
default:
dst[4] = uint8(n >> 24)
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 63<<2 | tagLiteral
i = 5
}
return i + copy(dst[i:], lit)
}
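
To make the copy encodings above concrete, here is a worked example of the 2-byte tagCopy1 form, using the standard Snappy tag values (tagLiteral = 0, tagCopy1 = 1, tagCopy2 = 2):

// Worked example of the 2-byte tagCopy1 encoding (offset < 2048, 4 <= length < 12):
//
//	offset = 10, length = 6
//	dst[0] = uint8(10>>8)<<5 | uint8(6-4)<<2 | tagCopy1
//	       = 0<<5 | 2<<2 | 1 = 0b0000_1001
//	dst[1] = uint8(10) = 0b0000_1010
//
// Bits 0-1 of dst[0] carry the tag, bits 2-4 carry length-4 (lengths 4-11),
// and bits 5-7 carry the top three bits of the 11-bit offset.
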

View file

@ -32,14 +32,38 @@ func (d *dict) ID() uint32 {
return d.id
}
// DictContentSize returns the dictionary content size or 0 if d is nil.
func (d *dict) DictContentSize() int {
// ContentSize returns the dictionary content size or 0 if d is nil.
func (d *dict) ContentSize() int {
if d == nil {
return 0
}
return len(d.content)
}
// Content returns the dictionary content.
func (d *dict) Content() []byte {
if d == nil {
return nil
}
return d.content
}
// Offsets returns the initial offsets.
func (d *dict) Offsets() [3]int {
if d == nil {
return [3]int{}
}
return d.offsets
}
// LitEncoder returns the literal encoder.
func (d *dict) LitEncoder() *huff0.Scratch {
if d == nil {
return nil
}
return d.litEnc
}
// Load a dictionary as described in
// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
func loadDict(b []byte) (*dict, error) {
@ -64,7 +88,7 @@ func loadDict(b []byte) (*dict, error) {
var err error
d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("loading literal table: %w", err)
}
d.litEnc.Reuse = huff0.ReusePolicyMust
@ -122,3 +146,16 @@ func loadDict(b []byte) (*dict, error) {
return &d, nil
}
// InspectDictionary loads a zstd dictionary and provides functions to inspect the content.
func InspectDictionary(b []byte) (interface {
ID() uint32
ContentSize() int
Content() []byte
Offsets() [3]int
LitEncoder() *huff0.Scratch
}, error) {
initPredefined()
d, err := loadDict(b)
return d, err
}
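
A hypothetical caller of the new inspection helper (not part of this diff), assuming the usual github.com/klauspost/compress/zstd import path:

package main

import (
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	b, err := os.ReadFile("dictionary.bin") // path is illustrative
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	d, err := zstd.InspectDictionary(b)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("dict id:", d.ID())
	fmt.Println("content size:", d.ContentSize())
	fmt.Println("initial offsets:", d.Offsets())
}
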

View file

@ -149,7 +149,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
if singleBlock {
e.lowMem = true
}
e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
e.lowMem = low
}

7
vendor/modules.txt vendored
View file

@ -338,11 +338,14 @@ github.com/jpillora/backoff
# github.com/json-iterator/go v1.1.12
## explicit; go 1.12
github.com/json-iterator/go
# github.com/klauspost/compress v1.15.15
## explicit; go 1.17
# github.com/klauspost/compress v1.16.0
## explicit; go 1.18
github.com/klauspost/compress
github.com/klauspost/compress/flate
github.com/klauspost/compress/fse
github.com/klauspost/compress/gzhttp
github.com/klauspost/compress/gzhttp/writer
github.com/klauspost/compress/gzhttp/writer/gzkp
github.com/klauspost/compress/gzip
github.com/klauspost/compress/huff0
github.com/klauspost/compress/internal/cpuinfo