mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
app/vmagent/remotewrite: add benchmarks for comparing the performance of standard Snappy encoder with github.com/klauspost/compress/s2 encoder
The standard Snappy encoder from github.com/golang/snappy shows quite good performance numbers for compressing the Prometheus remote_write proto messages according to the added benchmarks, so there is no need in switching to github.com/klauspost/compress/s2 yet.
This commit is contained in:
parent
b4410b1c63
commit
2b55d167d7
23 changed files with 25601 additions and 4 deletions
62
app/vmagent/remotewrite/pendingseries_test.go
Normal file
62
app/vmagent/remotewrite/pendingseries_test.go
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
package remotewrite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/golang/snappy"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPushWriteRequest(t *testing.T) {
|
||||||
|
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||||
|
t.Run(fmt.Sprintf("%d", rowsCount), func(t *testing.T) {
|
||||||
|
testPushWriteRequest(t, rowsCount)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testPushWriteRequest(t *testing.T, rowsCount int) {
|
||||||
|
wr := newTestWriteRequest(rowsCount, 10)
|
||||||
|
pushBlockLen := 0
|
||||||
|
pushBlock := func(block []byte) {
|
||||||
|
if pushBlockLen > 0 {
|
||||||
|
panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
|
||||||
|
}
|
||||||
|
pushBlockLen = len(block)
|
||||||
|
}
|
||||||
|
pushWriteRequest(wr, pushBlock)
|
||||||
|
b := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||||
|
zb := snappy.Encode(nil, b)
|
||||||
|
maxPushBlockLen := len(zb)
|
||||||
|
minPushBlockLen := maxPushBlockLen / 2
|
||||||
|
if pushBlockLen < minPushBlockLen {
|
||||||
|
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be at least %d bytes", pushBlockLen, minPushBlockLen)
|
||||||
|
}
|
||||||
|
if pushBlockLen > maxPushBlockLen {
|
||||||
|
t.Fatalf("unexpected block len after pushWriteRequest; got %d bytes; must be smaller or equal to %d bytes", pushBlockLen, maxPushBlockLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTestWriteRequest(seriesCount, labelsCount int) *prompbmarshal.WriteRequest {
|
||||||
|
var wr prompbmarshal.WriteRequest
|
||||||
|
for i := 0; i < seriesCount; i++ {
|
||||||
|
var labels []prompbmarshal.Label
|
||||||
|
for j := 0; j < labelsCount; j++ {
|
||||||
|
labels = append(labels, prompbmarshal.Label{
|
||||||
|
Name: fmt.Sprintf("label_%d_%d", i, j),
|
||||||
|
Value: fmt.Sprintf("value_%d_%d", i, j),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
wr.Timeseries = append(wr.Timeseries, prompbmarshal.TimeSeries{
|
||||||
|
Labels: labels,
|
||||||
|
Samples: []prompbmarshal.Sample{
|
||||||
|
{
|
||||||
|
Value: float64(i),
|
||||||
|
Timestamp: 1000*int64(i),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return &wr
|
||||||
|
}
|
36
app/vmagent/remotewrite/pendingseries_timing_test.go
Normal file
36
app/vmagent/remotewrite/pendingseries_timing_test.go
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
package remotewrite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/golang/snappy"
|
||||||
|
"github.com/klauspost/compress/s2"
|
||||||
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkCompressWriteRequestSnappy(b *testing.B) {
|
||||||
|
b.Run("snappy", func(b *testing.B) {
|
||||||
|
benchmarkCompressWriteRequest(b, snappy.Encode)
|
||||||
|
})
|
||||||
|
b.Run("s2", func(b *testing.B) {
|
||||||
|
benchmarkCompressWriteRequest(b, s2.EncodeSnappy)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkCompressWriteRequest(b *testing.B, compressFunc func(dst, src []byte) []byte) {
|
||||||
|
for _, rowsCount := range []int{1, 10, 100, 1e3, 1e4} {
|
||||||
|
b.Run(fmt.Sprintf("rows_%d", rowsCount), func(b *testing.B) {
|
||||||
|
wr := newTestWriteRequest(rowsCount, 10)
|
||||||
|
data := prompbmarshal.MarshalWriteRequest(nil, wr)
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.SetBytes(int64(rowsCount))
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
var zb []byte
|
||||||
|
for pb.Next() {
|
||||||
|
zb = compressFunc(zb[:cap(zb)], data)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
2
go.mod
2
go.mod
|
@ -4,7 +4,7 @@ go 1.19
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.google.com/go/storage v1.26.0
|
cloud.google.com/go/storage v1.26.0
|
||||||
github.com/VictoriaMetrics/fastcache v1.10.0
|
github.com/VictoriaMetrics/fastcache v1.12.0
|
||||||
|
|
||||||
// Do not use the original github.com/valyala/fasthttp because of issues
|
// Do not use the original github.com/valyala/fasthttp because of issues
|
||||||
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -104,8 +104,8 @@ github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdko
|
||||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||||
github.com/VictoriaMetrics/fastcache v1.10.0 h1:5hDJnLsKLpnUEToub7ETuRu8RCkb40woBZAUiKonXzY=
|
github.com/VictoriaMetrics/fastcache v1.12.0 h1:vnVi/y9yKDcD9akmc4NqAoqgQhJrOwUF+j9LTgn4QDE=
|
||||||
github.com/VictoriaMetrics/fastcache v1.10.0/go.mod h1:tjiYeEfYXCqacuvYw/7UoDIeJaNxq6132xHICNP77w8=
|
github.com/VictoriaMetrics/fastcache v1.12.0/go.mod h1:tjiYeEfYXCqacuvYw/7UoDIeJaNxq6132xHICNP77w8=
|
||||||
github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a/ueoLdLL0=
|
github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a/ueoLdLL0=
|
||||||
github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ=
|
github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ=
|
||||||
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
|
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
|
||||||
|
|
15
vendor/github.com/klauspost/compress/s2/.gitignore
generated
vendored
Normal file
15
vendor/github.com/klauspost/compress/s2/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
testdata/bench
|
||||||
|
|
||||||
|
# These explicitly listed benchmark data files are for an obsolete version of
|
||||||
|
# snappy_test.go.
|
||||||
|
testdata/alice29.txt
|
||||||
|
testdata/asyoulik.txt
|
||||||
|
testdata/fireworks.jpeg
|
||||||
|
testdata/geo.protodata
|
||||||
|
testdata/html
|
||||||
|
testdata/html_x_4
|
||||||
|
testdata/kppkn.gtb
|
||||||
|
testdata/lcet10.txt
|
||||||
|
testdata/paper-100k.pdf
|
||||||
|
testdata/plrabn12.txt
|
||||||
|
testdata/urls.10K
|
28
vendor/github.com/klauspost/compress/s2/LICENSE
generated
vendored
Normal file
28
vendor/github.com/klauspost/compress/s2/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
|
||||||
|
Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
965
vendor/github.com/klauspost/compress/s2/README.md
generated
vendored
Normal file
965
vendor/github.com/klauspost/compress/s2/README.md
generated
vendored
Normal file
|
@ -0,0 +1,965 @@
|
||||||
|
# S2 Compression
|
||||||
|
|
||||||
|
S2 is an extension of [Snappy](https://github.com/google/snappy).
|
||||||
|
|
||||||
|
S2 is aimed for high throughput, which is why it features concurrent compression for bigger payloads.
|
||||||
|
|
||||||
|
Decoding is compatible with Snappy compressed content, but content compressed with S2 cannot be decompressed by Snappy.
|
||||||
|
This means that S2 can seamlessly replace Snappy without converting compressed content.
|
||||||
|
|
||||||
|
S2 can produce Snappy compatible output, faster and better than Snappy.
|
||||||
|
If you want full benefit of the changes you should use s2 without Snappy compatibility.
|
||||||
|
|
||||||
|
S2 is designed to have high throughput on content that cannot be compressed.
|
||||||
|
This is important, so you don't have to worry about spending CPU cycles on already compressed data.
|
||||||
|
|
||||||
|
## Benefits over Snappy
|
||||||
|
|
||||||
|
* Better compression
|
||||||
|
* Adjustable compression (3 levels)
|
||||||
|
* Concurrent stream compression
|
||||||
|
* Faster decompression, even for Snappy compatible content
|
||||||
|
* Concurrent Snappy/S2 stream decompression
|
||||||
|
* Ability to quickly skip forward in compressed stream
|
||||||
|
* Random seeking with indexes
|
||||||
|
* Compatible with reading Snappy compressed content
|
||||||
|
* Smaller block size overhead on incompressible blocks
|
||||||
|
* Block concatenation
|
||||||
|
* Uncompressed stream mode
|
||||||
|
* Automatic stream size padding
|
||||||
|
* Snappy compatible block compression
|
||||||
|
|
||||||
|
## Drawbacks over Snappy
|
||||||
|
|
||||||
|
* Not optimized for 32 bit systems
|
||||||
|
* Streams use slightly more memory due to larger blocks and concurrency (configurable)
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
Installation: `go get -u github.com/klauspost/compress/s2`
|
||||||
|
|
||||||
|
Full package documentation:
|
||||||
|
|
||||||
|
[![godoc][1]][2]
|
||||||
|
|
||||||
|
[1]: https://godoc.org/github.com/klauspost/compress?status.svg
|
||||||
|
[2]: https://godoc.org/github.com/klauspost/compress/s2
|
||||||
|
|
||||||
|
## Compression
|
||||||
|
|
||||||
|
```Go
|
||||||
|
func EncodeStream(src io.Reader, dst io.Writer) error {
|
||||||
|
enc := s2.NewWriter(dst)
|
||||||
|
_, err := io.Copy(enc, src)
|
||||||
|
if err != nil {
|
||||||
|
enc.Close()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Blocks until compression is done.
|
||||||
|
return enc.Close()
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You should always call `enc.Close()`, otherwise you will leak resources and your encode will be incomplete.
|
||||||
|
|
||||||
|
For the best throughput, you should attempt to reuse the `Writer` using the `Reset()` method.
|
||||||
|
|
||||||
|
The Writer in S2 is always buffered, therefore `NewBufferedWriter` in Snappy can be replaced with `NewWriter` in S2.
|
||||||
|
It is possible to flush any buffered data using the `Flush()` method.
|
||||||
|
This will block until all data sent to the encoder has been written to the output.
|
||||||
|
|
||||||
|
S2 also supports the `io.ReaderFrom` interface, which will consume all input from a reader.
|
||||||
|
|
||||||
|
As a final method to compress data, if you have a single block of data you would like to have encoded as a stream,
|
||||||
|
a slightly more efficient method is to use the `EncodeBuffer` method.
|
||||||
|
This will take ownership of the buffer until the stream is closed.
|
||||||
|
|
||||||
|
```Go
|
||||||
|
func EncodeStream(src []byte, dst io.Writer) error {
|
||||||
|
enc := s2.NewWriter(dst)
|
||||||
|
// The encoder owns the buffer until Flush or Close is called.
|
||||||
|
err := enc.EncodeBuffer(buf)
|
||||||
|
if err != nil {
|
||||||
|
enc.Close()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Blocks until compression is done.
|
||||||
|
return enc.Close()
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Each call to `EncodeBuffer` will result in discrete blocks being created without buffering,
|
||||||
|
so it should only be used a single time per stream.
|
||||||
|
If you need to write several blocks, you should use the regular io.Writer interface.
|
||||||
|
|
||||||
|
|
||||||
|
## Decompression
|
||||||
|
|
||||||
|
```Go
|
||||||
|
func DecodeStream(src io.Reader, dst io.Writer) error {
|
||||||
|
dec := s2.NewReader(src)
|
||||||
|
_, err := io.Copy(dst, dec)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Similar to the Writer, a Reader can be reused using the `Reset` method.
|
||||||
|
|
||||||
|
For the best possible throughput, there is a `EncodeBuffer(buf []byte)` function available.
|
||||||
|
However, it requires that the provided buffer isn't used after it is handed over to S2 and until the stream is flushed or closed.
|
||||||
|
|
||||||
|
For smaller data blocks, there is also a non-streaming interface: `Encode()`, `EncodeBetter()` and `Decode()`.
|
||||||
|
Do however note that these functions (similar to Snappy) do not provide validation of data,
|
||||||
|
so data corruption may be undetected. Stream encoding provides CRC checks of data.
|
||||||
|
|
||||||
|
It is possible to efficiently skip forward in a compressed stream using the `Skip()` method.
|
||||||
|
For big skips the decompressor is able to skip blocks without decompressing them.
|
||||||
|
|
||||||
|
## Single Blocks
|
||||||
|
|
||||||
|
Similar to Snappy S2 offers single block compression.
|
||||||
|
Blocks do not offer the same flexibility and safety as streams,
|
||||||
|
but may be preferable for very small payloads, less than 100K.
|
||||||
|
|
||||||
|
Using a simple `dst := s2.Encode(nil, src)` will compress `src` and return the compressed result.
|
||||||
|
It is possible to provide a destination buffer.
|
||||||
|
If the buffer has a capacity of `s2.MaxEncodedLen(len(src))` it will be used.
|
||||||
|
If not, a new one will be allocated.
|
||||||
|
|
||||||
|
Alternatively `EncodeBetter`/`EncodeBest` can also be used for better, but slightly slower compression.
|
||||||
|
|
||||||
|
Similarly to decompress a block you can use `dst, err := s2.Decode(nil, src)`.
|
||||||
|
Again an optional destination buffer can be supplied.
|
||||||
|
The `s2.DecodedLen(src)` can be used to get the minimum capacity needed.
|
||||||
|
If that is not satisfied a new buffer will be allocated.
|
||||||
|
|
||||||
|
Block functions always operate on a single goroutine since they should only be used for small payloads.
|
||||||
|
|
||||||
|
# Commandline tools
|
||||||
|
|
||||||
|
Some very simple commandline tools are provided; `s2c` for compression and `s2d` for decompression.
|
||||||
|
|
||||||
|
Binaries can be downloaded on the [Releases Page](https://github.com/klauspost/compress/releases).
|
||||||
|
|
||||||
|
Installing then requires Go to be installed. To install them, use:
|
||||||
|
|
||||||
|
`go install github.com/klauspost/compress/s2/cmd/s2c@latest && go install github.com/klauspost/compress/s2/cmd/s2d@latest`
|
||||||
|
|
||||||
|
To build binaries to the current folder use:
|
||||||
|
|
||||||
|
`go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d`
|
||||||
|
|
||||||
|
|
||||||
|
## s2c
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: s2c [options] file1 file2
|
||||||
|
|
||||||
|
Compresses all files supplied as input separately.
|
||||||
|
Output files are written as 'filename.ext.s2' or 'filename.ext.snappy'.
|
||||||
|
By default output files will be overwritten.
|
||||||
|
Use - as the only file name to read from stdin and write to stdout.
|
||||||
|
|
||||||
|
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
|
||||||
|
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
|
||||||
|
|
||||||
|
File names beginning with 'http://' and 'https://' will be downloaded and compressed.
|
||||||
|
Only http response code 200 is accepted.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-bench int
|
||||||
|
Run benchmark n times. No output will be written
|
||||||
|
-blocksize string
|
||||||
|
Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
|
||||||
|
-c Write all output to stdout. Multiple input files will be concatenated
|
||||||
|
-cpu int
|
||||||
|
Compress using this amount of threads (default 32)
|
||||||
|
-faster
|
||||||
|
Compress faster, but with a minor compression loss
|
||||||
|
-help
|
||||||
|
Display help
|
||||||
|
-index
|
||||||
|
Add seek index (default true)
|
||||||
|
-o string
|
||||||
|
Write output to another file. Single input file only
|
||||||
|
-pad string
|
||||||
|
Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
|
||||||
|
-q Don't write any output to terminal, except errors
|
||||||
|
-rm
|
||||||
|
Delete source file(s) after successful compression
|
||||||
|
-safe
|
||||||
|
Do not overwrite output files
|
||||||
|
-slower
|
||||||
|
Compress more, but a lot slower
|
||||||
|
-snappy
|
||||||
|
Generate Snappy compatible output stream
|
||||||
|
-verify
|
||||||
|
Verify written files
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## s2d
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: s2d [options] file1 file2
|
||||||
|
|
||||||
|
Decompresses all files supplied as input. Input files must end with '.s2' or '.snappy'.
|
||||||
|
Output file names have the extension removed. By default output files will be overwritten.
|
||||||
|
Use - as the only file name to read from stdin and write to stdout.
|
||||||
|
|
||||||
|
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
|
||||||
|
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
|
||||||
|
|
||||||
|
File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
|
||||||
|
Extensions on downloaded files are ignored. Only http response code 200 is accepted.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-bench int
|
||||||
|
Run benchmark n times. No output will be written
|
||||||
|
-c Write all output to stdout. Multiple input files will be concatenated
|
||||||
|
-help
|
||||||
|
Display help
|
||||||
|
-o string
|
||||||
|
Write output to another file. Single input file only
|
||||||
|
-offset string
|
||||||
|
Start at offset. Examples: 92, 64K, 256K, 1M, 4M. Requires Index
|
||||||
|
-q Don't write any output to terminal, except errors
|
||||||
|
-rm
|
||||||
|
Delete source file(s) after successful decompression
|
||||||
|
-safe
|
||||||
|
Do not overwrite output files
|
||||||
|
-tail string
|
||||||
|
Return last of compressed file. Examples: 92, 64K, 256K, 1M, 4M. Requires Index
|
||||||
|
-verify
|
||||||
|
Verify files, but do not write output
|
||||||
|
```
|
||||||
|
|
||||||
|
## s2sx: self-extracting archives
|
||||||
|
|
||||||
|
s2sx allows creating self-extracting archives with no dependencies.
|
||||||
|
|
||||||
|
By default, executables are created for the same platforms as the host os,
|
||||||
|
but this can be overridden with `-os` and `-arch` parameters.
|
||||||
|
|
||||||
|
Extracted files have 0666 permissions, except when untar option used.
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: s2sx [options] file1 file2
|
||||||
|
|
||||||
|
Compresses all files supplied as input separately.
|
||||||
|
If files have '.s2' extension they are assumed to be compressed already.
|
||||||
|
Output files are written as 'filename.s2sx' and with '.exe' for windows targets.
|
||||||
|
If output is big, an additional file with ".more" is written. This must be included as well.
|
||||||
|
By default output files will be overwritten.
|
||||||
|
|
||||||
|
Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
|
||||||
|
Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-arch string
|
||||||
|
Destination architecture (default "amd64")
|
||||||
|
-c Write all output to stdout. Multiple input files will be concatenated
|
||||||
|
-cpu int
|
||||||
|
Compress using this amount of threads (default 32)
|
||||||
|
-help
|
||||||
|
Display help
|
||||||
|
-max string
|
||||||
|
Maximum executable size. Rest will be written to another file. (default "1G")
|
||||||
|
-os string
|
||||||
|
Destination operating system (default "windows")
|
||||||
|
-q Don't write any output to terminal, except errors
|
||||||
|
-rm
|
||||||
|
Delete source file(s) after successful compression
|
||||||
|
-safe
|
||||||
|
Do not overwrite output files
|
||||||
|
-untar
|
||||||
|
Untar on destination
|
||||||
|
```
|
||||||
|
|
||||||
|
Available platforms are:
|
||||||
|
|
||||||
|
* darwin-amd64
|
||||||
|
* darwin-arm64
|
||||||
|
* linux-amd64
|
||||||
|
* linux-arm
|
||||||
|
* linux-arm64
|
||||||
|
* linux-mips64
|
||||||
|
* linux-ppc64le
|
||||||
|
* windows-386
|
||||||
|
* windows-amd64
|
||||||
|
|
||||||
|
By default, there is a size limit of 1GB for the output executable.
|
||||||
|
|
||||||
|
When this is exceeded the remaining file content is written to a file called
|
||||||
|
output+`.more`. This file must be included for a successful extraction and
|
||||||
|
placed alongside the executable.
|
||||||
|
|
||||||
|
This file *must* have the same name as the executable, so if the executable is renamed,
|
||||||
|
so must the `.more` file.
|
||||||
|
|
||||||
|
This functionality is disabled with stdin/stdout.
|
||||||
|
|
||||||
|
### Self-extracting TAR files
|
||||||
|
|
||||||
|
If you wrap a TAR file you can specify `-untar` to make it untar on the destination host.
|
||||||
|
|
||||||
|
Files are extracted to the current folder with the path specified in the tar file.
|
||||||
|
|
||||||
|
Note that tar files are not validated before they are wrapped.
|
||||||
|
|
||||||
|
For security reasons files that move below the root folder are not allowed.
|
||||||
|
|
||||||
|
# Performance
|
||||||
|
|
||||||
|
This section will focus on comparisons to Snappy.
|
||||||
|
This package is solely aimed at replacing Snappy as a high speed compression package.
|
||||||
|
If you are mainly looking for better compression [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd)
|
||||||
|
gives better compression, but typically at speeds slightly below "better" mode in this package.
|
||||||
|
|
||||||
|
Compression is increased compared to Snappy, mostly around 5-20% and the throughput is typically 25-40% increased (single threaded) compared to the Snappy Go implementation.
|
||||||
|
|
||||||
|
Streams are concurrently compressed. The stream will be distributed among all available CPU cores for the best possible throughput.
|
||||||
|
|
||||||
|
A "better" compression mode is also available. This allows to trade a bit of speed for a minor compression gain.
|
||||||
|
The content compressed in this mode is fully compatible with the standard decoder.
|
||||||
|
|
||||||
|
Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU):
|
||||||
|
|
||||||
|
| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller |
|
||||||
|
|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------|
|
||||||
|
| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% |
|
||||||
|
| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - |
|
||||||
|
| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% |
|
||||||
|
| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - |
|
||||||
|
| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% |
|
||||||
|
| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - |
|
||||||
|
| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% |
|
||||||
|
| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - |
|
||||||
|
| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% |
|
||||||
|
| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - |
|
||||||
|
| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% |
|
||||||
|
| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - |
|
||||||
|
| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% |
|
||||||
|
| (1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - |
|
||||||
|
| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% |
|
||||||
|
| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - |
|
||||||
|
| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% |
|
||||||
|
| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - |
|
||||||
|
| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% |
|
||||||
|
| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - |
|
||||||
|
| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% |
|
||||||
|
| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - |
|
||||||
|
|
||||||
|
### Legend
|
||||||
|
|
||||||
|
* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core.
|
||||||
|
* `S2 throughput`: Throughput of S2 in MB/s.
|
||||||
|
* `S2 % smaller`: How many percent of the Snappy output size is S2 better.
|
||||||
|
* `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy.
|
||||||
|
* `"better" throughput`: Speed when enabling "better" compression mode in S2 compared to Snappy.
|
||||||
|
* `"better" % smaller`: How many percent of the Snappy output size is S2 better when using "better" compression.
|
||||||
|
|
||||||
|
There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads.
|
||||||
|
|
||||||
|
Machine generated data gets by far the biggest compression boost, with size being reduced by up to 45% of Snappy size.
|
||||||
|
|
||||||
|
The "better" compression mode sees a good improvement in all cases, but usually at a performance cost.
|
||||||
|
|
||||||
|
Incompressible content (`sharnd.out.2gb`, 2GB random data) sees the smallest speedup.
|
||||||
|
This is likely dominated by synchronization overhead, which is confirmed by the fact that single threaded performance is higher (see above).
|
||||||
|
|
||||||
|
## Decompression
|
||||||
|
|
||||||
|
S2 attempts to create content that is also fast to decompress, except in "better" mode where the smallest representation is used.
|
||||||
|
|
||||||
|
S2 vs Snappy **decompression** speed. Both operating on single core:
|
||||||
|
|
||||||
|
| File | S2 Throughput | vs. Snappy | Better Throughput | vs. Snappy |
|
||||||
|
|-----------------------------------------------------------------------------------------------------|---------------|------------|-------------------|------------|
|
||||||
|
| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 2117 MB/s | 1.14x | 1738 MB/s | 0.94x |
|
||||||
|
| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 2401 MB/s | 1.25x | 2307 MB/s | 1.20x |
|
||||||
|
| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 2075 MB/s | 0.98x | 1764 MB/s | 0.83x |
|
||||||
|
| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 2967 MB/s | 1.05x | 2885 MB/s | 1.02x |
|
||||||
|
| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 4141 MB/s | 1.07x | 4184 MB/s | 1.08x |
|
||||||
|
| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 2264 MB/s | 1.12x | 2185 MB/s | 1.08x |
|
||||||
|
| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 1525 MB/s | 1.03x | 1347 MB/s | 0.91x |
|
||||||
|
| sharnd.out.2gb | 3813 MB/s | 0.79x | 3900 MB/s | 0.81x |
|
||||||
|
| [enwik9](http://mattmahoney.net/dc/textdata.html) | 1246 MB/s | 1.29x | 967 MB/s | 1.00x |
|
||||||
|
| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 1433 MB/s | 1.12x | 1203 MB/s | 0.94x |
|
||||||
|
| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 1284 MB/s | 1.32x | 1010 MB/s | 1.04x |
|
||||||
|
|
||||||
|
### Legend
|
||||||
|
|
||||||
|
* `S2 Throughput`: Decompression speed of S2 encoded content.
|
||||||
|
* `Better Throughput`: Decompression speed of S2 "better" encoded content.
|
||||||
|
* `vs Snappy`: Decompression speed of S2 "better" mode compared to Snappy and absolute speed.
|
||||||
|
|
||||||
|
|
||||||
|
While the decompression code hasn't changed, there is a significant speedup in decompression speed.
|
||||||
|
S2 prefers longer matches and will typically only find matches that are 6 bytes or longer.
|
||||||
|
While this reduces compression a bit, it improves decompression speed.
|
||||||
|
|
||||||
|
The "better" compression mode will actively look for shorter matches, which is why it has a decompression speed quite similar to Snappy.
|
||||||
|
|
||||||
|
Without assembly decompression is also very fast; single goroutine decompression speed. No assembly:
|
||||||
|
|
||||||
|
| File | S2 Throughput | S2 throughput |
|
||||||
|
|--------------------------------|--------------|---------------|
|
||||||
|
| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s |
|
||||||
|
| 10gb.tar.s2 | 1.30x | 867.07 MB/s |
|
||||||
|
| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s |
|
||||||
|
| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s |
|
||||||
|
| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s |
|
||||||
|
| enwik9.s2 | 1.67x | 681.53 MB/s |
|
||||||
|
| adresser.json.s2 | 3.41x | 4230.53 MB/s |
|
||||||
|
| silesia.tar.s2 | 1.52x | 811.58 MB/s |
|
||||||
|
|
||||||
|
Even though S2 typically compresses better than Snappy, decompression speed is always better.
|
||||||
|
|
||||||
|
### Concurrent Stream Decompression
|
||||||
|
|
||||||
|
For full stream decompression S2 offers a [DecodeConcurrent](https://pkg.go.dev/github.com/klauspost/compress/s2#Reader.DecodeConcurrent)
|
||||||
|
that will decode a full stream using multiple goroutines.
|
||||||
|
|
||||||
|
Example scaling, AMD Ryzen 3950X, 16 cores, decompression using `s2d -bench=3 <input>`, best of 3:
|
||||||
|
|
||||||
|
| Input | `-cpu=1` | `-cpu=2` | `-cpu=4` | `-cpu=8` | `-cpu=16` |
|
||||||
|
|-------------------------------------------|------------|------------|------------|------------|-------------|
|
||||||
|
| enwik10.snappy | 1098.6MB/s | 1819.8MB/s | 3625.6MB/s | 6910.6MB/s | 10818.2MB/s |
|
||||||
|
| enwik10.s2 | 1303.5MB/s | 2606.1MB/s | 4847.9MB/s | 8878.4MB/s | 9592.1MB/s |
|
||||||
|
| sofia-air-quality-dataset.tar.snappy | 1302.0MB/s | 2165.0MB/s | 4244.5MB/s | 8241.0MB/s | 12920.5MB/s |
|
||||||
|
| sofia-air-quality-dataset.tar.s2 | 1399.2MB/s | 2463.2MB/s | 5196.5MB/s | 9639.8MB/s | 11439.5MB/s |
|
||||||
|
| sofia-air-quality-dataset.tar.s2 (no asm) | 837.5MB/s | 1652.6MB/s | 3183.6MB/s | 5945.0MB/s | 9620.7MB/s |
|
||||||
|
|
||||||
|
Scaling can be expected to be pretty linear until memory bandwidth is saturated.
|
||||||
|
|
||||||
|
For now the DecodeConcurrent can only be used for full streams without seeking or combining with regular reads.
|
||||||
|
|
||||||
|
## Block compression
|
||||||
|
|
||||||
|
|
||||||
|
When compressing blocks no concurrent compression is performed just as Snappy.
|
||||||
|
This is because blocks are for smaller payloads and generally will not benefit from concurrent compression.
|
||||||
|
|
||||||
|
An important change is that incompressible blocks will not be more than at most 10 bytes bigger than the input.
|
||||||
|
In rare, worst case scenario Snappy blocks could be significantly bigger than the input.
|
||||||
|
|
||||||
|
### Mixed content blocks
|
||||||
|
|
||||||
|
The most reliable is a wide dataset.
|
||||||
|
For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
|
||||||
|
53927 files, total input size: 4,014,735,833 bytes. Single goroutine used.
|
||||||
|
|
||||||
|
| * | Input | Output | Reduction | MB/s |
|
||||||
|
|-------------------|------------|------------|-----------|--------|
|
||||||
|
| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** |
|
||||||
|
| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 |
|
||||||
|
| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 |
|
||||||
|
| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 |
|
||||||
|
| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 |
|
||||||
|
| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 |
|
||||||
|
|
||||||
|
S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best".
|
||||||
|
"Better" mode provides the same compression speed as LZ4 with better compression ratio.
|
||||||
|
|
||||||
|
When outputting Snappy compatible output it still delivers better throughput (150MB/s more) and better compression.
|
||||||
|
|
||||||
|
As can be seen from the other benchmarks decompression should also be easier on the S2 generated output.
|
||||||
|
|
||||||
|
Though they cannot be compared due to different decompression speeds, here are the speed/size comparisons for
|
||||||
|
other Go compressors:
|
||||||
|
|
||||||
|
| * | Input | Output | Reduction | MB/s |
|
||||||
|
|-------------------|------------|------------|-----------|--------|
|
||||||
|
| Zstd Fastest (Go) | 4014735833 | 794608518 | 80.21% | 236.04 |
|
||||||
|
| Zstd Best (Go) | 4014735833 | 704603356 | 82.45% | 35.63 |
|
||||||
|
| Deflate (Go) l1 | 4014735833 | 871294239 | 78.30% | 214.04 |
|
||||||
|
| Deflate (Go) l9 | 4014735833 | 730389060 | 81.81% | 41.17 |
|
||||||
|
|
||||||
|
### Standard block compression
|
||||||
|
|
||||||
|
Benchmarking single block performance is subject to a lot more variation since it only tests a limited number of file patterns.
|
||||||
|
So individual benchmarks should only be seen as a guideline and the overall picture is more important.
|
||||||
|
|
||||||
|
These micro-benchmarks are with data in cache and trained branch predictors. For a more realistic benchmark see the mixed content above.
|
||||||
|
|
||||||
|
Block compression. Parallel benchmark running on 16 cores, 16 goroutines.
|
||||||
|
|
||||||
|
AMD64 assembly is used for both S2 and Snappy.
|
||||||
|
|
||||||
|
| Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec |
|
||||||
|
|-----------------------|-------------|---------|--------------|-------------|-------------|-------------|
|
||||||
|
| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s |
|
||||||
|
| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s |
|
||||||
|
| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s |
|
||||||
|
| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s |
|
||||||
|
| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s |
|
||||||
|
| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s |
|
||||||
|
| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s |
|
||||||
|
| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s |
|
||||||
|
| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s |
|
||||||
|
| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s |
|
||||||
|
| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s |
|
||||||
|
| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s |
|
||||||
|
| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s |
|
||||||
|
| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s |
|
||||||
|
| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s |
|
||||||
|
| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s |
|
||||||
|
|
||||||
|
|
||||||
|
| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed |
|
||||||
|
|-----------------------|-------------|------------------|----------|--------------|
|
||||||
|
| html | 22.31% | 7.58% | 1.07x | 1.20x |
|
||||||
|
| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x |
|
||||||
|
| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x |
|
||||||
|
| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x |
|
||||||
|
| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 0.89x |
|
||||||
|
| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x |
|
||||||
|
| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x |
|
||||||
|
| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x |
|
||||||
|
| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x |
|
||||||
|
| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x |
|
||||||
|
| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x |
|
||||||
|
| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x |
|
||||||
|
| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x |
|
||||||
|
| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x |
|
||||||
|
| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x |
|
||||||
|
| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x |
|
||||||
|
|
||||||
|
Speed is generally at or above Snappy. Small blocks gets a significant speedup, although at the expense of size.
|
||||||
|
|
||||||
|
Decompression speed is better than Snappy, except in one case.
|
||||||
|
|
||||||
|
Since payloads are very small the variance in terms of size is rather big, so they should only be seen as a general guideline.
|
||||||
|
|
||||||
|
Size is on average around Snappy, but varies on content type.
|
||||||
|
In cases where compression is worse, it usually is compensated by a speed boost.
|
||||||
|
|
||||||
|
|
||||||
|
### Better compression
|
||||||
|
|
||||||
|
Benchmarking single block performance is subject to a lot more variation since it only tests a limited number of file patterns.
|
||||||
|
So individual benchmarks should only be seen as a guideline and the overall picture is more important.
|
||||||
|
|
||||||
|
| Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec |
|
||||||
|
|-----------------------|-------------|-------------|--------------|--------------|-------------|-------------|
|
||||||
|
| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s |
|
||||||
|
| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s |
|
||||||
|
| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s |
|
||||||
|
| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s |
|
||||||
|
| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s |
|
||||||
|
| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s |
|
||||||
|
| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s |
|
||||||
|
| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s |
|
||||||
|
| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s |
|
||||||
|
| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s |
|
||||||
|
| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s |
|
||||||
|
| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s |
|
||||||
|
| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s |
|
||||||
|
| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s |
|
||||||
|
| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s |
|
||||||
|
| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s |
|
||||||
|
|
||||||
|
|
||||||
|
| Relative Perf | Snappy size | Better size | Better Speed | Better dec |
|
||||||
|
|-----------------------|-------------|-------------|--------------|------------|
|
||||||
|
| html | 22.31% | 13.18% | 0.48x | 0.98x |
|
||||||
|
| urls.10K | 47.78% | 24.43% | 0.50x | 0.93x |
|
||||||
|
| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x |
|
||||||
|
| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x |
|
||||||
|
| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x |
|
||||||
|
| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x |
|
||||||
|
| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x |
|
||||||
|
| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x |
|
||||||
|
| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x |
|
||||||
|
| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x |
|
||||||
|
| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x |
|
||||||
|
| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x |
|
||||||
|
| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x |
|
||||||
|
| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x |
|
||||||
|
| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x |
|
||||||
|
| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x |
|
||||||
|
|
||||||
|
Except for the mostly incompressible JPEG image compression is better and usually in the
|
||||||
|
double digits in terms of percentage reduction over Snappy.
|
||||||
|
|
||||||
|
The PDF sample shows a significant slowdown compared to Snappy, as this mode tries harder
|
||||||
|
to compress the data. Very small blocks are also not favorable for better compression, so throughput is way down.
|
||||||
|
|
||||||
|
This mode aims to provide better compression at the expense of performance and achieves that
|
||||||
|
without a huge performance penalty, except on very small blocks.
|
||||||
|
|
||||||
|
Decompression speed suffers a little compared to the regular S2 mode,
|
||||||
|
but still manages to be close to Snappy in spite of increased compression.
|
||||||
|
|
||||||
|
# Best compression mode
|
||||||
|
|
||||||
|
S2 offers a "best" compression mode.
|
||||||
|
|
||||||
|
This will compress as much as possible with little regard to CPU usage.
|
||||||
|
|
||||||
|
Mainly for offline compression, but where decompression speed should still
|
||||||
|
be high and compatible with other S2 compressed data.
|
||||||
|
|
||||||
|
Some examples compared on 16 core CPU, amd64 assembly used:
|
||||||
|
|
||||||
|
```
|
||||||
|
* enwik10
|
||||||
|
Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s
|
||||||
|
Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s
|
||||||
|
Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s
|
||||||
|
|
||||||
|
* github-june-2days-2019.json
|
||||||
|
Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s
|
||||||
|
Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s
|
||||||
|
Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s
|
||||||
|
|
||||||
|
* nyc-taxi-data-10M.csv
|
||||||
|
Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s
|
||||||
|
Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s
|
||||||
|
Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s
|
||||||
|
|
||||||
|
* 10gb.tar
|
||||||
|
Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s
|
||||||
|
Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s
|
||||||
|
Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/s
|
||||||
|
|
||||||
|
* consensus.db.10gb
|
||||||
|
Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s
|
||||||
|
Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s
|
||||||
|
Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s
|
||||||
|
```
|
||||||
|
|
||||||
|
Decompression speed should be around the same as using the 'better' compression mode.
|
||||||
|
|
||||||
|
# Snappy Compatibility
|
||||||
|
|
||||||
|
S2 now offers full compatibility with Snappy.
|
||||||
|
|
||||||
|
This means that the efficient encoders of S2 can be used to generate fully Snappy compatible output.
|
||||||
|
|
||||||
|
There is a [snappy](https://github.com/klauspost/compress/tree/master/snappy) package that can be used by
|
||||||
|
simply changing imports from `github.com/golang/snappy` to `github.com/klauspost/compress/snappy`.
|
||||||
|
This uses "better" mode for all operations.
|
||||||
|
If you would like more control, you can use the s2 package as described below:
|
||||||
|
|
||||||
|
## Blocks
|
||||||
|
|
||||||
|
Snappy compatible blocks can be generated with the S2 encoder.
|
||||||
|
Compression and speed are typically a bit better. `MaxEncodedLen` is also smaller, for smaller memory usage. Replace
|
||||||
|
|
||||||
|
| Snappy | S2 replacement |
|
||||||
|
|----------------------------|-------------------------|
|
||||||
|
| snappy.Encode(...) | s2.EncodeSnappy(...) |
|
||||||
|
| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) |
|
||||||
|
|
||||||
|
`s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output.
|
||||||
|
|
||||||
|
`s2.ConcatBlocks` is compatible with snappy blocks.
|
||||||
|
|
||||||
|
Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
|
||||||
|
53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used:
|
||||||
|
|
||||||
|
| Encoder | Size | MB/s | Reduction |
|
||||||
|
|-----------------------|------------|------------|------------|
|
||||||
|
| snappy.Encode | 1128706759 | 725.59 | 71.89% |
|
||||||
|
| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% |
|
||||||
|
| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% |
|
||||||
|
| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**|
|
||||||
|
|
||||||
|
## Streams
|
||||||
|
|
||||||
|
For streams, replace `enc = snappy.NewBufferedWriter(w)` with `enc = s2.NewWriter(w, s2.WriterSnappyCompat())`.
|
||||||
|
All other options are available, but note that block size limit is different for snappy.
|
||||||
|
|
||||||
|
Comparison of different streams, AMD Ryzen 3950x, 16 cores. Size and throughput:
|
||||||
|
|
||||||
|
| File | snappy.NewWriter | S2 Snappy | S2 Snappy, Better | S2 Snappy, Best |
|
||||||
|
|-----------------------------|--------------------------|---------------------------|--------------------------|-------------------------|
|
||||||
|
| nyc-taxi-data-10M.csv | 1316042016 - 539.47MB/s | 1307003093 - 10132.73MB/s | 1174534014 - 5002.44MB/s | 1115904679 - 177.97MB/s |
|
||||||
|
| enwik10 (xml) | 5088294643 - 451.13MB/s | 5175840939 - 9440.69MB/s | 4560784526 - 4487.21MB/s | 4340299103 - 158.92MB/s |
|
||||||
|
| 10gb.tar (mixed) | 6056946612 - 729.73MB/s | 6208571995 - 9978.05MB/s | 5741646126 - 4919.98MB/s | 5548973895 - 180.44MB/s |
|
||||||
|
| github-june-2days-2019.json | 1525176492 - 933.00MB/s | 1476519054 - 13150.12MB/s | 1400547532 - 5803.40MB/s | 1321887137 - 204.29MB/s |
|
||||||
|
| consensus.db.10gb (db) | 5412897703 - 1102.14MB/s | 5354073487 - 13562.91MB/s | 5335069899 - 5294.73MB/s | 5201000954 - 175.72MB/s |
|
||||||
|
|
||||||
|
# Decompression
|
||||||
|
|
||||||
|
All decompression functions map directly to equivalent s2 functions.
|
||||||
|
|
||||||
|
| Snappy | S2 replacement |
|
||||||
|
|------------------------|--------------------|
|
||||||
|
| snappy.Decode(...) | s2.Decode(...) |
|
||||||
|
| snappy.DecodedLen(...) | s2.DecodedLen(...) |
|
||||||
|
| snappy.NewReader(...) | s2.NewReader(...) |
|
||||||
|
|
||||||
|
Features like [quick forward skipping without decompression](https://pkg.go.dev/github.com/klauspost/compress/s2#Reader.Skip)
|
||||||
|
are also available for Snappy streams.
|
||||||
|
|
||||||
|
If you know you are only decompressing snappy streams, setting [`ReaderMaxBlockSize(64<<10)`](https://pkg.go.dev/github.com/klauspost/compress/s2#ReaderMaxBlockSize)
|
||||||
|
on your Reader will reduce memory consumption.
|
||||||
|
|
||||||
|
# Concatenating blocks and streams.
|
||||||
|
|
||||||
|
Concatenating streams will concatenate the output of both without recompressing them.
|
||||||
|
While this is inefficient in terms of compression it might be usable in certain scenarios.
|
||||||
|
The 10 byte 'stream identifier' of the second stream can optionally be stripped, but it is not a requirement.
|
||||||
|
|
||||||
|
Blocks can be concatenated using the `ConcatBlocks` function.
|
||||||
|
|
||||||
|
Snappy blocks/streams can safely be concatenated with S2 blocks and streams.
|
||||||
|
Streams with indexes (see below) will currently not work on concatenated streams.
|
||||||
|
|
||||||
|
# Stream Seek Index
|
||||||
|
|
||||||
|
S2 and Snappy streams can have indexes. These indexes will allow random seeking within the compressed data.
|
||||||
|
|
||||||
|
The index can either be appended to the stream as a skippable block or returned for separate storage.
|
||||||
|
|
||||||
|
When the index is appended to a stream it will be skipped by regular decoders,
|
||||||
|
so the output remains compatible with other decoders.
|
||||||
|
|
||||||
|
## Creating an Index
|
||||||
|
|
||||||
|
To automatically add an index to a stream, add `WriterAddIndex()` option to your writer.
|
||||||
|
Then the index will be added to the stream when `Close()` is called.
|
||||||
|
|
||||||
|
```
|
||||||
|
// Add Index to stream...
|
||||||
|
enc := s2.NewWriter(w, s2.WriterAddIndex())
|
||||||
|
io.Copy(enc, r)
|
||||||
|
enc.Close()
|
||||||
|
```
|
||||||
|
|
||||||
|
If you want to store the index separately, you can use `CloseIndex()` instead of the regular `Close()`.
|
||||||
|
This will return the index. Note that `CloseIndex()` should only be called once, and you shouldn't call `Close()`.
|
||||||
|
|
||||||
|
```
|
||||||
|
// Get index for separate storage...
|
||||||
|
enc := s2.NewWriter(w)
|
||||||
|
io.Copy(enc, r)
|
||||||
|
index, err := enc.CloseIndex()
|
||||||
|
```
|
||||||
|
|
||||||
|
The `index` can then be used when reading from the stream.
|
||||||
|
This means the index can be used without needing to seek to the end of the stream
|
||||||
|
or for manually forwarding streams. See below.
|
||||||
|
|
||||||
|
Finally, an existing S2/Snappy stream can be indexed using the `s2.IndexStream(r io.Reader)` function.
|
||||||
|
|
||||||
|
## Using Indexes
|
||||||
|
|
||||||
|
To use indexes there is a `ReadSeeker(random bool, index []byte) (*ReadSeeker, error)` function available.
|
||||||
|
|
||||||
|
Calling ReadSeeker will return an [io.ReadSeeker](https://pkg.go.dev/io#ReadSeeker) compatible version of the reader.
|
||||||
|
|
||||||
|
If 'random' is specified the returned io.Seeker can be used for random seeking, otherwise only forward seeking is supported.
|
||||||
|
Enabling random seeking requires the original input to support the [io.Seeker](https://pkg.go.dev/io#Seeker) interface.
|
||||||
|
|
||||||
|
```
|
||||||
|
dec := s2.NewReader(r)
|
||||||
|
rs, err := dec.ReadSeeker(false, nil)
|
||||||
|
rs.Seek(wantOffset, io.SeekStart)
|
||||||
|
```
|
||||||
|
|
||||||
|
Get a seeker to seek forward. Since no index is provided, the index is read from the stream.
|
||||||
|
This requires that an index was added and that `r` supports the [io.Seeker](https://pkg.go.dev/io#Seeker) interface.
|
||||||
|
|
||||||
|
A custom index can be specified which will be used if supplied.
|
||||||
|
When using a custom index, it will not be read from the input stream.
|
||||||
|
|
||||||
|
```
|
||||||
|
dec := s2.NewReader(r)
|
||||||
|
rs, err := dec.ReadSeeker(false, index)
|
||||||
|
rs.Seek(wantOffset, io.SeekStart)
|
||||||
|
```
|
||||||
|
|
||||||
|
This will read the index from `index`. Since we specify non-random (forward only) seeking, `r` does not have to be an io.Seeker.
|
||||||
|
|
||||||
|
```
|
||||||
|
dec := s2.NewReader(r)
|
||||||
|
rs, err := dec.ReadSeeker(true, index)
|
||||||
|
rs.Seek(wantOffset, io.SeekStart)
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, since we specify that we want to do random seeking, `r` must be an io.Seeker.
|
||||||
|
|
||||||
|
The returned [ReadSeeker](https://pkg.go.dev/github.com/klauspost/compress/s2#ReadSeeker) contains a shallow reference to the existing Reader,
|
||||||
|
meaning changes performed to one is reflected in the other.
|
||||||
|
|
||||||
|
To check if a stream contains an index at the end, the `(*Index).LoadStream(rs io.ReadSeeker) error` can be used.
|
||||||
|
|
||||||
|
## Manually Forwarding Streams
|
||||||
|
|
||||||
|
Indexes can also be read outside the decoder using the [Index](https://pkg.go.dev/github.com/klauspost/compress/s2#Index) type.
|
||||||
|
This can be used for parsing indexes, either separate or in streams.
|
||||||
|
|
||||||
|
In some cases it may not be possible to serve a seekable stream.
|
||||||
|
This can for instance be an HTTP stream, where the Range request
|
||||||
|
is sent at the start of the stream.
|
||||||
|
|
||||||
|
With a little bit of extra code it is still possible to use indexes
|
||||||
|
to forward to specific offset with a single forward skip.
|
||||||
|
|
||||||
|
It is possible to load the index manually like this:
|
||||||
|
```
|
||||||
|
var index s2.Index
|
||||||
|
_, err = index.Load(idxBytes)
|
||||||
|
```
|
||||||
|
|
||||||
|
This can be used to figure out how much to offset the compressed stream:
|
||||||
|
|
||||||
|
```
|
||||||
|
compressedOffset, uncompressedOffset, err := index.Find(wantOffset)
|
||||||
|
```
|
||||||
|
|
||||||
|
The `compressedOffset` is the number of bytes that should be skipped
|
||||||
|
from the beginning of the compressed file.
|
||||||
|
|
||||||
|
The `uncompressedOffset` will then be offset of the uncompressed bytes returned
|
||||||
|
when decoding from that position. This will always be <= wantOffset.
|
||||||
|
|
||||||
|
When creating a decoder it must be specified that it should *not* expect a stream identifier
|
||||||
|
at the beginning of the stream. Assuming the io.Reader `r` has been forwarded to `compressedOffset`
|
||||||
|
we create the decoder like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
dec := s2.NewReader(r, s2.ReaderIgnoreStreamIdentifier())
|
||||||
|
```
|
||||||
|
|
||||||
|
We are not completely done. We still need to forward the stream the uncompressed bytes we didn't want.
|
||||||
|
This is done using the regular "Skip" function:
|
||||||
|
|
||||||
|
```
|
||||||
|
err = dec.Skip(wantOffset - uncompressedOffset)
|
||||||
|
```
|
||||||
|
|
||||||
|
This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset.
|
||||||
|
|
||||||
|
## Index Format:
|
||||||
|
|
||||||
|
Each block is structured as a snappy skippable block, with the chunk ID 0x99.
|
||||||
|
|
||||||
|
The block can be read from the front, but contains information so it can be read from the back as well.
|
||||||
|
|
||||||
|
Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding),
|
||||||
|
with un-encoded value length of 64 bits, unless other limits are specified.
|
||||||
|
|
||||||
|
| Content | Format |
|
||||||
|
|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| ID, `[1]byte` | Always 0x99. |
|
||||||
|
| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. |
|
||||||
|
| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". |
|
||||||
|
| UncompressedSize, Varint | Total Uncompressed size. |
|
||||||
|
| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. |
|
||||||
|
| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. |
|
||||||
|
| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. |
|
||||||
|
| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. |
|
||||||
|
| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. |
|
||||||
|
| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. |
|
||||||
|
| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. |
|
||||||
|
| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. |
|
||||||
|
|
||||||
|
For regular streams the uncompressed offsets are fully predictable,
|
||||||
|
so `HasUncompressedOffsets` allows to specify that compressed blocks all have
|
||||||
|
exactly `EstBlockSize` bytes of uncompressed content.
|
||||||
|
|
||||||
|
Entries *must* be in order, starting with the lowest offset,
|
||||||
|
and there *must* be no uncompressed offset duplicates.
|
||||||
|
Entries *may* point to the start of a skippable block,
|
||||||
|
but it is then not allowed to also have an entry for the next block since
|
||||||
|
that would give an uncompressed offset duplicate.
|
||||||
|
|
||||||
|
There is no requirement for all blocks to be represented in the index.
|
||||||
|
In fact there is a maximum of 65536 block entries in an index.
|
||||||
|
|
||||||
|
The writer can use any method to reduce the number of entries.
|
||||||
|
An implicit block start at 0,0 can be assumed.
|
||||||
|
|
||||||
|
### Decoding entries:
|
||||||
|
|
||||||
|
```
|
||||||
|
// Read Uncompressed entries.
|
||||||
|
// Each assumes EstBlockSize delta from previous.
|
||||||
|
for each entry {
|
||||||
|
uOff = 0
|
||||||
|
if HasUncompressedOffsets == 1 {
|
||||||
|
uOff = ReadVarInt // Read value from stream
|
||||||
|
}
|
||||||
|
|
||||||
|
// Except for the first entry, use previous values.
|
||||||
|
if entryNum == 0 {
|
||||||
|
entry[entryNum].UncompressedOffset = uOff
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uncompressed uses previous offset and adds EstBlockSize
|
||||||
|
entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize + uOff
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Guess that the first block will be 50% of uncompressed size.
|
||||||
|
// Integer truncating division must be used.
|
||||||
|
CompressGuess := EstBlockSize / 2
|
||||||
|
|
||||||
|
// Read Compressed entries.
|
||||||
|
// Each assumes CompressGuess delta from previous.
|
||||||
|
// CompressGuess is adjusted for each value.
|
||||||
|
for each entry {
|
||||||
|
cOff = ReadVarInt // Read value from stream
|
||||||
|
|
||||||
|
// Except for the first entry, use previous values.
|
||||||
|
if entryNum == 0 {
|
||||||
|
entry[entryNum].CompressedOffset = cOff
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compressed uses previous and our estimate.
|
||||||
|
entry[entryNum].CompressedOffset = entry[entryNum-1].CompressedOffset + CompressGuess + cOff
|
||||||
|
|
||||||
|
// Adjust compressed offset for next loop, integer truncating division must be used.
|
||||||
|
CompressGuess += cOff/2
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
To decode from any given uncompressed offset `(wantOffset)`:
|
||||||
|
|
||||||
|
* Iterate entries until `entry[n].UncompressedOffset > wantOffset`.
|
||||||
|
* Start decoding from `entry[n-1].CompressedOffset`.
|
||||||
|
* Discard `entry[n-1].UncompressedOffset - wantOffset` bytes from the decoded stream.
|
||||||
|
|
||||||
|
See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface.
|
||||||
|
|
||||||
|
# Format Extensions
|
||||||
|
|
||||||
|
* Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.
|
||||||
|
* [Framed compressed blocks](https://github.com/google/snappy/blob/master/format_description.txt) can be up to 4MB (up from 64KB).
|
||||||
|
* Compressed blocks can have an offset of `0`, which indicates to repeat the last seen offset.
|
||||||
|
|
||||||
|
Repeat offsets must be encoded as a [2.2.1. Copy with 1-byte offset (01)](https://github.com/google/snappy/blob/master/format_description.txt#L89), where the offset is 0.
|
||||||
|
|
||||||
|
The length is specified by reading the 3-bit length specified in the tag and decode using this table:
|
||||||
|
|
||||||
|
| Length | Actual Length |
|
||||||
|
|--------|----------------------|
|
||||||
|
| 0 | 4 |
|
||||||
|
| 1 | 5 |
|
||||||
|
| 2 | 6 |
|
||||||
|
| 3 | 7 |
|
||||||
|
| 4 | 8 |
|
||||||
|
| 5 | 8 + read 1 byte |
|
||||||
|
| 6 | 260 + read 2 bytes |
|
||||||
|
| 7 | 65540 + read 3 bytes |
|
||||||
|
|
||||||
|
This allows any repeat offset + length to be represented by 2 to 5 bytes.
|
||||||
|
|
||||||
|
Lengths are stored as little endian values.
|
||||||
|
|
||||||
|
The first copy of a block cannot be a repeat offset and the offset is not carried across blocks in streams.
|
||||||
|
|
||||||
|
Default streaming block size is 1MB.
|
||||||
|
|
||||||
|
# LICENSE
|
||||||
|
|
||||||
|
This code is based on the [Snappy-Go](https://github.com/golang/snappy) implementation.
|
||||||
|
|
||||||
|
Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
|
1046
vendor/github.com/klauspost/compress/s2/decode.go
generated
vendored
Normal file
1046
vendor/github.com/klauspost/compress/s2/decode.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
568
vendor/github.com/klauspost/compress/s2/decode_amd64.s
generated
vendored
Normal file
568
vendor/github.com/klauspost/compress/s2/decode_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,568 @@
|
||||||
|
// Copyright 2016 The Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define R_TMP0 AX
|
||||||
|
#define R_TMP1 BX
|
||||||
|
#define R_LEN CX
|
||||||
|
#define R_OFF DX
|
||||||
|
#define R_SRC SI
|
||||||
|
#define R_DST DI
|
||||||
|
#define R_DBASE R8
|
||||||
|
#define R_DLEN R9
|
||||||
|
#define R_DEND R10
|
||||||
|
#define R_SBASE R11
|
||||||
|
#define R_SLEN R12
|
||||||
|
#define R_SEND R13
|
||||||
|
#define R_TMP2 R14
|
||||||
|
#define R_TMP3 R15
|
||||||
|
|
||||||
|
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||||
|
// where marked with a "!!!".
|
||||||
|
|
||||||
|
// func decode(dst, src []byte) int
|
||||||
|
//
|
||||||
|
// All local variables fit into registers. The non-zero stack size is only to
|
||||||
|
// spill registers and push args when issuing a CALL. The register allocation:
|
||||||
|
// - R_TMP0 scratch
|
||||||
|
// - R_TMP1 scratch
|
||||||
|
// - R_LEN length or x (shared)
|
||||||
|
// - R_OFF offset
|
||||||
|
// - R_SRC &src[s]
|
||||||
|
// - R_DST &dst[d]
|
||||||
|
// + R_DBASE dst_base
|
||||||
|
// + R_DLEN dst_len
|
||||||
|
// + R_DEND dst_base + dst_len
|
||||||
|
// + R_SBASE src_base
|
||||||
|
// + R_SLEN src_len
|
||||||
|
// + R_SEND src_base + src_len
|
||||||
|
// - R_TMP2 used by doCopy
|
||||||
|
// - R_TMP3 used by doCopy
|
||||||
|
//
|
||||||
|
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
|
||||||
|
// function, and after a CALL returns, and are not otherwise modified.
|
||||||
|
//
|
||||||
|
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
|
||||||
|
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
|
||||||
|
TEXT ·s2Decode(SB), NOSPLIT, $48-56
|
||||||
|
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
|
||||||
|
MOVQ dst_base+0(FP), R_DBASE
|
||||||
|
MOVQ dst_len+8(FP), R_DLEN
|
||||||
|
MOVQ R_DBASE, R_DST
|
||||||
|
MOVQ R_DBASE, R_DEND
|
||||||
|
ADDQ R_DLEN, R_DEND
|
||||||
|
MOVQ src_base+24(FP), R_SBASE
|
||||||
|
MOVQ src_len+32(FP), R_SLEN
|
||||||
|
MOVQ R_SBASE, R_SRC
|
||||||
|
MOVQ R_SBASE, R_SEND
|
||||||
|
ADDQ R_SLEN, R_SEND
|
||||||
|
XORQ R_OFF, R_OFF
|
||||||
|
|
||||||
|
loop:
|
||||||
|
// for s < len(src)
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JEQ end
|
||||||
|
|
||||||
|
// R_LEN = uint32(src[s])
|
||||||
|
//
|
||||||
|
// switch src[s] & 0x03
|
||||||
|
MOVBLZX (R_SRC), R_LEN
|
||||||
|
MOVL R_LEN, R_TMP1
|
||||||
|
ANDL $3, R_TMP1
|
||||||
|
CMPL R_TMP1, $1
|
||||||
|
JAE tagCopy
|
||||||
|
|
||||||
|
// ----------------------------------------
|
||||||
|
// The code below handles literal tags.
|
||||||
|
|
||||||
|
// case tagLiteral:
|
||||||
|
// x := uint32(src[s] >> 2)
|
||||||
|
// switch
|
||||||
|
SHRL $2, R_LEN
|
||||||
|
CMPL R_LEN, $60
|
||||||
|
JAE tagLit60Plus
|
||||||
|
|
||||||
|
// case x < 60:
|
||||||
|
// s++
|
||||||
|
INCQ R_SRC
|
||||||
|
|
||||||
|
doLit:
|
||||||
|
// This is the end of the inner "switch", when we have a literal tag.
|
||||||
|
//
|
||||||
|
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
|
||||||
|
// used in the pure Go decode_other.go code.
|
||||||
|
|
||||||
|
// length = int(x) + 1
|
||||||
|
//
|
||||||
|
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||||
|
// R_LEN can hold 64 bits, so the increment cannot overflow.
|
||||||
|
INCQ R_LEN
|
||||||
|
|
||||||
|
// Prepare to check if copying length bytes will run past the end of dst or
|
||||||
|
// src.
|
||||||
|
//
|
||||||
|
// R_TMP0 = len(dst) - d
|
||||||
|
// R_TMP1 = len(src) - s
|
||||||
|
MOVQ R_DEND, R_TMP0
|
||||||
|
SUBQ R_DST, R_TMP0
|
||||||
|
MOVQ R_SEND, R_TMP1
|
||||||
|
SUBQ R_SRC, R_TMP1
|
||||||
|
|
||||||
|
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||||
|
//
|
||||||
|
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||||
|
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||||
|
// against 21 instead of 16, because it cannot assume that all of its input
|
||||||
|
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||||
|
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||||
|
// contiguousness (the src argument is a []byte).
|
||||||
|
CMPQ R_LEN, $16
|
||||||
|
JGT callMemmove
|
||||||
|
CMPQ R_TMP0, $16
|
||||||
|
JLT callMemmove
|
||||||
|
CMPQ R_TMP1, $16
|
||||||
|
JLT callMemmove
|
||||||
|
|
||||||
|
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||||
|
// (Decode's documentation says that dst and src must not overlap.)
|
||||||
|
//
|
||||||
|
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||||
|
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||||
|
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||||
|
// non-nil error), so the overrun will be ignored.
|
||||||
|
//
|
||||||
|
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
|
||||||
|
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||||
|
// effective on architectures that are fussier about alignment.
|
||||||
|
MOVOU 0(R_SRC), X0
|
||||||
|
MOVOU X0, 0(R_DST)
|
||||||
|
|
||||||
|
// d += length
|
||||||
|
// s += length
|
||||||
|
ADDQ R_LEN, R_DST
|
||||||
|
ADDQ R_LEN, R_SRC
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
callMemmove:
|
||||||
|
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||||
|
CMPQ R_LEN, R_TMP0
|
||||||
|
JGT errCorrupt
|
||||||
|
CMPQ R_LEN, R_TMP1
|
||||||
|
JGT errCorrupt
|
||||||
|
|
||||||
|
// copy(dst[d:], src[s:s+length])
|
||||||
|
//
|
||||||
|
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||||
|
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
|
||||||
|
// three registers to the stack, to save local variables across the CALL.
|
||||||
|
MOVQ R_DST, 0(SP)
|
||||||
|
MOVQ R_SRC, 8(SP)
|
||||||
|
MOVQ R_LEN, 16(SP)
|
||||||
|
MOVQ R_DST, 24(SP)
|
||||||
|
MOVQ R_SRC, 32(SP)
|
||||||
|
MOVQ R_LEN, 40(SP)
|
||||||
|
MOVQ R_OFF, 48(SP)
|
||||||
|
CALL runtime·memmove(SB)
|
||||||
|
|
||||||
|
// Restore local variables: unspill registers from the stack and
|
||||||
|
// re-calculate R_DBASE-R_SEND.
|
||||||
|
MOVQ 24(SP), R_DST
|
||||||
|
MOVQ 32(SP), R_SRC
|
||||||
|
MOVQ 40(SP), R_LEN
|
||||||
|
MOVQ 48(SP), R_OFF
|
||||||
|
MOVQ dst_base+0(FP), R_DBASE
|
||||||
|
MOVQ dst_len+8(FP), R_DLEN
|
||||||
|
MOVQ R_DBASE, R_DEND
|
||||||
|
ADDQ R_DLEN, R_DEND
|
||||||
|
MOVQ src_base+24(FP), R_SBASE
|
||||||
|
MOVQ src_len+32(FP), R_SLEN
|
||||||
|
MOVQ R_SBASE, R_SEND
|
||||||
|
ADDQ R_SLEN, R_SEND
|
||||||
|
|
||||||
|
// d += length
|
||||||
|
// s += length
|
||||||
|
ADDQ R_LEN, R_DST
|
||||||
|
ADDQ R_LEN, R_SRC
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
tagLit60Plus:
|
||||||
|
// !!! This fragment does the
|
||||||
|
//
|
||||||
|
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||||
|
//
|
||||||
|
// checks. In the asm version, we code it once instead of once per switch case.
|
||||||
|
ADDQ R_LEN, R_SRC
|
||||||
|
SUBQ $58, R_SRC
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// case x == 60:
|
||||||
|
CMPL R_LEN, $61
|
||||||
|
JEQ tagLit61
|
||||||
|
JA tagLit62Plus
|
||||||
|
|
||||||
|
// x = uint32(src[s-1])
|
||||||
|
MOVBLZX -1(R_SRC), R_LEN
|
||||||
|
JMP doLit
|
||||||
|
|
||||||
|
tagLit61:
|
||||||
|
// case x == 61:
|
||||||
|
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||||
|
MOVWLZX -2(R_SRC), R_LEN
|
||||||
|
JMP doLit
|
||||||
|
|
||||||
|
tagLit62Plus:
|
||||||
|
CMPL R_LEN, $62
|
||||||
|
JA tagLit63
|
||||||
|
|
||||||
|
// case x == 62:
|
||||||
|
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||||
|
// We read one byte, safe to read one back, since we are just reading tag.
|
||||||
|
// x = binary.LittleEndian.Uint32(src[s-1:]) >> 8
|
||||||
|
MOVL -4(R_SRC), R_LEN
|
||||||
|
SHRL $8, R_LEN
|
||||||
|
JMP doLit
|
||||||
|
|
||||||
|
tagLit63:
|
||||||
|
// case x == 63:
|
||||||
|
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||||
|
MOVL -4(R_SRC), R_LEN
|
||||||
|
JMP doLit
|
||||||
|
|
||||||
|
// The code above handles literal tags.
|
||||||
|
// ----------------------------------------
|
||||||
|
// The code below handles copy tags.
|
||||||
|
|
||||||
|
tagCopy4:
|
||||||
|
// case tagCopy4:
|
||||||
|
// s += 5
|
||||||
|
ADDQ $5, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// length = 1 + int(src[s-5])>>2
|
||||||
|
SHRQ $2, R_LEN
|
||||||
|
INCQ R_LEN
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||||
|
MOVLQZX -4(R_SRC), R_OFF
|
||||||
|
JMP doCopy
|
||||||
|
|
||||||
|
tagCopy2:
|
||||||
|
// case tagCopy2:
|
||||||
|
// s += 3
|
||||||
|
ADDQ $3, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// length = 1 + int(src[s-3])>>2
|
||||||
|
SHRQ $2, R_LEN
|
||||||
|
INCQ R_LEN
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||||
|
MOVWQZX -2(R_SRC), R_OFF
|
||||||
|
JMP doCopy
|
||||||
|
|
||||||
|
tagCopy:
|
||||||
|
// We have a copy tag. We assume that:
|
||||||
|
// - R_TMP1 == src[s] & 0x03
|
||||||
|
// - R_LEN == src[s]
|
||||||
|
CMPQ R_TMP1, $2
|
||||||
|
JEQ tagCopy2
|
||||||
|
JA tagCopy4
|
||||||
|
|
||||||
|
// case tagCopy1:
|
||||||
|
// s += 2
|
||||||
|
ADDQ $2, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||||
|
// length = 4 + int(src[s-2])>>2&0x7
|
||||||
|
MOVBQZX -1(R_SRC), R_TMP1
|
||||||
|
MOVQ R_LEN, R_TMP0
|
||||||
|
SHRQ $2, R_LEN
|
||||||
|
ANDQ $0xe0, R_TMP0
|
||||||
|
ANDQ $7, R_LEN
|
||||||
|
SHLQ $3, R_TMP0
|
||||||
|
ADDQ $4, R_LEN
|
||||||
|
ORQ R_TMP1, R_TMP0
|
||||||
|
|
||||||
|
// check if repeat code, ZF set by ORQ.
|
||||||
|
JZ repeatCode
|
||||||
|
|
||||||
|
// This is a regular copy, transfer our temporary value to R_OFF (length)
|
||||||
|
MOVQ R_TMP0, R_OFF
|
||||||
|
JMP doCopy
|
||||||
|
|
||||||
|
// This is a repeat code.
|
||||||
|
repeatCode:
|
||||||
|
// If length < 9, reuse last offset, with the length already calculated.
|
||||||
|
CMPQ R_LEN, $9
|
||||||
|
JL doCopyRepeat
|
||||||
|
|
||||||
|
// Read additional bytes for length.
|
||||||
|
JE repeatLen1
|
||||||
|
|
||||||
|
// Rare, so the extra branch shouldn't hurt too much.
|
||||||
|
CMPQ R_LEN, $10
|
||||||
|
JE repeatLen2
|
||||||
|
JMP repeatLen3
|
||||||
|
|
||||||
|
// Read repeat lengths.
|
||||||
|
repeatLen1:
|
||||||
|
// s ++
|
||||||
|
ADDQ $1, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// length = src[s-1] + 8
|
||||||
|
MOVBQZX -1(R_SRC), R_LEN
|
||||||
|
ADDL $8, R_LEN
|
||||||
|
JMP doCopyRepeat
|
||||||
|
|
||||||
|
repeatLen2:
|
||||||
|
// s +=2
|
||||||
|
ADDQ $2, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + (1 << 8)
|
||||||
|
MOVWQZX -2(R_SRC), R_LEN
|
||||||
|
ADDL $260, R_LEN
|
||||||
|
JMP doCopyRepeat
|
||||||
|
|
||||||
|
repeatLen3:
|
||||||
|
// s +=3
|
||||||
|
ADDQ $3, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
CMPQ R_SRC, R_SEND
|
||||||
|
JA errCorrupt
|
||||||
|
|
||||||
|
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + (1 << 16)
|
||||||
|
// Read one byte further back (just part of the tag, shifted out)
|
||||||
|
MOVL -4(R_SRC), R_LEN
|
||||||
|
SHRL $8, R_LEN
|
||||||
|
ADDL $65540, R_LEN
|
||||||
|
JMP doCopyRepeat
|
||||||
|
|
||||||
|
doCopy:
|
||||||
|
// This is the end of the outer "switch", when we have a copy tag.
|
||||||
|
//
|
||||||
|
// We assume that:
|
||||||
|
// - R_LEN == length && R_LEN > 0
|
||||||
|
// - R_OFF == offset
|
||||||
|
|
||||||
|
// if d < offset { etc }
|
||||||
|
MOVQ R_DST, R_TMP1
|
||||||
|
SUBQ R_DBASE, R_TMP1
|
||||||
|
CMPQ R_TMP1, R_OFF
|
||||||
|
JLT errCorrupt
|
||||||
|
|
||||||
|
// Repeat values can skip the test above, since any offset > 0 will be in dst.
|
||||||
|
doCopyRepeat:
|
||||||
|
// if offset <= 0 { etc }
|
||||||
|
CMPQ R_OFF, $0
|
||||||
|
JLE errCorrupt
|
||||||
|
|
||||||
|
// if length > len(dst)-d { etc }
|
||||||
|
MOVQ R_DEND, R_TMP1
|
||||||
|
SUBQ R_DST, R_TMP1
|
||||||
|
CMPQ R_LEN, R_TMP1
|
||||||
|
JGT errCorrupt
|
||||||
|
|
||||||
|
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||||
|
//
|
||||||
|
// Set:
|
||||||
|
// - R_TMP2 = len(dst)-d
|
||||||
|
// - R_TMP3 = &dst[d-offset]
|
||||||
|
MOVQ R_DEND, R_TMP2
|
||||||
|
SUBQ R_DST, R_TMP2
|
||||||
|
MOVQ R_DST, R_TMP3
|
||||||
|
SUBQ R_OFF, R_TMP3
|
||||||
|
|
||||||
|
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||||
|
//
|
||||||
|
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||||
|
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||||
|
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||||
|
// and not one 16-byte load/store, and the first store has to be before the
|
||||||
|
// second load, due to the overlap if offset is in the range [8, 16).
|
||||||
|
//
|
||||||
|
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||||
|
// goto slowForwardCopy
|
||||||
|
// }
|
||||||
|
// copy 16 bytes
|
||||||
|
// d += length
|
||||||
|
CMPQ R_LEN, $16
|
||||||
|
JGT slowForwardCopy
|
||||||
|
CMPQ R_OFF, $8
|
||||||
|
JLT slowForwardCopy
|
||||||
|
CMPQ R_TMP2, $16
|
||||||
|
JLT slowForwardCopy
|
||||||
|
MOVQ 0(R_TMP3), R_TMP0
|
||||||
|
MOVQ R_TMP0, 0(R_DST)
|
||||||
|
MOVQ 8(R_TMP3), R_TMP1
|
||||||
|
MOVQ R_TMP1, 8(R_DST)
|
||||||
|
ADDQ R_LEN, R_DST
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
slowForwardCopy:
|
||||||
|
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||||
|
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||||
|
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||||
|
// of the outermost loop.
|
||||||
|
//
|
||||||
|
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||||
|
// commentary says:
|
||||||
|
//
|
||||||
|
// ----
|
||||||
|
//
|
||||||
|
// The main part of this loop is a simple copy of eight bytes at a time
|
||||||
|
// until we've copied (at least) the requested amount of bytes. However,
|
||||||
|
// if d and d-offset are less than eight bytes apart (indicating a
|
||||||
|
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||||
|
// order to get the correct results. For instance, if the buffer looks like
|
||||||
|
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||||
|
// intervals:
|
||||||
|
//
|
||||||
|
// abxxxxxxxxxxxx
|
||||||
|
// [------] d-offset
|
||||||
|
// [------] d
|
||||||
|
//
|
||||||
|
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||||
|
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||||
|
//
|
||||||
|
// ababxxxxxxxxxx
|
||||||
|
// [------] d-offset
|
||||||
|
// [------] d
|
||||||
|
//
|
||||||
|
// and repeat the exercise until the two no longer overlap.
|
||||||
|
//
|
||||||
|
// This allows us to do very well in the special case of one single byte
|
||||||
|
// repeated many times, without taking a big hit for more general cases.
|
||||||
|
//
|
||||||
|
// The worst case of extra writing past the end of the match occurs when
|
||||||
|
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||||
|
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||||
|
// position 1. Thus, ten excess bytes.
|
||||||
|
//
|
||||||
|
// ----
|
||||||
|
//
|
||||||
|
// That "10 byte overrun" worst case is confirmed by Go's
|
||||||
|
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||||
|
// and finishSlowForwardCopy algorithm.
|
||||||
|
//
|
||||||
|
// if length > len(dst)-d-10 {
|
||||||
|
// goto verySlowForwardCopy
|
||||||
|
// }
|
||||||
|
SUBQ $10, R_TMP2
|
||||||
|
CMPQ R_LEN, R_TMP2
|
||||||
|
JGT verySlowForwardCopy
|
||||||
|
|
||||||
|
// We want to keep the offset, so we use R_TMP2 from here.
|
||||||
|
MOVQ R_OFF, R_TMP2
|
||||||
|
|
||||||
|
makeOffsetAtLeast8:
|
||||||
|
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||||
|
// 8-byte load/stores.
|
||||||
|
//
|
||||||
|
// for offset < 8 {
|
||||||
|
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||||
|
// length -= offset
|
||||||
|
// d += offset
|
||||||
|
// offset += offset
|
||||||
|
// // The two previous lines together means that d-offset, and therefore
|
||||||
|
// // R_TMP3, is unchanged.
|
||||||
|
// }
|
||||||
|
CMPQ R_TMP2, $8
|
||||||
|
JGE fixUpSlowForwardCopy
|
||||||
|
MOVQ (R_TMP3), R_TMP1
|
||||||
|
MOVQ R_TMP1, (R_DST)
|
||||||
|
SUBQ R_TMP2, R_LEN
|
||||||
|
ADDQ R_TMP2, R_DST
|
||||||
|
ADDQ R_TMP2, R_TMP2
|
||||||
|
JMP makeOffsetAtLeast8
|
||||||
|
|
||||||
|
fixUpSlowForwardCopy:
|
||||||
|
// !!! Add length (which might be negative now) to d (implied by R_DST being
|
||||||
|
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||||
|
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
|
||||||
|
// length is positive, copying the remaining length bytes will write to the
|
||||||
|
// right place.
|
||||||
|
MOVQ R_DST, R_TMP0
|
||||||
|
ADDQ R_LEN, R_DST
|
||||||
|
|
||||||
|
finishSlowForwardCopy:
|
||||||
|
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||||
|
// length means that we overrun, but as above, that will be fixed up by
|
||||||
|
// subsequent iterations of the outermost loop.
|
||||||
|
CMPQ R_LEN, $0
|
||||||
|
JLE loop
|
||||||
|
MOVQ (R_TMP3), R_TMP1
|
||||||
|
MOVQ R_TMP1, (R_TMP0)
|
||||||
|
ADDQ $8, R_TMP3
|
||||||
|
ADDQ $8, R_TMP0
|
||||||
|
SUBQ $8, R_LEN
|
||||||
|
JMP finishSlowForwardCopy
|
||||||
|
|
||||||
|
verySlowForwardCopy:
|
||||||
|
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||||
|
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||||
|
// that length > 0. In Go syntax:
|
||||||
|
//
|
||||||
|
// for {
|
||||||
|
// dst[d] = dst[d - offset]
|
||||||
|
// d++
|
||||||
|
// length--
|
||||||
|
// if length == 0 {
|
||||||
|
// break
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
MOVB (R_TMP3), R_TMP1
|
||||||
|
MOVB R_TMP1, (R_DST)
|
||||||
|
INCQ R_TMP3
|
||||||
|
INCQ R_DST
|
||||||
|
DECQ R_LEN
|
||||||
|
JNZ verySlowForwardCopy
|
||||||
|
JMP loop
|
||||||
|
|
||||||
|
// The code above handles copy tags.
|
||||||
|
// ----------------------------------------
|
||||||
|
|
||||||
|
end:
|
||||||
|
// This is the end of the "for s < len(src)".
|
||||||
|
//
|
||||||
|
// if d != len(dst) { etc }
|
||||||
|
CMPQ R_DST, R_DEND
|
||||||
|
JNE errCorrupt
|
||||||
|
|
||||||
|
// return 0
|
||||||
|
MOVQ $0, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
errCorrupt:
|
||||||
|
// return decodeErrCodeCorrupt
|
||||||
|
MOVQ $1, ret+48(FP)
|
||||||
|
RET
|
574
vendor/github.com/klauspost/compress/s2/decode_arm64.s
generated
vendored
Normal file
574
vendor/github.com/klauspost/compress/s2/decode_arm64.s
generated
vendored
Normal file
|
@ -0,0 +1,574 @@
|
||||||
|
// Copyright 2020 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define R_TMP0 R2
|
||||||
|
#define R_TMP1 R3
|
||||||
|
#define R_LEN R4
|
||||||
|
#define R_OFF R5
|
||||||
|
#define R_SRC R6
|
||||||
|
#define R_DST R7
|
||||||
|
#define R_DBASE R8
|
||||||
|
#define R_DLEN R9
|
||||||
|
#define R_DEND R10
|
||||||
|
#define R_SBASE R11
|
||||||
|
#define R_SLEN R12
|
||||||
|
#define R_SEND R13
|
||||||
|
#define R_TMP2 R14
|
||||||
|
#define R_TMP3 R15
|
||||||
|
|
||||||
|
// TEST_SRC will check if R_SRC is <= SRC_END
|
||||||
|
#define TEST_SRC() \
|
||||||
|
CMP R_SEND, R_SRC \
|
||||||
|
BGT errCorrupt
|
||||||
|
|
||||||
|
// MOVD R_SRC, R_TMP1
|
||||||
|
// SUB R_SBASE, R_TMP1, R_TMP1
|
||||||
|
// CMP R_SLEN, R_TMP1
|
||||||
|
// BGT errCorrupt
|
||||||
|
|
||||||
|
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||||
|
// where marked with a "!!!".
|
||||||
|
|
||||||
|
// func decode(dst, src []byte) int
|
||||||
|
//
|
||||||
|
// All local variables fit into registers. The non-zero stack size is only to
|
||||||
|
// spill registers and push args when issuing a CALL. The register allocation:
|
||||||
|
// - R_TMP0 scratch
|
||||||
|
// - R_TMP1 scratch
|
||||||
|
// - R_LEN length or x
|
||||||
|
// - R_OFF offset
|
||||||
|
// - R_SRC &src[s]
|
||||||
|
// - R_DST &dst[d]
|
||||||
|
// + R_DBASE dst_base
|
||||||
|
// + R_DLEN dst_len
|
||||||
|
// + R_DEND dst_base + dst_len
|
||||||
|
// + R_SBASE src_base
|
||||||
|
// + R_SLEN src_len
|
||||||
|
// + R_SEND src_base + src_len
|
||||||
|
// - R_TMP2 used by doCopy
|
||||||
|
// - R_TMP3 used by doCopy
|
||||||
|
//
|
||||||
|
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
|
||||||
|
// function, and after a CALL returns, and are not otherwise modified.
|
||||||
|
//
|
||||||
|
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
|
||||||
|
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
|
||||||
|
TEXT ·s2Decode(SB), NOSPLIT, $56-64
|
||||||
|
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
|
||||||
|
MOVD dst_base+0(FP), R_DBASE
|
||||||
|
MOVD dst_len+8(FP), R_DLEN
|
||||||
|
MOVD R_DBASE, R_DST
|
||||||
|
MOVD R_DBASE, R_DEND
|
||||||
|
ADD R_DLEN, R_DEND, R_DEND
|
||||||
|
MOVD src_base+24(FP), R_SBASE
|
||||||
|
MOVD src_len+32(FP), R_SLEN
|
||||||
|
MOVD R_SBASE, R_SRC
|
||||||
|
MOVD R_SBASE, R_SEND
|
||||||
|
ADD R_SLEN, R_SEND, R_SEND
|
||||||
|
MOVD $0, R_OFF
|
||||||
|
|
||||||
|
loop:
|
||||||
|
// for s < len(src)
|
||||||
|
CMP R_SEND, R_SRC
|
||||||
|
BEQ end
|
||||||
|
|
||||||
|
// R_LEN = uint32(src[s])
|
||||||
|
//
|
||||||
|
// switch src[s] & 0x03
|
||||||
|
MOVBU (R_SRC), R_LEN
|
||||||
|
MOVW R_LEN, R_TMP1
|
||||||
|
ANDW $3, R_TMP1
|
||||||
|
MOVW $1, R1
|
||||||
|
CMPW R1, R_TMP1
|
||||||
|
BGE tagCopy
|
||||||
|
|
||||||
|
// ----------------------------------------
|
||||||
|
// The code below handles literal tags.
|
||||||
|
|
||||||
|
// case tagLiteral:
|
||||||
|
// x := uint32(src[s] >> 2)
|
||||||
|
// switch
|
||||||
|
MOVW $60, R1
|
||||||
|
LSRW $2, R_LEN, R_LEN
|
||||||
|
CMPW R_LEN, R1
|
||||||
|
BLS tagLit60Plus
|
||||||
|
|
||||||
|
// case x < 60:
|
||||||
|
// s++
|
||||||
|
ADD $1, R_SRC, R_SRC
|
||||||
|
|
||||||
|
doLit:
|
||||||
|
// This is the end of the inner "switch", when we have a literal tag.
|
||||||
|
//
|
||||||
|
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
|
||||||
|
// used in the pure Go decode_other.go code.
|
||||||
|
|
||||||
|
// length = int(x) + 1
|
||||||
|
//
|
||||||
|
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||||
|
// R_LEN can hold 64 bits, so the increment cannot overflow.
|
||||||
|
ADD $1, R_LEN, R_LEN
|
||||||
|
|
||||||
|
// Prepare to check if copying length bytes will run past the end of dst or
|
||||||
|
// src.
|
||||||
|
//
|
||||||
|
// R_TMP0 = len(dst) - d
|
||||||
|
// R_TMP1 = len(src) - s
|
||||||
|
MOVD R_DEND, R_TMP0
|
||||||
|
SUB R_DST, R_TMP0, R_TMP0
|
||||||
|
MOVD R_SEND, R_TMP1
|
||||||
|
SUB R_SRC, R_TMP1, R_TMP1
|
||||||
|
|
||||||
|
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||||
|
//
|
||||||
|
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||||
|
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||||
|
// against 21 instead of 16, because it cannot assume that all of its input
|
||||||
|
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||||
|
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||||
|
// contiguousness (the src argument is a []byte).
|
||||||
|
CMP $16, R_LEN
|
||||||
|
BGT callMemmove
|
||||||
|
CMP $16, R_TMP0
|
||||||
|
BLT callMemmove
|
||||||
|
CMP $16, R_TMP1
|
||||||
|
BLT callMemmove
|
||||||
|
|
||||||
|
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||||
|
// (Decode's documentation says that dst and src must not overlap.)
|
||||||
|
//
|
||||||
|
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||||
|
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||||
|
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||||
|
// non-nil error), so the overrun will be ignored.
|
||||||
|
//
|
||||||
|
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
|
||||||
|
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||||
|
// effective on architectures that are fussier about alignment.
|
||||||
|
LDP 0(R_SRC), (R_TMP2, R_TMP3)
|
||||||
|
STP (R_TMP2, R_TMP3), 0(R_DST)
|
||||||
|
|
||||||
|
// d += length
|
||||||
|
// s += length
|
||||||
|
ADD R_LEN, R_DST, R_DST
|
||||||
|
ADD R_LEN, R_SRC, R_SRC
|
||||||
|
B loop
|
||||||
|
|
||||||
|
callMemmove:
|
||||||
|
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||||
|
CMP R_TMP0, R_LEN
|
||||||
|
BGT errCorrupt
|
||||||
|
CMP R_TMP1, R_LEN
|
||||||
|
BGT errCorrupt
|
||||||
|
|
||||||
|
// copy(dst[d:], src[s:s+length])
|
||||||
|
//
|
||||||
|
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||||
|
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
|
||||||
|
// three registers to the stack, to save local variables across the CALL.
|
||||||
|
MOVD R_DST, 8(RSP)
|
||||||
|
MOVD R_SRC, 16(RSP)
|
||||||
|
MOVD R_LEN, 24(RSP)
|
||||||
|
MOVD R_DST, 32(RSP)
|
||||||
|
MOVD R_SRC, 40(RSP)
|
||||||
|
MOVD R_LEN, 48(RSP)
|
||||||
|
MOVD R_OFF, 56(RSP)
|
||||||
|
CALL runtime·memmove(SB)
|
||||||
|
|
||||||
|
// Restore local variables: unspill registers from the stack and
|
||||||
|
// re-calculate R_DBASE-R_SEND.
|
||||||
|
MOVD 32(RSP), R_DST
|
||||||
|
MOVD 40(RSP), R_SRC
|
||||||
|
MOVD 48(RSP), R_LEN
|
||||||
|
MOVD 56(RSP), R_OFF
|
||||||
|
MOVD dst_base+0(FP), R_DBASE
|
||||||
|
MOVD dst_len+8(FP), R_DLEN
|
||||||
|
MOVD R_DBASE, R_DEND
|
||||||
|
ADD R_DLEN, R_DEND, R_DEND
|
||||||
|
MOVD src_base+24(FP), R_SBASE
|
||||||
|
MOVD src_len+32(FP), R_SLEN
|
||||||
|
MOVD R_SBASE, R_SEND
|
||||||
|
ADD R_SLEN, R_SEND, R_SEND
|
||||||
|
|
||||||
|
// d += length
|
||||||
|
// s += length
|
||||||
|
ADD R_LEN, R_DST, R_DST
|
||||||
|
ADD R_LEN, R_SRC, R_SRC
|
||||||
|
B loop
|
||||||
|
|
||||||
|
tagLit60Plus:
|
||||||
|
// !!! This fragment does the
|
||||||
|
//
|
||||||
|
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||||
|
//
|
||||||
|
// checks. In the asm version, we code it once instead of once per switch case.
|
||||||
|
ADD R_LEN, R_SRC, R_SRC
|
||||||
|
SUB $58, R_SRC, R_SRC
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// case x == 60:
|
||||||
|
MOVW $61, R1
|
||||||
|
CMPW R1, R_LEN
|
||||||
|
BEQ tagLit61
|
||||||
|
BGT tagLit62Plus
|
||||||
|
|
||||||
|
// x = uint32(src[s-1])
|
||||||
|
MOVBU -1(R_SRC), R_LEN
|
||||||
|
B doLit
|
||||||
|
|
||||||
|
tagLit61:
|
||||||
|
// case x == 61:
|
||||||
|
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||||
|
MOVHU -2(R_SRC), R_LEN
|
||||||
|
B doLit
|
||||||
|
|
||||||
|
tagLit62Plus:
|
||||||
|
CMPW $62, R_LEN
|
||||||
|
BHI tagLit63
|
||||||
|
|
||||||
|
// case x == 62:
|
||||||
|
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||||
|
MOVHU -3(R_SRC), R_LEN
|
||||||
|
MOVBU -1(R_SRC), R_TMP1
|
||||||
|
ORR R_TMP1<<16, R_LEN
|
||||||
|
B doLit
|
||||||
|
|
||||||
|
tagLit63:
|
||||||
|
// case x == 63:
|
||||||
|
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||||
|
MOVWU -4(R_SRC), R_LEN
|
||||||
|
B doLit
|
||||||
|
|
||||||
|
// The code above handles literal tags.
|
||||||
|
// ----------------------------------------
|
||||||
|
// The code below handles copy tags.
|
||||||
|
|
||||||
|
tagCopy4:
|
||||||
|
// case tagCopy4:
|
||||||
|
// s += 5
|
||||||
|
ADD $5, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
MOVD R_SRC, R_TMP1
|
||||||
|
SUB R_SBASE, R_TMP1, R_TMP1
|
||||||
|
CMP R_SLEN, R_TMP1
|
||||||
|
BGT errCorrupt
|
||||||
|
|
||||||
|
// length = 1 + int(src[s-5])>>2
|
||||||
|
MOVD $1, R1
|
||||||
|
ADD R_LEN>>2, R1, R_LEN
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||||
|
MOVWU -4(R_SRC), R_OFF
|
||||||
|
B doCopy
|
||||||
|
|
||||||
|
tagCopy2:
|
||||||
|
// case tagCopy2:
|
||||||
|
// s += 3
|
||||||
|
ADD $3, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// length = 1 + int(src[s-3])>>2
|
||||||
|
MOVD $1, R1
|
||||||
|
ADD R_LEN>>2, R1, R_LEN
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||||
|
MOVHU -2(R_SRC), R_OFF
|
||||||
|
B doCopy
|
||||||
|
|
||||||
|
tagCopy:
|
||||||
|
// We have a copy tag. We assume that:
|
||||||
|
// - R_TMP1 == src[s] & 0x03
|
||||||
|
// - R_LEN == src[s]
|
||||||
|
CMP $2, R_TMP1
|
||||||
|
BEQ tagCopy2
|
||||||
|
BGT tagCopy4
|
||||||
|
|
||||||
|
// case tagCopy1:
|
||||||
|
// s += 2
|
||||||
|
ADD $2, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||||
|
// Calculate offset in R_TMP0 in case it is a repeat.
|
||||||
|
MOVD R_LEN, R_TMP0
|
||||||
|
AND $0xe0, R_TMP0
|
||||||
|
MOVBU -1(R_SRC), R_TMP1
|
||||||
|
ORR R_TMP0<<3, R_TMP1, R_TMP0
|
||||||
|
|
||||||
|
// length = 4 + int(src[s-2])>>2&0x7
|
||||||
|
MOVD $7, R1
|
||||||
|
AND R_LEN>>2, R1, R_LEN
|
||||||
|
ADD $4, R_LEN, R_LEN
|
||||||
|
|
||||||
|
// check if repeat code with offset 0.
|
||||||
|
CMP $0, R_TMP0
|
||||||
|
BEQ repeatCode
|
||||||
|
|
||||||
|
// This is a regular copy, transfer our temporary value to R_OFF (offset)
|
||||||
|
MOVD R_TMP0, R_OFF
|
||||||
|
B doCopy
|
||||||
|
|
||||||
|
// This is a repeat code.
|
||||||
|
repeatCode:
|
||||||
|
// If length < 9, reuse last offset, with the length already calculated.
|
||||||
|
CMP $9, R_LEN
|
||||||
|
BLT doCopyRepeat
|
||||||
|
BEQ repeatLen1
|
||||||
|
CMP $10, R_LEN
|
||||||
|
BEQ repeatLen2
|
||||||
|
|
||||||
|
repeatLen3:
|
||||||
|
// s +=3
|
||||||
|
ADD $3, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
|
||||||
|
MOVBU -1(R_SRC), R_TMP0
|
||||||
|
MOVHU -3(R_SRC), R_LEN
|
||||||
|
ORR R_TMP0<<16, R_LEN, R_LEN
|
||||||
|
ADD $65540, R_LEN, R_LEN
|
||||||
|
B doCopyRepeat
|
||||||
|
|
||||||
|
repeatLen2:
|
||||||
|
// s +=2
|
||||||
|
ADD $2, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
|
||||||
|
MOVHU -2(R_SRC), R_LEN
|
||||||
|
ADD $260, R_LEN, R_LEN
|
||||||
|
B doCopyRepeat
|
||||||
|
|
||||||
|
repeatLen1:
|
||||||
|
// s +=1
|
||||||
|
ADD $1, R_SRC, R_SRC
|
||||||
|
|
||||||
|
// if uint(s) > uint(len(src)) { etc }
|
||||||
|
TEST_SRC()
|
||||||
|
|
||||||
|
// length = src[s-1] + 8
|
||||||
|
MOVBU -1(R_SRC), R_LEN
|
||||||
|
ADD $8, R_LEN, R_LEN
|
||||||
|
B doCopyRepeat
|
||||||
|
|
||||||
|
doCopy:
|
||||||
|
// This is the end of the outer "switch", when we have a copy tag.
|
||||||
|
//
|
||||||
|
// We assume that:
|
||||||
|
// - R_LEN == length && R_LEN > 0
|
||||||
|
// - R_OFF == offset
|
||||||
|
|
||||||
|
// if d < offset { etc }
|
||||||
|
MOVD R_DST, R_TMP1
|
||||||
|
SUB R_DBASE, R_TMP1, R_TMP1
|
||||||
|
CMP R_OFF, R_TMP1
|
||||||
|
BLT errCorrupt
|
||||||
|
|
||||||
|
// Repeat values can skip the test above, since any offset > 0 will be in dst.
|
||||||
|
doCopyRepeat:
|
||||||
|
|
||||||
|
// if offset <= 0 { etc }
|
||||||
|
CMP $0, R_OFF
|
||||||
|
BLE errCorrupt
|
||||||
|
|
||||||
|
// if length > len(dst)-d { etc }
|
||||||
|
MOVD R_DEND, R_TMP1
|
||||||
|
SUB R_DST, R_TMP1, R_TMP1
|
||||||
|
CMP R_TMP1, R_LEN
|
||||||
|
BGT errCorrupt
|
||||||
|
|
||||||
|
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||||
|
//
|
||||||
|
// Set:
|
||||||
|
// - R_TMP2 = len(dst)-d
|
||||||
|
// - R_TMP3 = &dst[d-offset]
|
||||||
|
MOVD R_DEND, R_TMP2
|
||||||
|
SUB R_DST, R_TMP2, R_TMP2
|
||||||
|
MOVD R_DST, R_TMP3
|
||||||
|
SUB R_OFF, R_TMP3, R_TMP3
|
||||||
|
|
||||||
|
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||||
|
//
|
||||||
|
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||||
|
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||||
|
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||||
|
// and not one 16-byte load/store, and the first store has to be before the
|
||||||
|
// second load, due to the overlap if offset is in the range [8, 16).
|
||||||
|
//
|
||||||
|
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||||
|
// goto slowForwardCopy
|
||||||
|
// }
|
||||||
|
// copy 16 bytes
|
||||||
|
// d += length
|
||||||
|
CMP $16, R_LEN
|
||||||
|
BGT slowForwardCopy
|
||||||
|
CMP $8, R_OFF
|
||||||
|
BLT slowForwardCopy
|
||||||
|
CMP $16, R_TMP2
|
||||||
|
BLT slowForwardCopy
|
||||||
|
MOVD 0(R_TMP3), R_TMP0
|
||||||
|
MOVD R_TMP0, 0(R_DST)
|
||||||
|
MOVD 8(R_TMP3), R_TMP1
|
||||||
|
MOVD R_TMP1, 8(R_DST)
|
||||||
|
ADD R_LEN, R_DST, R_DST
|
||||||
|
B loop
|
||||||
|
|
||||||
|
slowForwardCopy:
|
||||||
|
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||||
|
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||||
|
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||||
|
// of the outermost loop.
|
||||||
|
//
|
||||||
|
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||||
|
// commentary says:
|
||||||
|
//
|
||||||
|
// ----
|
||||||
|
//
|
||||||
|
// The main part of this loop is a simple copy of eight bytes at a time
|
||||||
|
// until we've copied (at least) the requested amount of bytes. However,
|
||||||
|
// if d and d-offset are less than eight bytes apart (indicating a
|
||||||
|
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||||
|
// order to get the correct results. For instance, if the buffer looks like
|
||||||
|
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||||
|
// intervals:
|
||||||
|
//
|
||||||
|
// abxxxxxxxxxxxx
|
||||||
|
// [------] d-offset
|
||||||
|
// [------] d
|
||||||
|
//
|
||||||
|
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||||
|
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||||
|
//
|
||||||
|
// ababxxxxxxxxxx
|
||||||
|
// [------] d-offset
|
||||||
|
// [------] d
|
||||||
|
//
|
||||||
|
// and repeat the exercise until the two no longer overlap.
|
||||||
|
//
|
||||||
|
// This allows us to do very well in the special case of one single byte
|
||||||
|
// repeated many times, without taking a big hit for more general cases.
|
||||||
|
//
|
||||||
|
// The worst case of extra writing past the end of the match occurs when
|
||||||
|
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||||
|
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||||
|
// position 1. Thus, ten excess bytes.
|
||||||
|
//
|
||||||
|
// ----
|
||||||
|
//
|
||||||
|
// That "10 byte overrun" worst case is confirmed by Go's
|
||||||
|
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||||
|
// and finishSlowForwardCopy algorithm.
|
||||||
|
//
|
||||||
|
// if length > len(dst)-d-10 {
|
||||||
|
// goto verySlowForwardCopy
|
||||||
|
// }
|
||||||
|
SUB $10, R_TMP2, R_TMP2
|
||||||
|
CMP R_TMP2, R_LEN
|
||||||
|
BGT verySlowForwardCopy
|
||||||
|
|
||||||
|
// We want to keep the offset, so we use R_TMP2 from here.
|
||||||
|
MOVD R_OFF, R_TMP2
|
||||||
|
|
||||||
|
makeOffsetAtLeast8:
|
||||||
|
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||||
|
// 8-byte load/stores.
|
||||||
|
//
|
||||||
|
// for offset < 8 {
|
||||||
|
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||||
|
// length -= offset
|
||||||
|
// d += offset
|
||||||
|
// offset += offset
|
||||||
|
// // The two previous lines together means that d-offset, and therefore
|
||||||
|
// // R_TMP3, is unchanged.
|
||||||
|
// }
|
||||||
|
CMP $8, R_TMP2
|
||||||
|
BGE fixUpSlowForwardCopy
|
||||||
|
MOVD (R_TMP3), R_TMP1
|
||||||
|
MOVD R_TMP1, (R_DST)
|
||||||
|
SUB R_TMP2, R_LEN, R_LEN
|
||||||
|
ADD R_TMP2, R_DST, R_DST
|
||||||
|
ADD R_TMP2, R_TMP2, R_TMP2
|
||||||
|
B makeOffsetAtLeast8
|
||||||
|
|
||||||
|
fixUpSlowForwardCopy:
|
||||||
|
// !!! Add length (which might be negative now) to d (implied by R_DST being
|
||||||
|
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||||
|
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
|
||||||
|
// length is positive, copying the remaining length bytes will write to the
|
||||||
|
// right place.
|
||||||
|
MOVD R_DST, R_TMP0
|
||||||
|
ADD R_LEN, R_DST, R_DST
|
||||||
|
|
||||||
|
finishSlowForwardCopy:
|
||||||
|
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||||
|
// length means that we overrun, but as above, that will be fixed up by
|
||||||
|
// subsequent iterations of the outermost loop.
|
||||||
|
MOVD $0, R1
|
||||||
|
CMP R1, R_LEN
|
||||||
|
BLE loop
|
||||||
|
MOVD (R_TMP3), R_TMP1
|
||||||
|
MOVD R_TMP1, (R_TMP0)
|
||||||
|
ADD $8, R_TMP3, R_TMP3
|
||||||
|
ADD $8, R_TMP0, R_TMP0
|
||||||
|
SUB $8, R_LEN, R_LEN
|
||||||
|
B finishSlowForwardCopy
|
||||||
|
|
||||||
|
verySlowForwardCopy:
|
||||||
|
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||||
|
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||||
|
// that length > 0. In Go syntax:
|
||||||
|
//
|
||||||
|
// for {
|
||||||
|
// dst[d] = dst[d - offset]
|
||||||
|
// d++
|
||||||
|
// length--
|
||||||
|
// if length == 0 {
|
||||||
|
// break
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
MOVB (R_TMP3), R_TMP1
|
||||||
|
MOVB R_TMP1, (R_DST)
|
||||||
|
ADD $1, R_TMP3, R_TMP3
|
||||||
|
ADD $1, R_DST, R_DST
|
||||||
|
SUB $1, R_LEN, R_LEN
|
||||||
|
CBNZ R_LEN, verySlowForwardCopy
|
||||||
|
B loop
|
||||||
|
|
||||||
|
// The code above handles copy tags.
|
||||||
|
// ----------------------------------------
|
||||||
|
|
||||||
|
end:
|
||||||
|
// This is the end of the "for s < len(src)".
|
||||||
|
//
|
||||||
|
// if d != len(dst) { etc }
|
||||||
|
CMP R_DEND, R_DST
|
||||||
|
BNE errCorrupt
|
||||||
|
|
||||||
|
// return 0
|
||||||
|
MOVD $0, ret+48(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
errCorrupt:
|
||||||
|
// return decodeErrCodeCorrupt
|
||||||
|
MOVD $1, R_TMP0
|
||||||
|
MOVD R_TMP0, ret+48(FP)
|
||||||
|
RET
|
17
vendor/github.com/klauspost/compress/s2/decode_asm.go
generated
vendored
Normal file
17
vendor/github.com/klauspost/compress/s2/decode_asm.go
generated
vendored
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build (amd64 || arm64) && !appengine && gc && !noasm
|
||||||
|
// +build amd64 arm64
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
// decode has the same semantics as in decode_other.go.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func s2Decode(dst, src []byte) int
|
267
vendor/github.com/klauspost/compress/s2/decode_other.go
generated
vendored
Normal file
267
vendor/github.com/klauspost/compress/s2/decode_other.go
generated
vendored
Normal file
|
@ -0,0 +1,267 @@
|
||||||
|
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
//go:build (!amd64 && !arm64) || appengine || !gc || noasm
|
||||||
|
// +build !amd64,!arm64 appengine !gc noasm
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// decode writes the decoding of src to dst. It assumes that the varint-encoded
|
||||||
|
// length of the decompressed bytes has already been read, and that len(dst)
|
||||||
|
// equals that length.
|
||||||
|
//
|
||||||
|
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
|
||||||
|
func s2Decode(dst, src []byte) int {
|
||||||
|
const debug = false
|
||||||
|
if debug {
|
||||||
|
fmt.Println("Starting decode, dst len:", len(dst))
|
||||||
|
}
|
||||||
|
var d, s, length int
|
||||||
|
offset := 0
|
||||||
|
|
||||||
|
// As long as we can read at least 5 bytes...
|
||||||
|
for s < len(src)-5 {
|
||||||
|
switch src[s] & 0x03 {
|
||||||
|
case tagLiteral:
|
||||||
|
x := uint32(src[s] >> 2)
|
||||||
|
switch {
|
||||||
|
case x < 60:
|
||||||
|
s++
|
||||||
|
case x == 60:
|
||||||
|
s += 2
|
||||||
|
x = uint32(src[s-1])
|
||||||
|
case x == 61:
|
||||||
|
s += 3
|
||||||
|
x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||||
|
case x == 62:
|
||||||
|
s += 4
|
||||||
|
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||||
|
case x == 63:
|
||||||
|
s += 5
|
||||||
|
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||||
|
}
|
||||||
|
length = int(x) + 1
|
||||||
|
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
if debug {
|
||||||
|
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||||
|
}
|
||||||
|
|
||||||
|
copy(dst[d:], src[s:s+length])
|
||||||
|
d += length
|
||||||
|
s += length
|
||||||
|
continue
|
||||||
|
|
||||||
|
case tagCopy1:
|
||||||
|
s += 2
|
||||||
|
length = int(src[s-2]) >> 2 & 0x7
|
||||||
|
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||||
|
if toffset == 0 {
|
||||||
|
if debug {
|
||||||
|
fmt.Print("(repeat) ")
|
||||||
|
}
|
||||||
|
// keep last offset
|
||||||
|
switch length {
|
||||||
|
case 5:
|
||||||
|
s += 1
|
||||||
|
length = int(uint32(src[s-1])) + 4
|
||||||
|
case 6:
|
||||||
|
s += 2
|
||||||
|
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
|
||||||
|
case 7:
|
||||||
|
s += 3
|
||||||
|
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
|
||||||
|
default: // 0-> 4
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
offset = toffset
|
||||||
|
}
|
||||||
|
length += 4
|
||||||
|
case tagCopy2:
|
||||||
|
s += 3
|
||||||
|
length = 1 + int(src[s-3])>>2
|
||||||
|
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||||
|
|
||||||
|
case tagCopy4:
|
||||||
|
s += 5
|
||||||
|
length = 1 + int(src[s-5])>>2
|
||||||
|
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||||
|
}
|
||||||
|
|
||||||
|
if offset <= 0 || d < offset || length > len(dst)-d {
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
|
||||||
|
if debug {
|
||||||
|
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||||
|
// If no overlap, use the built-in copy:
|
||||||
|
if offset > length {
|
||||||
|
copy(dst[d:d+length], dst[d-offset:])
|
||||||
|
d += length
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||||
|
// forwards, even if the slices overlap. Conceptually, this is:
|
||||||
|
//
|
||||||
|
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||||
|
//
|
||||||
|
// We align the slices into a and b and show the compiler they are the same size.
|
||||||
|
// This allows the loop to run without bounds checks.
|
||||||
|
a := dst[d : d+length]
|
||||||
|
b := dst[d-offset:]
|
||||||
|
b = b[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
a[i] = b[i]
|
||||||
|
}
|
||||||
|
d += length
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remaining with extra checks...
|
||||||
|
for s < len(src) {
|
||||||
|
switch src[s] & 0x03 {
|
||||||
|
case tagLiteral:
|
||||||
|
x := uint32(src[s] >> 2)
|
||||||
|
switch {
|
||||||
|
case x < 60:
|
||||||
|
s++
|
||||||
|
case x == 60:
|
||||||
|
s += 2
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
x = uint32(src[s-1])
|
||||||
|
case x == 61:
|
||||||
|
s += 3
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||||
|
case x == 62:
|
||||||
|
s += 4
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||||
|
case x == 63:
|
||||||
|
s += 5
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||||
|
}
|
||||||
|
length = int(x) + 1
|
||||||
|
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
if debug {
|
||||||
|
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||||
|
}
|
||||||
|
|
||||||
|
copy(dst[d:], src[s:s+length])
|
||||||
|
d += length
|
||||||
|
s += length
|
||||||
|
continue
|
||||||
|
|
||||||
|
case tagCopy1:
|
||||||
|
s += 2
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = int(src[s-2]) >> 2 & 0x7
|
||||||
|
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||||
|
if toffset == 0 {
|
||||||
|
if debug {
|
||||||
|
fmt.Print("(repeat) ")
|
||||||
|
}
|
||||||
|
// keep last offset
|
||||||
|
switch length {
|
||||||
|
case 5:
|
||||||
|
s += 1
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = int(uint32(src[s-1])) + 4
|
||||||
|
case 6:
|
||||||
|
s += 2
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
|
||||||
|
case 7:
|
||||||
|
s += 3
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
|
||||||
|
default: // 0-> 4
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
offset = toffset
|
||||||
|
}
|
||||||
|
length += 4
|
||||||
|
case tagCopy2:
|
||||||
|
s += 3
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = 1 + int(src[s-3])>>2
|
||||||
|
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||||
|
|
||||||
|
case tagCopy4:
|
||||||
|
s += 5
|
||||||
|
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
length = 1 + int(src[s-5])>>2
|
||||||
|
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||||
|
}
|
||||||
|
|
||||||
|
if offset <= 0 || d < offset || length > len(dst)-d {
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
|
||||||
|
if debug {
|
||||||
|
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||||
|
// If no overlap, use the built-in copy:
|
||||||
|
if offset > length {
|
||||||
|
copy(dst[d:d+length], dst[d-offset:])
|
||||||
|
d += length
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||||
|
// forwards, even if the slices overlap. Conceptually, this is:
|
||||||
|
//
|
||||||
|
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||||
|
//
|
||||||
|
// We align the slices into a and b and show the compiler they are the same size.
|
||||||
|
// This allows the loop to run without bounds checks.
|
||||||
|
a := dst[d : d+length]
|
||||||
|
b := dst[d-offset:]
|
||||||
|
b = b[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
a[i] = b[i]
|
||||||
|
}
|
||||||
|
d += length
|
||||||
|
}
|
||||||
|
|
||||||
|
if d != len(dst) {
|
||||||
|
return decodeErrCodeCorrupt
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
1341
vendor/github.com/klauspost/compress/s2/encode.go
generated
vendored
Normal file
1341
vendor/github.com/klauspost/compress/s2/encode.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
456
vendor/github.com/klauspost/compress/s2/encode_all.go
generated
vendored
Normal file
456
vendor/github.com/klauspost/compress/s2/encode_all.go
generated
vendored
Normal file
|
@ -0,0 +1,456 @@
|
||||||
|
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
func load32(b []byte, i int) uint32 {
|
||||||
|
return binary.LittleEndian.Uint32(b[i:])
|
||||||
|
}
|
||||||
|
|
||||||
|
func load64(b []byte, i int) uint64 {
|
||||||
|
return binary.LittleEndian.Uint64(b[i:])
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash6(u uint64, h uint8) uint32 {
|
||||||
|
const prime6bytes = 227718039650203
|
||||||
|
return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
func encodeGo(dst, src []byte) []byte {
|
||||||
|
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||||
|
panic(ErrTooLarge)
|
||||||
|
} else if len(dst) < n {
|
||||||
|
dst = make([]byte, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||||
|
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||||
|
|
||||||
|
if len(src) == 0 {
|
||||||
|
return dst[:d]
|
||||||
|
}
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
d += emitLiteral(dst[d:], src)
|
||||||
|
return dst[:d]
|
||||||
|
}
|
||||||
|
n := encodeBlockGo(dst[d:], src)
|
||||||
|
if n > 0 {
|
||||||
|
d += n
|
||||||
|
return dst[:d]
|
||||||
|
}
|
||||||
|
// Not compressible
|
||||||
|
d += emitLiteral(dst[d:], src)
|
||||||
|
return dst[:d]
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlockGo(dst, src []byte) (d int) {
|
||||||
|
// Initialize the hash table.
|
||||||
|
const (
|
||||||
|
tableBits = 14
|
||||||
|
maxTableSize = 1 << tableBits
|
||||||
|
|
||||||
|
debug = false
|
||||||
|
)
|
||||||
|
|
||||||
|
var table [maxTableSize]uint32
|
||||||
|
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := len(src) - inputMargin
|
||||||
|
|
||||||
|
// Bail if we can't compress to at least this.
|
||||||
|
dstLimit := len(src) - len(src)>>5 - 5
|
||||||
|
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
|
||||||
|
nextEmit := 0
|
||||||
|
|
||||||
|
// The encoded form must start with a literal, as there are no previous
|
||||||
|
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||||
|
s := 1
|
||||||
|
cv := load64(src, s)
|
||||||
|
|
||||||
|
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
|
||||||
|
repeat := 1
|
||||||
|
|
||||||
|
for {
|
||||||
|
candidate := 0
|
||||||
|
for {
|
||||||
|
// Next src position to check
|
||||||
|
nextS := s + (s-nextEmit)>>6 + 4
|
||||||
|
if nextS > sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
hash0 := hash6(cv, tableBits)
|
||||||
|
hash1 := hash6(cv>>8, tableBits)
|
||||||
|
candidate = int(table[hash0])
|
||||||
|
candidate2 := int(table[hash1])
|
||||||
|
table[hash0] = uint32(s)
|
||||||
|
table[hash1] = uint32(s + 1)
|
||||||
|
hash2 := hash6(cv>>16, tableBits)
|
||||||
|
|
||||||
|
// Check repeat at offset checkRep.
|
||||||
|
const checkRep = 1
|
||||||
|
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
|
||||||
|
base := s + checkRep
|
||||||
|
// Extend back
|
||||||
|
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
|
||||||
|
i--
|
||||||
|
base--
|
||||||
|
}
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||||
|
|
||||||
|
// Extend forward
|
||||||
|
candidate := s - repeat + 4 + checkRep
|
||||||
|
s += 4 + checkRep
|
||||||
|
for s <= sLimit {
|
||||||
|
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||||
|
s += bits.TrailingZeros64(diff) >> 3
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s += 8
|
||||||
|
candidate += 8
|
||||||
|
}
|
||||||
|
if debug {
|
||||||
|
// Validate match.
|
||||||
|
if s <= candidate {
|
||||||
|
panic("s <= candidate")
|
||||||
|
}
|
||||||
|
a := src[base:s]
|
||||||
|
b := src[base-repeat : base-repeat+(s-base)]
|
||||||
|
if !bytes.Equal(a, b) {
|
||||||
|
panic("mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if nextEmit > 0 {
|
||||||
|
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
|
||||||
|
d += emitRepeat(dst[d:], repeat, s-base)
|
||||||
|
} else {
|
||||||
|
// First match, cannot be repeat.
|
||||||
|
d += emitCopy(dst[d:], repeat, s-base)
|
||||||
|
}
|
||||||
|
nextEmit = s
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
cv = load64(src, s)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if uint32(cv) == load32(src, candidate) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
candidate = int(table[hash2])
|
||||||
|
if uint32(cv>>8) == load32(src, candidate2) {
|
||||||
|
table[hash2] = uint32(s + 2)
|
||||||
|
candidate = candidate2
|
||||||
|
s++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
table[hash2] = uint32(s + 2)
|
||||||
|
if uint32(cv>>16) == load32(src, candidate) {
|
||||||
|
s += 2
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
cv = load64(src, nextS)
|
||||||
|
s = nextS
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend backwards.
|
||||||
|
// The top bytes will be rechecked to get the full match.
|
||||||
|
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
|
||||||
|
candidate--
|
||||||
|
s--
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+(s-nextEmit) > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||||
|
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||||
|
// them as literal bytes.
|
||||||
|
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:s])
|
||||||
|
|
||||||
|
// Call emitCopy, and then see if another emitCopy could be our next
|
||||||
|
// move. Repeat until we find no match for the input immediately after
|
||||||
|
// what was consumed by the last emitCopy call.
|
||||||
|
//
|
||||||
|
// If we exit this loop normally then we need to call emitLiteral next,
|
||||||
|
// though we don't yet know how big the literal will be. We handle that
|
||||||
|
// by proceeding to the next iteration of the main loop. We also can
|
||||||
|
// exit this loop via goto if we get close to exhausting the input.
|
||||||
|
for {
|
||||||
|
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||||
|
// literal bytes prior to s.
|
||||||
|
base := s
|
||||||
|
repeat = base - candidate
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
s += 4
|
||||||
|
candidate += 4
|
||||||
|
for s <= len(src)-8 {
|
||||||
|
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||||
|
s += bits.TrailingZeros64(diff) >> 3
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s += 8
|
||||||
|
candidate += 8
|
||||||
|
}
|
||||||
|
|
||||||
|
d += emitCopy(dst[d:], repeat, s-base)
|
||||||
|
if debug {
|
||||||
|
// Validate match.
|
||||||
|
if s <= candidate {
|
||||||
|
panic("s <= candidate")
|
||||||
|
}
|
||||||
|
a := src[base:s]
|
||||||
|
b := src[base-repeat : base-repeat+(s-base)]
|
||||||
|
if !bytes.Equal(a, b) {
|
||||||
|
panic("mismatch")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nextEmit = s
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
if d > dstLimit {
|
||||||
|
// Do we have space for more, if not bail.
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// Check for an immediate match, otherwise start search at s+1
|
||||||
|
x := load64(src, s-2)
|
||||||
|
m2Hash := hash6(x, tableBits)
|
||||||
|
currHash := hash6(x>>16, tableBits)
|
||||||
|
candidate = int(table[currHash])
|
||||||
|
table[m2Hash] = uint32(s - 2)
|
||||||
|
table[currHash] = uint32(s)
|
||||||
|
if debug && s == candidate {
|
||||||
|
panic("s == candidate")
|
||||||
|
}
|
||||||
|
if uint32(x>>16) != load32(src, candidate) {
|
||||||
|
cv = load64(src, s+1)
|
||||||
|
s++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if nextEmit < len(src) {
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+len(src)-nextEmit > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
func encodeBlockSnappyGo(dst, src []byte) (d int) {
	// Pure-Go Snappy-compatible block encoder: scans src with a 14-bit
	// hash table, emitting literals and copies into dst, and returns the
	// number of bytes written (0 means "incompressible, store raw").
	// NOTE(review): relies on inputMargin/load64/load32/hash6/emitLiteral/
	// emitCopyNoRepeat defined elsewhere in this package.

	// Initialize the hash table.
	const (
		tableBits    = 14
		maxTableSize = 1 << tableBits
	)

	// Maps hash6(8-byte window) -> last src position seen with that hash.
	var table [maxTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	// (Requires at least ~3% savings: len(src) - len(src)/32 - 5.)
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidate := 0
		for {
			// Next src position to check.
			// Skip distance grows as the literal run grows, so incompressible
			// regions are scanned progressively faster.
			nextS := s + (s-nextEmit)>>6 + 4
			if nextS > sLimit {
				goto emitRemainder
			}
			// Probe two adjacent 6-byte windows per iteration.
			hash0 := hash6(cv, tableBits)
			hash1 := hash6(cv>>8, tableBits)
			candidate = int(table[hash0])
			candidate2 := int(table[hash1])
			table[hash0] = uint32(s)
			table[hash1] = uint32(s + 1)
			hash2 := hash6(cv>>16, tableBits)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						// First differing byte index via trailing zero count.
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}

				d += emitCopyNoRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			if uint32(cv) == load32(src, candidate) {
				break
			}
			candidate = int(table[hash2])
			if uint32(cv>>8) == load32(src, candidate2) {
				table[hash2] = uint32(s + 2)
				candidate = candidate2
				s++
				break
			}
			table[hash2] = uint32(s + 2)
			if uint32(cv>>16) == load32(src, candidate) {
				s += 2
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
			candidate--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s
			repeat = base - candidate

			// Extend the 4-byte match as long as possible.
			s += 4
			candidate += 4
			for s <= len(src)-8 {
				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
					s += bits.TrailingZeros64(diff) >> 3
					break
				}
				s += 8
				candidate += 8
			}

			d += emitCopyNoRepeat(dst[d:], repeat, s-base)
			if false {
				// Validate match. (Debug-only; compiled out.)
				a := src[base:s]
				b := src[base-repeat : base-repeat+(s-base)]
				if !bytes.Equal(a, b) {
					panic("mismatch")
				}
			}

			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			if d > dstLimit {
				// Do we have space for more, if not bail.
				return 0
			}
			// Check for an immediate match, otherwise start search at s+1
			x := load64(src, s-2)
			m2Hash := hash6(x, tableBits)
			currHash := hash6(x>>16, tableBits)
			candidate = int(table[currHash])
			table[m2Hash] = uint32(s - 2)
			table[currHash] = uint32(s)
			if uint32(x>>16) != load32(src, candidate) {
				cv = load64(src, s+1)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
|
142
vendor/github.com/klauspost/compress/s2/encode_amd64.go
generated
vendored
Normal file
142
vendor/github.com/klauspost/compress/s2/encode_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
//go:build !appengine && !noasm && gc
|
||||||
|
// +build !appengine,!noasm,gc
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlock(dst, src []byte) (d int) {
|
||||||
|
const (
|
||||||
|
// Use 12 bit table when less than...
|
||||||
|
limit12B = 16 << 10
|
||||||
|
// Use 10 bit table when less than...
|
||||||
|
limit10B = 4 << 10
|
||||||
|
// Use 8 bit table when less than...
|
||||||
|
limit8B = 512
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(src) >= 4<<20 {
|
||||||
|
return encodeBlockAsm(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit12B {
|
||||||
|
return encodeBlockAsm4MB(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit10B {
|
||||||
|
return encodeBlockAsm12B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit8B {
|
||||||
|
return encodeBlockAsm10B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return encodeBlockAsm8B(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlockBetter(dst, src []byte) (d int) {
|
||||||
|
const (
|
||||||
|
// Use 12 bit table when less than...
|
||||||
|
limit12B = 16 << 10
|
||||||
|
// Use 10 bit table when less than...
|
||||||
|
limit10B = 4 << 10
|
||||||
|
// Use 8 bit table when less than...
|
||||||
|
limit8B = 512
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(src) > 4<<20 {
|
||||||
|
return encodeBetterBlockAsm(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit12B {
|
||||||
|
return encodeBetterBlockAsm4MB(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit10B {
|
||||||
|
return encodeBetterBlockAsm12B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit8B {
|
||||||
|
return encodeBetterBlockAsm10B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return encodeBetterBlockAsm8B(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlockSnappy(dst, src []byte) (d int) {
|
||||||
|
const (
|
||||||
|
// Use 12 bit table when less than...
|
||||||
|
limit12B = 16 << 10
|
||||||
|
// Use 10 bit table when less than...
|
||||||
|
limit10B = 4 << 10
|
||||||
|
// Use 8 bit table when less than...
|
||||||
|
limit8B = 512
|
||||||
|
)
|
||||||
|
if len(src) >= 64<<10 {
|
||||||
|
return encodeSnappyBlockAsm(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit12B {
|
||||||
|
return encodeSnappyBlockAsm64K(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit10B {
|
||||||
|
return encodeSnappyBlockAsm12B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) >= limit8B {
|
||||||
|
return encodeSnappyBlockAsm10B(dst, src)
|
||||||
|
}
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return encodeSnappyBlockAsm8B(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetterSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
	const (
		// Use 12 bit table when less than...
		limit12B = 16 << 10
		// Use 10 bit table when less than...
		limit10B = 4 << 10
		// Use 8 bit table when less than...
		limit8B = 512
	)
	// Dispatch to the Snappy-compatible "better" assembler kernel whose
	// hash-table size matches the input length.
	if len(src) >= 64<<10 {
		return encodeSnappyBetterBlockAsm(dst, src)
	}
	if len(src) >= limit12B {
		return encodeSnappyBetterBlockAsm64K(dst, src)
	}
	if len(src) >= limit10B {
		return encodeSnappyBetterBlockAsm12B(dst, src)
	}
	if len(src) >= limit8B {
		return encodeSnappyBetterBlockAsm10B(dst, src)
	}
	if len(src) < minNonLiteralBlockSize {
		// Too small to hold any non-literal encoding; caller stores it raw.
		return 0
	}
	return encodeSnappyBetterBlockAsm8B(dst, src)
}
|
630
vendor/github.com/klauspost/compress/s2/encode_best.go
generated
vendored
Normal file
630
vendor/github.com/klauspost/compress/s2/encode_best.go
generated
vendored
Normal file
|
@ -0,0 +1,630 @@
|
||||||
|
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Each table entry packs two candidate positions: current in the low
	// 32 bits, previous in the high 32 bits (see getCur/getPrev).
	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
			rep    bool
		}
		var best match
		for {
			// Next src position to check.
			// Skip distance grows with the current literal run, capped at maxSkip.
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset
				if m.rep {
					return score - emitRepeatSize(offset, m.length)
				}
				return score - emitCopySize(offset, m.length)
			}

			matchAt := func(offset, s int, first uint32, rep bool) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				// m.length temporarily holds the absolute end position
				// (offset + matched bytes); it becomes a length below.
				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))

			{
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))

					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if !best.rep {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			// Debug-only sanity check; compiled out.
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 && !best.rep {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if best.rep {
			if nextEmit > 0 {
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], offset, best.length)
			} else {
				// First match, cannot be repeat.
				d += emitCopy(dst[d:], offset, best.length)
			}
		} else {
			d += emitCopy(dst[d:], offset, best.length)
		}
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables...
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
|
||||||
|
|
||||||
|
// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
	// Snappy-compatible variant of encodeBlockBest: identical search
	// strategy, but scores and emits only plain copies (no repeat codes),
	// since the Snappy format has no repeat opcode.

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 19
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 16
		maxSTableSize = 1 << sTableBits

		inputMargin = 8 + 2
	)

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Each table entry packs two candidate positions: current in the low
	// 32 bits, previous in the high 32 bits (see getCur/getPrev).
	var lTable [maxLTableSize]uint64
	var sTable [maxSTableSize]uint64

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1
	const lowbitMask = 0xffffffff
	getCur := func(x uint64) int {
		return int(x & lowbitMask)
	}
	getPrev := func(x uint64) int {
		return int(x >> 32)
	}
	const maxSkip = 64

	for {
		type match struct {
			offset int
			s      int
			length int
			score  int
		}
		var best match
		for {
			// Next src position to check.
			// Skip distance grows with the current literal run, capped at maxSkip.
			nextS := (s-nextEmit)>>8 + 1
			if nextS > maxSkip {
				nextS = s + maxSkip
			} else {
				nextS += s
			}
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash8(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL := lTable[hashL]
			candidateS := sTable[hashS]

			score := func(m match) int {
				// Matches that are longer forward are penalized since we must emit it as a literal.
				score := m.length - m.s
				if nextEmit == m.s {
					// If we do not have to emit literals, we save 1 byte
					score++
				}
				offset := m.s - m.offset

				return score - emitCopyNoRepeatSize(offset, m.length)
			}

			matchAt := func(offset, s int, first uint32) match {
				if best.length != 0 && best.s-best.offset == s-offset {
					// Don't retest if we have the same offset.
					return match{offset: offset, s: s}
				}
				if load32(src, offset) != first {
					return match{offset: offset, s: s}
				}
				// m.length temporarily holds the absolute end position
				// (offset + matched bytes); it becomes a length below.
				m := match{offset: offset, s: s, length: 4 + offset}
				s += 4
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
						m.length += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					m.length += 8
				}
				m.length -= offset
				m.score = score(m)
				if m.score <= -m.s {
					// Eliminate if no savings, we might find a better one.
					m.length = 0
				}
				return m
			}

			bestOf := func(a, b match) match {
				if b.length == 0 {
					return a
				}
				if a.length == 0 {
					return b
				}
				as := a.score + b.s
				bs := b.score + a.s
				if as >= bs {
					return a
				}
				return b
			}

			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))

			{
				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
				if best.length > 0 {
					// s+1
					nextShort := sTable[hash4(cv>>8, sTableBits)]
					s := s + 1
					cv := load64(src, s)
					nextLong := lTable[hash8(cv, lTableBits)]
					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					// Repeat at + 2
					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))

					// s+2
					if true {
						nextShort = sTable[hash4(cv>>8, sTableBits)]
						s++
						cv = load64(src, s)
						nextLong = lTable[hash8(cv, lTableBits)]
						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
					}
					// Search for a match at best match end, see if that is better.
					if sAt := best.s + best.length; sAt < sLimit {
						sBack := best.s
						backL := best.length
						// Load initial values
						cv = load64(src, sBack)
						// Search for mismatch
						next := lTable[hash8(load64(src, sAt), lTableBits)]
						//next := sTable[hash4(load64(src, sAt), sTableBits)]

						if checkAt := getCur(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
						if checkAt := getPrev(next) - backL; checkAt > 0 {
							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
						}
					}
				}
			}

			// Update table
			lTable[hashL] = uint64(s) | candidateL<<32
			sTable[hashS] = uint64(s) | candidateS<<32

			if best.length > 0 {
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards, not needed for repeats...
		s = best.s
		if true {
			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
				best.offset--
				best.length++
				s--
			}
		}
		if false && best.offset >= s {
			// Debug-only sanity check; compiled out.
			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
		}
		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := s - best.offset

		s += best.length

		if offset > 65535 && s-base <= 5 {
			// Bail if the match is equal or worse to the encoding.
			s = best.s + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}
		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, best.length)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Fill tables...
		for i := best.s + 1; i < s; i++ {
			cv0 := load64(src, i)
			long0 := hash8(cv0, lTableBits)
			short0 := hash4(cv0, sTableBits)
			lTable[long0] = uint64(i) | lTable[long0]<<32
			sTable[short0] = uint64(i) | sTable[short0]<<32
		}
		cv = load64(src, s)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
|
||||||
|
|
||||||
|
// emitCopySize returns the size to encode the offset+length
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// 1 <= offset && offset <= math.MaxUint32
|
||||||
|
// 4 <= length && length <= 1 << 24
|
||||||
|
func emitCopySize(offset, length int) int {
|
||||||
|
if offset >= 65536 {
|
||||||
|
i := 0
|
||||||
|
if length > 64 {
|
||||||
|
length -= 64
|
||||||
|
if length >= 4 {
|
||||||
|
// Emit remaining as repeats
|
||||||
|
return 5 + emitRepeatSize(offset, length)
|
||||||
|
}
|
||||||
|
i = 5
|
||||||
|
}
|
||||||
|
if length == 0 {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
return i + 5
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offset no more than 2 bytes.
|
||||||
|
if length > 64 {
|
||||||
|
if offset < 2048 {
|
||||||
|
// Emit 8 bytes, then rest as repeats...
|
||||||
|
return 2 + emitRepeatSize(offset, length-8)
|
||||||
|
}
|
||||||
|
// Emit remaining as repeats, at least 4 bytes remain.
|
||||||
|
return 3 + emitRepeatSize(offset, length-60)
|
||||||
|
}
|
||||||
|
if length >= 12 || offset >= 2048 {
|
||||||
|
return 3
|
||||||
|
}
|
||||||
|
// Emit the remaining copy, encoded as 2 bytes.
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitCopyNoRepeatSize returns the size to encode the offset+length
//
// It assumes that:
//	1 <= offset && offset <= math.MaxUint32
//	4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
	switch {
	case offset >= 65536:
		// 5-byte copy4 opcodes; one extra opcode per additional 64 bytes.
		return 5 + 5*(length/64)
	case length > 64:
		// 3-byte copy2 opcodes; one extra opcode per additional 60 bytes.
		return 3 + 3*(length/60)
	case length >= 12 || offset >= 2048:
		// Single 3-byte copy.
		return 3
	default:
		// Single 2-byte copy.
		return 2
	}
}
|
||||||
|
|
||||||
|
// emitRepeatSize returns the number of bytes required to encode a repeat.
// Length must be at least 4 and < 1<<24
func emitRepeatSize(offset, length int) int {
	// Small repeats fit the 2/3/4-byte forms, with an extra break for
	// short lengths at small offsets.
	switch {
	case length <= 4+4, length < 8+4 && offset < 2048:
		return 2
	case length < (1<<8)+4+4:
		return 3
	case length < (1<<16)+(1<<8)+4:
		return 4
	}
	const maxRepeat = (1 << 24) - 1
	// The 5-byte form covers up to maxRepeat; any excess is chained as a
	// further repeat.
	remaining := length - ((1 << 16) - 4)
	if remaining > maxRepeat {
		overflow := remaining - maxRepeat + 4
		return 5 + emitRepeatSize(offset, overflow)
	}
	return 5
}
|
431
vendor/github.com/klauspost/compress/s2/encode_better.go
generated
vendored
Normal file
431
vendor/github.com/klauspost/compress/s2/encode_better.go
generated
vendored
Normal file
|
@ -0,0 +1,431 @@
|
||||||
|
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <32.
|
||||||
|
func hash4(u uint64, h uint8) uint32 {
|
||||||
|
const prime4bytes = 2654435761
|
||||||
|
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash5(u uint64, h uint8) uint32 {
|
||||||
|
const prime5bytes = 889523592379
|
||||||
|
return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash7(u uint64, h uint8) uint32 {
|
||||||
|
const prime7bytes = 58295818150454627
|
||||||
|
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// hash8 returns the hash of u to fit in a hash table with h bits.
|
||||||
|
// Preferably h should be a constant and should always be <64.
|
||||||
|
func hash8(u uint64, h uint8) uint32 {
|
||||||
|
const prime8bytes = 0xcf1bbcdcb7a56463
|
||||||
|
return uint32((u * prime8bytes) >> ((64 - h) & 63))
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlockBetterGo(dst, src []byte) (d int) {
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := len(src) - inputMargin
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the hash tables.
|
||||||
|
const (
|
||||||
|
// Long hash matches.
|
||||||
|
lTableBits = 16
|
||||||
|
maxLTableSize = 1 << lTableBits
|
||||||
|
|
||||||
|
// Short hash matches.
|
||||||
|
sTableBits = 14
|
||||||
|
maxSTableSize = 1 << sTableBits
|
||||||
|
)
|
||||||
|
|
||||||
|
var lTable [maxLTableSize]uint32
|
||||||
|
var sTable [maxSTableSize]uint32
|
||||||
|
|
||||||
|
// Bail if we can't compress to at least this.
|
||||||
|
dstLimit := len(src) - len(src)>>5 - 6
|
||||||
|
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
|
||||||
|
nextEmit := 0
|
||||||
|
|
||||||
|
// The encoded form must start with a literal, as there are no previous
|
||||||
|
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||||
|
s := 1
|
||||||
|
cv := load64(src, s)
|
||||||
|
|
||||||
|
// We initialize repeat to 0, so we never match on first attempt
|
||||||
|
repeat := 0
|
||||||
|
|
||||||
|
for {
|
||||||
|
candidateL := 0
|
||||||
|
nextS := 0
|
||||||
|
for {
|
||||||
|
// Next src position to check
|
||||||
|
nextS = s + (s-nextEmit)>>7 + 1
|
||||||
|
if nextS > sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
hashL := hash7(cv, lTableBits)
|
||||||
|
hashS := hash4(cv, sTableBits)
|
||||||
|
candidateL = int(lTable[hashL])
|
||||||
|
candidateS := int(sTable[hashS])
|
||||||
|
lTable[hashL] = uint32(s)
|
||||||
|
sTable[hashS] = uint32(s)
|
||||||
|
|
||||||
|
// Check repeat at offset checkRep.
|
||||||
|
const checkRep = 1
|
||||||
|
if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
|
||||||
|
base := s + checkRep
|
||||||
|
// Extend back
|
||||||
|
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
|
||||||
|
i--
|
||||||
|
base--
|
||||||
|
}
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||||
|
|
||||||
|
// Extend forward
|
||||||
|
candidate := s - repeat + 4 + checkRep
|
||||||
|
s += 4 + checkRep
|
||||||
|
for s < len(src) {
|
||||||
|
if len(src)-s < 8 {
|
||||||
|
if src[s] == src[candidate] {
|
||||||
|
s++
|
||||||
|
candidate++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||||
|
s += bits.TrailingZeros64(diff) >> 3
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s += 8
|
||||||
|
candidate += 8
|
||||||
|
}
|
||||||
|
if nextEmit > 0 {
|
||||||
|
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
|
||||||
|
d += emitRepeat(dst[d:], repeat, s-base)
|
||||||
|
} else {
|
||||||
|
// First match, cannot be repeat.
|
||||||
|
d += emitCopy(dst[d:], repeat, s-base)
|
||||||
|
}
|
||||||
|
nextEmit = s
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
cv = load64(src, s)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if uint32(cv) == load32(src, candidateL) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check our short candidate
|
||||||
|
if uint32(cv) == load32(src, candidateS) {
|
||||||
|
// Try a long candidate at s+1
|
||||||
|
hashL = hash7(cv>>8, lTableBits)
|
||||||
|
candidateL = int(lTable[hashL])
|
||||||
|
lTable[hashL] = uint32(s + 1)
|
||||||
|
if uint32(cv>>8) == load32(src, candidateL) {
|
||||||
|
s++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Use our short candidate.
|
||||||
|
candidateL = candidateS
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
cv = load64(src, nextS)
|
||||||
|
s = nextS
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend backwards
|
||||||
|
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
|
||||||
|
candidateL--
|
||||||
|
s--
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+(s-nextEmit) > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
base := s
|
||||||
|
offset := base - candidateL
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
s += 4
|
||||||
|
candidateL += 4
|
||||||
|
for s < len(src) {
|
||||||
|
if len(src)-s < 8 {
|
||||||
|
if src[s] == src[candidateL] {
|
||||||
|
s++
|
||||||
|
candidateL++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
|
||||||
|
s += bits.TrailingZeros64(diff) >> 3
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s += 8
|
||||||
|
candidateL += 8
|
||||||
|
}
|
||||||
|
|
||||||
|
if offset > 65535 && s-base <= 5 && repeat != offset {
|
||||||
|
// Bail if the match is equal or worse to the encoding.
|
||||||
|
s = nextS + 1
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
cv = load64(src, s)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||||
|
if repeat == offset {
|
||||||
|
d += emitRepeat(dst[d:], offset, s-base)
|
||||||
|
} else {
|
||||||
|
d += emitCopy(dst[d:], offset, s-base)
|
||||||
|
repeat = offset
|
||||||
|
}
|
||||||
|
|
||||||
|
nextEmit = s
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
if d > dstLimit {
|
||||||
|
// Do we have space for more, if not bail.
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// Index match start+1 (long) and start+2 (short)
|
||||||
|
index0 := base + 1
|
||||||
|
// Index match end-2 (long) and end-1 (short)
|
||||||
|
index1 := s - 2
|
||||||
|
|
||||||
|
cv0 := load64(src, index0)
|
||||||
|
cv1 := load64(src, index1)
|
||||||
|
cv = load64(src, s)
|
||||||
|
lTable[hash7(cv0, lTableBits)] = uint32(index0)
|
||||||
|
lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
|
||||||
|
lTable[hash7(cv1, lTableBits)] = uint32(index1)
|
||||||
|
lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
|
||||||
|
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
|
||||||
|
sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
|
||||||
|
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if nextEmit < len(src) {
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+len(src)-nextEmit > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||||
|
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||||
|
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
|
||||||
|
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||||
|
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||||
|
// looking for copies.
|
||||||
|
sLimit := len(src) - inputMargin
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the hash tables.
|
||||||
|
const (
|
||||||
|
// Long hash matches.
|
||||||
|
lTableBits = 16
|
||||||
|
maxLTableSize = 1 << lTableBits
|
||||||
|
|
||||||
|
// Short hash matches.
|
||||||
|
sTableBits = 14
|
||||||
|
maxSTableSize = 1 << sTableBits
|
||||||
|
)
|
||||||
|
|
||||||
|
var lTable [maxLTableSize]uint32
|
||||||
|
var sTable [maxSTableSize]uint32
|
||||||
|
|
||||||
|
// Bail if we can't compress to at least this.
|
||||||
|
dstLimit := len(src) - len(src)>>5 - 6
|
||||||
|
|
||||||
|
// nextEmit is where in src the next emitLiteral should start from.
|
||||||
|
nextEmit := 0
|
||||||
|
|
||||||
|
// The encoded form must start with a literal, as there are no previous
|
||||||
|
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||||
|
s := 1
|
||||||
|
cv := load64(src, s)
|
||||||
|
|
||||||
|
// We initialize repeat to 0, so we never match on first attempt
|
||||||
|
repeat := 0
|
||||||
|
const maxSkip = 100
|
||||||
|
|
||||||
|
for {
|
||||||
|
candidateL := 0
|
||||||
|
nextS := 0
|
||||||
|
for {
|
||||||
|
// Next src position to check
|
||||||
|
nextS = (s-nextEmit)>>7 + 1
|
||||||
|
if nextS > maxSkip {
|
||||||
|
nextS = s + maxSkip
|
||||||
|
} else {
|
||||||
|
nextS += s
|
||||||
|
}
|
||||||
|
|
||||||
|
if nextS > sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
hashL := hash7(cv, lTableBits)
|
||||||
|
hashS := hash4(cv, sTableBits)
|
||||||
|
candidateL = int(lTable[hashL])
|
||||||
|
candidateS := int(sTable[hashS])
|
||||||
|
lTable[hashL] = uint32(s)
|
||||||
|
sTable[hashS] = uint32(s)
|
||||||
|
|
||||||
|
if uint32(cv) == load32(src, candidateL) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check our short candidate
|
||||||
|
if uint32(cv) == load32(src, candidateS) {
|
||||||
|
// Try a long candidate at s+1
|
||||||
|
hashL = hash7(cv>>8, lTableBits)
|
||||||
|
candidateL = int(lTable[hashL])
|
||||||
|
lTable[hashL] = uint32(s + 1)
|
||||||
|
if uint32(cv>>8) == load32(src, candidateL) {
|
||||||
|
s++
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Use our short candidate.
|
||||||
|
candidateL = candidateS
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
cv = load64(src, nextS)
|
||||||
|
s = nextS
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extend backwards
|
||||||
|
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
|
||||||
|
candidateL--
|
||||||
|
s--
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+(s-nextEmit) > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
base := s
|
||||||
|
offset := base - candidateL
|
||||||
|
|
||||||
|
// Extend the 4-byte match as long as possible.
|
||||||
|
s += 4
|
||||||
|
candidateL += 4
|
||||||
|
for s < len(src) {
|
||||||
|
if len(src)-s < 8 {
|
||||||
|
if src[s] == src[candidateL] {
|
||||||
|
s++
|
||||||
|
candidateL++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
|
||||||
|
s += bits.TrailingZeros64(diff) >> 3
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s += 8
|
||||||
|
candidateL += 8
|
||||||
|
}
|
||||||
|
|
||||||
|
if offset > 65535 && s-base <= 5 && repeat != offset {
|
||||||
|
// Bail if the match is equal or worse to the encoding.
|
||||||
|
s = nextS + 1
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
cv = load64(src, s)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||||
|
d += emitCopyNoRepeat(dst[d:], offset, s-base)
|
||||||
|
repeat = offset
|
||||||
|
|
||||||
|
nextEmit = s
|
||||||
|
if s >= sLimit {
|
||||||
|
goto emitRemainder
|
||||||
|
}
|
||||||
|
|
||||||
|
if d > dstLimit {
|
||||||
|
// Do we have space for more, if not bail.
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// Index match start+1 (long) and start+2 (short)
|
||||||
|
index0 := base + 1
|
||||||
|
// Index match end-2 (long) and end-1 (short)
|
||||||
|
index1 := s - 2
|
||||||
|
|
||||||
|
cv0 := load64(src, index0)
|
||||||
|
cv1 := load64(src, index1)
|
||||||
|
cv = load64(src, s)
|
||||||
|
lTable[hash7(cv0, lTableBits)] = uint32(index0)
|
||||||
|
lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
|
||||||
|
lTable[hash7(cv1, lTableBits)] = uint32(index1)
|
||||||
|
lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
|
||||||
|
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
|
||||||
|
sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
|
||||||
|
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
emitRemainder:
|
||||||
|
if nextEmit < len(src) {
|
||||||
|
// Bail if we exceed the maximum size.
|
||||||
|
if d+len(src)-nextEmit > dstLimit {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||||
|
}
|
||||||
|
return d
|
||||||
|
}
|
307
vendor/github.com/klauspost/compress/s2/encode_go.go
generated
vendored
Normal file
307
vendor/github.com/klauspost/compress/s2/encode_go.go
generated
vendored
Normal file
|
@ -0,0 +1,307 @@
|
||||||
|
//go:build !amd64 || appengine || !gc || noasm
|
||||||
|
// +build !amd64 appengine !gc noasm
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math/bits"
|
||||||
|
)
|
||||||
|
|
||||||
|
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src))
|
||||||
|
func encodeBlock(dst, src []byte) (d int) {
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return encodeBlockGo(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src))
|
||||||
|
func encodeBlockBetter(dst, src []byte) (d int) {
|
||||||
|
return encodeBlockBetterGo(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src))
|
||||||
|
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
|
||||||
|
return encodeBlockBetterSnappyGo(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||||
|
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||||
|
// been written.
|
||||||
|
//
|
||||||
|
// It also assumes that:
|
||||||
|
// len(dst) >= MaxEncodedLen(len(src))
|
||||||
|
func encodeBlockSnappy(dst, src []byte) (d int) {
|
||||||
|
if len(src) < minNonLiteralBlockSize {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return encodeBlockSnappyGo(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes
|
||||||
|
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||||
|
func emitLiteral(dst, lit []byte) int {
|
||||||
|
if len(lit) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
const num = 63<<2 | tagLiteral
|
||||||
|
i, n := 0, uint(len(lit)-1)
|
||||||
|
switch {
|
||||||
|
case n < 60:
|
||||||
|
dst[0] = uint8(n)<<2 | tagLiteral
|
||||||
|
i = 1
|
||||||
|
case n < 1<<8:
|
||||||
|
dst[1] = uint8(n)
|
||||||
|
dst[0] = 60<<2 | tagLiteral
|
||||||
|
i = 2
|
||||||
|
case n < 1<<16:
|
||||||
|
dst[2] = uint8(n >> 8)
|
||||||
|
dst[1] = uint8(n)
|
||||||
|
dst[0] = 61<<2 | tagLiteral
|
||||||
|
i = 3
|
||||||
|
case n < 1<<24:
|
||||||
|
dst[3] = uint8(n >> 16)
|
||||||
|
dst[2] = uint8(n >> 8)
|
||||||
|
dst[1] = uint8(n)
|
||||||
|
dst[0] = 62<<2 | tagLiteral
|
||||||
|
i = 4
|
||||||
|
default:
|
||||||
|
dst[4] = uint8(n >> 24)
|
||||||
|
dst[3] = uint8(n >> 16)
|
||||||
|
dst[2] = uint8(n >> 8)
|
||||||
|
dst[1] = uint8(n)
|
||||||
|
dst[0] = 63<<2 | tagLiteral
|
||||||
|
i = 5
|
||||||
|
}
|
||||||
|
return i + copy(dst[i:], lit)
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitRepeat writes a repeat chunk and returns the number of bytes written.
|
||||||
|
// Length must be at least 4 and < 1<<24
|
||||||
|
func emitRepeat(dst []byte, offset, length int) int {
|
||||||
|
// Repeat offset, make length cheaper
|
||||||
|
length -= 4
|
||||||
|
if length <= 4 {
|
||||||
|
dst[0] = uint8(length)<<2 | tagCopy1
|
||||||
|
dst[1] = 0
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if length < 8 && offset < 2048 {
|
||||||
|
// Encode WITH offset
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if length < (1<<8)+4 {
|
||||||
|
length -= 4
|
||||||
|
dst[2] = uint8(length)
|
||||||
|
dst[1] = 0
|
||||||
|
dst[0] = 5<<2 | tagCopy1
|
||||||
|
return 3
|
||||||
|
}
|
||||||
|
if length < (1<<16)+(1<<8) {
|
||||||
|
length -= 1 << 8
|
||||||
|
dst[3] = uint8(length >> 8)
|
||||||
|
dst[2] = uint8(length >> 0)
|
||||||
|
dst[1] = 0
|
||||||
|
dst[0] = 6<<2 | tagCopy1
|
||||||
|
return 4
|
||||||
|
}
|
||||||
|
const maxRepeat = (1 << 24) - 1
|
||||||
|
length -= 1 << 16
|
||||||
|
left := 0
|
||||||
|
if length > maxRepeat {
|
||||||
|
left = length - maxRepeat + 4
|
||||||
|
length = maxRepeat - 4
|
||||||
|
}
|
||||||
|
dst[4] = uint8(length >> 16)
|
||||||
|
dst[3] = uint8(length >> 8)
|
||||||
|
dst[2] = uint8(length >> 0)
|
||||||
|
dst[1] = 0
|
||||||
|
dst[0] = 7<<2 | tagCopy1
|
||||||
|
if left > 0 {
|
||||||
|
return 5 + emitRepeat(dst[5:], offset, left)
|
||||||
|
}
|
||||||
|
return 5
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes
|
||||||
|
// 1 <= offset && offset <= math.MaxUint32
|
||||||
|
// 4 <= length && length <= 1 << 24
|
||||||
|
func emitCopy(dst []byte, offset, length int) int {
|
||||||
|
if offset >= 65536 {
|
||||||
|
i := 0
|
||||||
|
if length > 64 {
|
||||||
|
// Emit a length 64 copy, encoded as 5 bytes.
|
||||||
|
dst[4] = uint8(offset >> 24)
|
||||||
|
dst[3] = uint8(offset >> 16)
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = 63<<2 | tagCopy4
|
||||||
|
length -= 64
|
||||||
|
if length >= 4 {
|
||||||
|
// Emit remaining as repeats
|
||||||
|
return 5 + emitRepeat(dst[5:], offset, length)
|
||||||
|
}
|
||||||
|
i = 5
|
||||||
|
}
|
||||||
|
if length == 0 {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
// Emit a copy, offset encoded as 4 bytes.
|
||||||
|
dst[i+0] = uint8(length-1)<<2 | tagCopy4
|
||||||
|
dst[i+1] = uint8(offset)
|
||||||
|
dst[i+2] = uint8(offset >> 8)
|
||||||
|
dst[i+3] = uint8(offset >> 16)
|
||||||
|
dst[i+4] = uint8(offset >> 24)
|
||||||
|
return i + 5
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offset no more than 2 bytes.
|
||||||
|
if length > 64 {
|
||||||
|
off := 3
|
||||||
|
if offset < 2048 {
|
||||||
|
// emit 8 bytes as tagCopy1, rest as repeats.
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
|
||||||
|
length -= 8
|
||||||
|
off = 2
|
||||||
|
} else {
|
||||||
|
// Emit a length 60 copy, encoded as 3 bytes.
|
||||||
|
// Emit remaining as repeat value (minimum 4 bytes).
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = 59<<2 | tagCopy2
|
||||||
|
length -= 60
|
||||||
|
}
|
||||||
|
// Emit remaining as repeats, at least 4 bytes remain.
|
||||||
|
return off + emitRepeat(dst[off:], offset, length)
|
||||||
|
}
|
||||||
|
if length >= 12 || offset >= 2048 {
|
||||||
|
// Emit the remaining copy, encoded as 3 bytes.
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||||
|
return 3
|
||||||
|
}
|
||||||
|
// Emit the remaining copy, encoded as 2 bytes.
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes
|
||||||
|
// 1 <= offset && offset <= math.MaxUint32
|
||||||
|
// 4 <= length && length <= 1 << 24
|
||||||
|
func emitCopyNoRepeat(dst []byte, offset, length int) int {
|
||||||
|
if offset >= 65536 {
|
||||||
|
i := 0
|
||||||
|
if length > 64 {
|
||||||
|
// Emit a length 64 copy, encoded as 5 bytes.
|
||||||
|
dst[4] = uint8(offset >> 24)
|
||||||
|
dst[3] = uint8(offset >> 16)
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = 63<<2 | tagCopy4
|
||||||
|
length -= 64
|
||||||
|
if length >= 4 {
|
||||||
|
// Emit remaining as repeats
|
||||||
|
return 5 + emitCopyNoRepeat(dst[5:], offset, length)
|
||||||
|
}
|
||||||
|
i = 5
|
||||||
|
}
|
||||||
|
if length == 0 {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
// Emit a copy, offset encoded as 4 bytes.
|
||||||
|
dst[i+0] = uint8(length-1)<<2 | tagCopy4
|
||||||
|
dst[i+1] = uint8(offset)
|
||||||
|
dst[i+2] = uint8(offset >> 8)
|
||||||
|
dst[i+3] = uint8(offset >> 16)
|
||||||
|
dst[i+4] = uint8(offset >> 24)
|
||||||
|
return i + 5
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offset no more than 2 bytes.
|
||||||
|
if length > 64 {
|
||||||
|
// Emit a length 60 copy, encoded as 3 bytes.
|
||||||
|
// Emit remaining as repeat value (minimum 4 bytes).
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = 59<<2 | tagCopy2
|
||||||
|
length -= 60
|
||||||
|
// Emit remaining as repeats, at least 4 bytes remain.
|
||||||
|
return 3 + emitCopyNoRepeat(dst[3:], offset, length)
|
||||||
|
}
|
||||||
|
if length >= 12 || offset >= 2048 {
|
||||||
|
// Emit the remaining copy, encoded as 3 bytes.
|
||||||
|
dst[2] = uint8(offset >> 8)
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||||
|
return 3
|
||||||
|
}
|
||||||
|
// Emit the remaining copy, encoded as 2 bytes.
|
||||||
|
dst[1] = uint8(offset)
|
||||||
|
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchLen returns how many bytes match in a and b
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// len(a) <= len(b)
|
||||||
|
//
|
||||||
|
func matchLen(a []byte, b []byte) int {
|
||||||
|
b = b[:len(a)]
|
||||||
|
var checked int
|
||||||
|
if len(a) > 4 {
|
||||||
|
// Try 4 bytes first
|
||||||
|
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
|
||||||
|
return bits.TrailingZeros32(diff) >> 3
|
||||||
|
}
|
||||||
|
// Switch to 8 byte matching.
|
||||||
|
checked = 4
|
||||||
|
a = a[4:]
|
||||||
|
b = b[4:]
|
||||||
|
for len(a) >= 8 {
|
||||||
|
b = b[:len(a)]
|
||||||
|
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
|
||||||
|
return checked + (bits.TrailingZeros64(diff) >> 3)
|
||||||
|
}
|
||||||
|
checked += 8
|
||||||
|
a = a[8:]
|
||||||
|
b = b[8:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b = b[:len(a)]
|
||||||
|
for i := range a {
|
||||||
|
if a[i] != b[i] {
|
||||||
|
return int(i) + checked
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(a) + checked
|
||||||
|
}
|
191
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
generated
vendored
Normal file
191
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,191 @@
|
||||||
|
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
|
||||||
|
|
||||||
|
//go:build !appengine && !noasm && gc && !noasm
|
||||||
|
// +build !appengine,!noasm,gc,!noasm
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
func _dummy_()
|
||||||
|
|
||||||
|
// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4294967295 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBlockAsm(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4194304 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBlockAsm4MB(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 16383 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBlockAsm12B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4095 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBlockAsm10B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 511 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBlockAsm8B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4294967295 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBetterBlockAsm(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4194304 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 16383 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBetterBlockAsm12B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4095 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBetterBlockAsm10B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 511 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeBetterBlockAsm8B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4294967295 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBlockAsm(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 65535 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 16383 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4095 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 511 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4294967295 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 65535 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 16383 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 4095 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||||
|
// Maximum input 511 bytes.
|
||||||
|
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
|
||||||
|
|
||||||
|
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes with margin of 0 bytes
|
||||||
|
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func emitLiteral(dst []byte, lit []byte) int
|
||||||
|
|
||||||
|
// emitRepeat writes a repeat chunk and returns the number of bytes written.
|
||||||
|
// Length must be at least 4 and < 1<<32
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func emitRepeat(dst []byte, offset int, length int) int
|
||||||
|
|
||||||
|
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes
|
||||||
|
// 1 <= offset && offset <= math.MaxUint32
|
||||||
|
// 4 <= length && length <= 1 << 24
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func emitCopy(dst []byte, offset int, length int) int
|
||||||
|
|
||||||
|
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// dst is long enough to hold the encoded bytes
|
||||||
|
// 1 <= offset && offset <= math.MaxUint32
|
||||||
|
// 4 <= length && length <= 1 << 24
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func emitCopyNoRepeat(dst []byte, offset int, length int) int
|
||||||
|
|
||||||
|
// matchLen returns how many bytes match in a and b
|
||||||
|
//
|
||||||
|
// It assumes that:
|
||||||
|
// len(a) <= len(b)
|
||||||
|
//
|
||||||
|
//go:noescape
|
||||||
|
func matchLen(a []byte, b []byte) int
|
17779
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
generated
vendored
Normal file
17779
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
598
vendor/github.com/klauspost/compress/s2/index.go
generated
vendored
Normal file
598
vendor/github.com/klauspost/compress/s2/index.go
generated
vendored
Normal file
|
@ -0,0 +1,598 @@
|
||||||
|
// Copyright (c) 2022+ Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
S2IndexHeader = "s2idx\x00"
|
||||||
|
S2IndexTrailer = "\x00xdi2s"
|
||||||
|
maxIndexEntries = 1 << 16
|
||||||
|
)
|
||||||
|
|
||||||
|
// Index represents an S2/Snappy index.
|
||||||
|
type Index struct {
|
||||||
|
TotalUncompressed int64 // Total Uncompressed size if known. Will be -1 if unknown.
|
||||||
|
TotalCompressed int64 // Total Compressed size if known. Will be -1 if unknown.
|
||||||
|
info []struct {
|
||||||
|
compressedOffset int64
|
||||||
|
uncompressedOffset int64
|
||||||
|
}
|
||||||
|
estBlockUncomp int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *Index) reset(maxBlock int) {
|
||||||
|
i.estBlockUncomp = int64(maxBlock)
|
||||||
|
i.TotalCompressed = -1
|
||||||
|
i.TotalUncompressed = -1
|
||||||
|
if len(i.info) > 0 {
|
||||||
|
i.info = i.info[:0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocInfos will allocate an empty slice of infos.
|
||||||
|
func (i *Index) allocInfos(n int) {
|
||||||
|
if n > maxIndexEntries {
|
||||||
|
panic("n > maxIndexEntries")
|
||||||
|
}
|
||||||
|
i.info = make([]struct {
|
||||||
|
compressedOffset int64
|
||||||
|
uncompressedOffset int64
|
||||||
|
}, 0, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// add an uncompressed and compressed pair.
|
||||||
|
// Entries must be sent in order.
|
||||||
|
func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
|
||||||
|
if i == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
lastIdx := len(i.info) - 1
|
||||||
|
if lastIdx >= 0 {
|
||||||
|
latest := i.info[lastIdx]
|
||||||
|
if latest.uncompressedOffset == uncompressedOffset {
|
||||||
|
// Uncompressed didn't change, don't add entry,
|
||||||
|
// but update start index.
|
||||||
|
latest.compressedOffset = compressedOffset
|
||||||
|
i.info[lastIdx] = latest
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if latest.uncompressedOffset > uncompressedOffset {
|
||||||
|
return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
|
||||||
|
}
|
||||||
|
if latest.compressedOffset > compressedOffset {
|
||||||
|
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i.info = append(i.info, struct {
|
||||||
|
compressedOffset int64
|
||||||
|
uncompressedOffset int64
|
||||||
|
}{compressedOffset: compressedOffset, uncompressedOffset: uncompressedOffset})
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the offset at or before the wanted (uncompressed) offset.
|
||||||
|
// If offset is 0 or positive it is the offset from the beginning of the file.
|
||||||
|
// If the uncompressed size is known, the offset must be within the file.
|
||||||
|
// If an offset outside the file is requested io.ErrUnexpectedEOF is returned.
|
||||||
|
// If the offset is negative, it is interpreted as the distance from the end of the file,
|
||||||
|
// where -1 represents the last byte.
|
||||||
|
// If offset from the end of the file is requested, but size is unknown,
|
||||||
|
// ErrUnsupported will be returned.
|
||||||
|
func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
|
||||||
|
if i.TotalUncompressed < 0 {
|
||||||
|
return 0, 0, ErrCorrupt
|
||||||
|
}
|
||||||
|
if offset < 0 {
|
||||||
|
offset = i.TotalUncompressed + offset
|
||||||
|
if offset < 0 {
|
||||||
|
return 0, 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if offset > i.TotalUncompressed {
|
||||||
|
return 0, 0, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
if len(i.info) > 200 {
|
||||||
|
n := sort.Search(len(i.info), func(n int) bool {
|
||||||
|
return i.info[n].uncompressedOffset > offset
|
||||||
|
})
|
||||||
|
if n == 0 {
|
||||||
|
n = 1
|
||||||
|
}
|
||||||
|
return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
|
||||||
|
}
|
||||||
|
for _, info := range i.info {
|
||||||
|
if info.uncompressedOffset > offset {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
compressedOff = info.compressedOffset
|
||||||
|
uncompressedOff = info.uncompressedOffset
|
||||||
|
}
|
||||||
|
return compressedOff, uncompressedOff, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// reduce to stay below maxIndexEntries
|
||||||
|
func (i *Index) reduce() {
|
||||||
|
if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Algorithm, keep 1, remove removeN entries...
|
||||||
|
removeN := (len(i.info) + 1) / maxIndexEntries
|
||||||
|
src := i.info
|
||||||
|
j := 0
|
||||||
|
|
||||||
|
// Each block should be at least 1MB, but don't reduce below 1000 entries.
|
||||||
|
for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
|
||||||
|
removeN++
|
||||||
|
}
|
||||||
|
for idx := 0; idx < len(src); idx++ {
|
||||||
|
i.info[j] = src[idx]
|
||||||
|
j++
|
||||||
|
idx += removeN
|
||||||
|
}
|
||||||
|
i.info = i.info[:j]
|
||||||
|
// Update maxblock estimate.
|
||||||
|
i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte {
|
||||||
|
i.reduce()
|
||||||
|
var tmp [binary.MaxVarintLen64]byte
|
||||||
|
|
||||||
|
initSize := len(b)
|
||||||
|
// We make the start a skippable header+size.
|
||||||
|
b = append(b, ChunkTypeIndex, 0, 0, 0)
|
||||||
|
b = append(b, []byte(S2IndexHeader)...)
|
||||||
|
// Total Uncompressed size
|
||||||
|
n := binary.PutVarint(tmp[:], uncompTotal)
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
// Total Compressed size
|
||||||
|
n = binary.PutVarint(tmp[:], compTotal)
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
// Put EstBlockUncomp size
|
||||||
|
n = binary.PutVarint(tmp[:], i.estBlockUncomp)
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
// Put length
|
||||||
|
n = binary.PutVarint(tmp[:], int64(len(i.info)))
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
|
||||||
|
// Check if we should add uncompressed offsets
|
||||||
|
var hasUncompressed byte
|
||||||
|
for idx, info := range i.info {
|
||||||
|
if idx == 0 {
|
||||||
|
if info.uncompressedOffset != 0 {
|
||||||
|
hasUncompressed = 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if info.uncompressedOffset != i.info[idx-1].uncompressedOffset+i.estBlockUncomp {
|
||||||
|
hasUncompressed = 1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b = append(b, hasUncompressed)
|
||||||
|
|
||||||
|
// Add each entry
|
||||||
|
if hasUncompressed == 1 {
|
||||||
|
for idx, info := range i.info {
|
||||||
|
uOff := info.uncompressedOffset
|
||||||
|
if idx > 0 {
|
||||||
|
prev := i.info[idx-1]
|
||||||
|
uOff -= prev.uncompressedOffset + (i.estBlockUncomp)
|
||||||
|
}
|
||||||
|
n = binary.PutVarint(tmp[:], uOff)
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initial compressed size estimate.
|
||||||
|
cPredict := i.estBlockUncomp / 2
|
||||||
|
|
||||||
|
for idx, info := range i.info {
|
||||||
|
cOff := info.compressedOffset
|
||||||
|
if idx > 0 {
|
||||||
|
prev := i.info[idx-1]
|
||||||
|
cOff -= prev.compressedOffset + cPredict
|
||||||
|
// Update compressed size prediction, with half the error.
|
||||||
|
cPredict += cOff / 2
|
||||||
|
}
|
||||||
|
n = binary.PutVarint(tmp[:], cOff)
|
||||||
|
b = append(b, tmp[:n]...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add Total Size.
|
||||||
|
// Stored as fixed size for easier reading.
|
||||||
|
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(S2IndexTrailer)))
|
||||||
|
b = append(b, tmp[:4]...)
|
||||||
|
// Trailer
|
||||||
|
b = append(b, []byte(S2IndexTrailer)...)
|
||||||
|
|
||||||
|
// Update size
|
||||||
|
chunkLen := len(b) - initSize - skippableFrameHeader
|
||||||
|
b[initSize+1] = uint8(chunkLen >> 0)
|
||||||
|
b[initSize+2] = uint8(chunkLen >> 8)
|
||||||
|
b[initSize+3] = uint8(chunkLen >> 16)
|
||||||
|
//fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load a binary index.
|
||||||
|
// A zero value Index can be used or a previous one can be reused.
|
||||||
|
func (i *Index) Load(b []byte) ([]byte, error) {
|
||||||
|
if len(b) <= 4+len(S2IndexHeader)+len(S2IndexTrailer) {
|
||||||
|
return b, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
if b[0] != ChunkTypeIndex {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
|
||||||
|
b = b[4:]
|
||||||
|
|
||||||
|
// Validate we have enough...
|
||||||
|
if len(b) < chunkLen {
|
||||||
|
return b, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
|
||||||
|
return b, ErrUnsupported
|
||||||
|
}
|
||||||
|
b = b[len(S2IndexHeader):]
|
||||||
|
|
||||||
|
// Total Uncompressed
|
||||||
|
if v, n := binary.Varint(b); n <= 0 || v < 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
i.TotalUncompressed = v
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Total Compressed
|
||||||
|
if v, n := binary.Varint(b); n <= 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
i.TotalCompressed = v
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read EstBlockUncomp
|
||||||
|
if v, n := binary.Varint(b); n <= 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
if v < 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
i.estBlockUncomp = v
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
|
||||||
|
var entries int
|
||||||
|
if v, n := binary.Varint(b); n <= 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
if v < 0 || v > maxIndexEntries {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
entries = int(v)
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
if cap(i.info) < entries {
|
||||||
|
i.allocInfos(entries)
|
||||||
|
}
|
||||||
|
i.info = i.info[:entries]
|
||||||
|
|
||||||
|
if len(b) < 1 {
|
||||||
|
return b, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
hasUncompressed := b[0]
|
||||||
|
b = b[1:]
|
||||||
|
if hasUncompressed&1 != hasUncompressed {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add each uncompressed entry
|
||||||
|
for idx := range i.info {
|
||||||
|
var uOff int64
|
||||||
|
if hasUncompressed != 0 {
|
||||||
|
// Load delta
|
||||||
|
if v, n := binary.Varint(b); n <= 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
uOff = v
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx > 0 {
|
||||||
|
prev := i.info[idx-1].uncompressedOffset
|
||||||
|
uOff += prev + (i.estBlockUncomp)
|
||||||
|
if uOff <= prev {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if uOff < 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
i.info[idx].uncompressedOffset = uOff
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initial compressed size estimate.
|
||||||
|
cPredict := i.estBlockUncomp / 2
|
||||||
|
|
||||||
|
// Add each compressed entry
|
||||||
|
for idx := range i.info {
|
||||||
|
var cOff int64
|
||||||
|
if v, n := binary.Varint(b); n <= 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
} else {
|
||||||
|
cOff = v
|
||||||
|
b = b[n:]
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx > 0 {
|
||||||
|
// Update compressed size prediction, with half the error.
|
||||||
|
cPredictNew := cPredict + cOff/2
|
||||||
|
|
||||||
|
prev := i.info[idx-1].compressedOffset
|
||||||
|
cOff += prev + cPredict
|
||||||
|
if cOff <= prev {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
cPredict = cPredictNew
|
||||||
|
}
|
||||||
|
if cOff < 0 {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
i.info[idx].compressedOffset = cOff
|
||||||
|
}
|
||||||
|
if len(b) < 4+len(S2IndexTrailer) {
|
||||||
|
return b, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
// Skip size...
|
||||||
|
b = b[4:]
|
||||||
|
|
||||||
|
// Check trailer...
|
||||||
|
if !bytes.Equal(b[:len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
|
||||||
|
return b, ErrCorrupt
|
||||||
|
}
|
||||||
|
return b[len(S2IndexTrailer):], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadStream will load an index from the end of the supplied stream.
|
||||||
|
// ErrUnsupported will be returned if the signature cannot be found.
|
||||||
|
// ErrCorrupt will be returned if unexpected values are found.
|
||||||
|
// io.ErrUnexpectedEOF is returned if there are too few bytes.
|
||||||
|
// IO errors are returned as-is.
|
||||||
|
func (i *Index) LoadStream(rs io.ReadSeeker) error {
|
||||||
|
// Go to end.
|
||||||
|
_, err := rs.Seek(-10, io.SeekEnd)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var tmp [10]byte
|
||||||
|
_, err = io.ReadFull(rs, tmp[:])
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Check trailer...
|
||||||
|
if !bytes.Equal(tmp[4:4+len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
|
||||||
|
return ErrUnsupported
|
||||||
|
}
|
||||||
|
sz := binary.LittleEndian.Uint32(tmp[:4])
|
||||||
|
if sz > maxChunkSize+skippableFrameHeader {
|
||||||
|
return ErrCorrupt
|
||||||
|
}
|
||||||
|
_, err = rs.Seek(-int64(sz), io.SeekEnd)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read index.
|
||||||
|
buf := make([]byte, sz)
|
||||||
|
_, err = io.ReadFull(rs, buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = i.Load(buf)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexStream will return an index for a stream.
|
||||||
|
// The stream structure will be checked, but
|
||||||
|
// data within blocks is not verified.
|
||||||
|
// The returned index can either be appended to the end of the stream
|
||||||
|
// or stored separately.
|
||||||
|
func IndexStream(r io.Reader) ([]byte, error) {
|
||||||
|
var i Index
|
||||||
|
var buf [maxChunkSize]byte
|
||||||
|
var readHeader bool
|
||||||
|
for {
|
||||||
|
_, err := io.ReadFull(r, buf[:4])
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Start of this chunk.
|
||||||
|
startChunk := i.TotalCompressed
|
||||||
|
i.TotalCompressed += 4
|
||||||
|
|
||||||
|
chunkType := buf[0]
|
||||||
|
if !readHeader {
|
||||||
|
if chunkType != chunkTypeStreamIdentifier {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
readHeader = true
|
||||||
|
}
|
||||||
|
chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
|
||||||
|
if chunkLen < checksumSize {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
|
||||||
|
i.TotalCompressed += int64(chunkLen)
|
||||||
|
_, err = io.ReadFull(r, buf[:chunkLen])
|
||||||
|
if err != nil {
|
||||||
|
return nil, io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
// The chunk types are specified at
|
||||||
|
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||||
|
switch chunkType {
|
||||||
|
case chunkTypeCompressedData:
|
||||||
|
// Section 4.2. Compressed data (chunk type 0x00).
|
||||||
|
// Skip checksum.
|
||||||
|
dLen, err := DecodedLen(buf[checksumSize:])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if dLen > maxBlockSize {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
if i.estBlockUncomp == 0 {
|
||||||
|
// Use first block for estimate...
|
||||||
|
i.estBlockUncomp = int64(dLen)
|
||||||
|
}
|
||||||
|
err = i.add(startChunk, i.TotalUncompressed)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
i.TotalUncompressed += int64(dLen)
|
||||||
|
continue
|
||||||
|
case chunkTypeUncompressedData:
|
||||||
|
n2 := chunkLen - checksumSize
|
||||||
|
if n2 > maxBlockSize {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
if i.estBlockUncomp == 0 {
|
||||||
|
// Use first block for estimate...
|
||||||
|
i.estBlockUncomp = int64(n2)
|
||||||
|
}
|
||||||
|
err = i.add(startChunk, i.TotalUncompressed)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
i.TotalUncompressed += int64(n2)
|
||||||
|
continue
|
||||||
|
case chunkTypeStreamIdentifier:
|
||||||
|
// Section 4.1. Stream identifier (chunk type 0xff).
|
||||||
|
if chunkLen != len(magicBody) {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
|
||||||
|
if string(buf[:len(magicBody)]) != magicBody {
|
||||||
|
if string(buf[:len(magicBody)]) != magicBodySnappy {
|
||||||
|
return nil, ErrCorrupt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if chunkType <= 0x7f {
|
||||||
|
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
|
||||||
|
return nil, ErrUnsupported
|
||||||
|
}
|
||||||
|
if chunkLen > maxChunkSize {
|
||||||
|
return nil, ErrUnsupported
|
||||||
|
}
|
||||||
|
// Section 4.4 Padding (chunk type 0xfe).
|
||||||
|
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// JSON returns the index as JSON text.
|
||||||
|
func (i *Index) JSON() []byte {
|
||||||
|
x := struct {
|
||||||
|
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
|
||||||
|
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
|
||||||
|
Offsets []struct {
|
||||||
|
CompressedOffset int64 `json:"compressed"`
|
||||||
|
UncompressedOffset int64 `json:"uncompressed"`
|
||||||
|
} `json:"offsets"`
|
||||||
|
EstBlockUncomp int64 `json:"est_block_uncompressed"`
|
||||||
|
}{
|
||||||
|
TotalUncompressed: i.TotalUncompressed,
|
||||||
|
TotalCompressed: i.TotalCompressed,
|
||||||
|
EstBlockUncomp: i.estBlockUncomp,
|
||||||
|
}
|
||||||
|
for _, v := range i.info {
|
||||||
|
x.Offsets = append(x.Offsets, struct {
|
||||||
|
CompressedOffset int64 `json:"compressed"`
|
||||||
|
UncompressedOffset int64 `json:"uncompressed"`
|
||||||
|
}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
|
||||||
|
}
|
||||||
|
b, _ := json.MarshalIndent(x, "", " ")
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemoveIndexHeaders will trim all headers and trailers from a given index.
|
||||||
|
// This is expected to save 20 bytes.
|
||||||
|
// These can be restored using RestoreIndexHeaders.
|
||||||
|
// This removes a layer of security, but is the most compact representation.
|
||||||
|
// Returns nil if headers contains errors.
|
||||||
|
// The returned slice references the provided slice.
|
||||||
|
func RemoveIndexHeaders(b []byte) []byte {
|
||||||
|
const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
|
||||||
|
if len(b) <= save {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if b[0] != ChunkTypeIndex {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
|
||||||
|
b = b[4:]
|
||||||
|
|
||||||
|
// Validate we have enough...
|
||||||
|
if len(b) < chunkLen {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
b = b[:chunkLen]
|
||||||
|
|
||||||
|
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
b = b[len(S2IndexHeader):]
|
||||||
|
if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))
|
||||||
|
|
||||||
|
if len(b) < 4 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return b[:len(b)-4]
|
||||||
|
}
|
||||||
|
|
||||||
|
// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders.
|
||||||
|
// No error checking is performed on the input.
|
||||||
|
// If a 0 length slice is sent, it is returned without modification.
|
||||||
|
func RestoreIndexHeaders(in []byte) []byte {
|
||||||
|
if len(in) == 0 {
|
||||||
|
return in
|
||||||
|
}
|
||||||
|
b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4)
|
||||||
|
b = append(b, ChunkTypeIndex, 0, 0, 0)
|
||||||
|
b = append(b, []byte(S2IndexHeader)...)
|
||||||
|
b = append(b, in...)
|
||||||
|
|
||||||
|
var tmp [4]byte
|
||||||
|
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer)))
|
||||||
|
b = append(b, tmp[:4]...)
|
||||||
|
// Trailer
|
||||||
|
b = append(b, []byte(S2IndexTrailer)...)
|
||||||
|
|
||||||
|
chunkLen := len(b) - skippableFrameHeader
|
||||||
|
b[1] = uint8(chunkLen >> 0)
|
||||||
|
b[2] = uint8(chunkLen >> 8)
|
||||||
|
b[3] = uint8(chunkLen >> 16)
|
||||||
|
return b
|
||||||
|
}
|
143
vendor/github.com/klauspost/compress/s2/s2.go
generated
vendored
Normal file
143
vendor/github.com/klauspost/compress/s2/s2.go
generated
vendored
Normal file
|
@ -0,0 +1,143 @@
|
||||||
|
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||||
|
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package s2 implements the S2 compression format.
|
||||||
|
//
|
||||||
|
// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput,
|
||||||
|
// which is why it features concurrent compression for bigger payloads.
|
||||||
|
//
|
||||||
|
// Decoding is compatible with Snappy compressed content,
|
||||||
|
// but content compressed with S2 cannot be decompressed by Snappy.
|
||||||
|
//
|
||||||
|
// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
|
||||||
|
//
|
||||||
|
// There are actually two S2 formats: block and stream. They are related,
|
||||||
|
// but different: trying to decompress block-compressed data as a S2 stream
|
||||||
|
// will fail, and vice versa. The block format is the Decode and Encode
|
||||||
|
// functions and the stream format is the Reader and Writer types.
|
||||||
|
//
|
||||||
|
// A "better" compression option is available. This will trade some compression
|
||||||
|
// speed
|
||||||
|
//
|
||||||
|
// The block format, the more common case, is used when the complete size (the
|
||||||
|
// number of bytes) of the original data is known upfront, at the time
|
||||||
|
// compression starts. The stream format, also known as the framing format, is
|
||||||
|
// for when that isn't always true.
|
||||||
|
//
|
||||||
|
// Blocks to not offer much data protection, so it is up to you to
|
||||||
|
// add data validation of decompressed blocks.
|
||||||
|
//
|
||||||
|
// Streams perform CRC validation of the decompressed data.
|
||||||
|
// Stream compression will also be performed on multiple CPU cores concurrently
|
||||||
|
// significantly improving throughput.
|
||||||
|
package s2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"hash/crc32"
|
||||||
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Each encoded block begins with the varint-encoded length of the decoded data,
|
||||||
|
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
|
||||||
|
first byte of each chunk is broken into its 2 least and 6 most significant bits
|
||||||
|
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
|
||||||
|
Zero means a literal tag. All other values mean a copy tag.
|
||||||
|
|
||||||
|
For literal tags:
|
||||||
|
- If m < 60, the next 1 + m bytes are literal bytes.
|
||||||
|
- Otherwise, let n be the little-endian unsigned integer denoted by the next
|
||||||
|
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
|
||||||
|
|
||||||
|
For copy tags, length bytes are copied from offset bytes ago, in the style of
|
||||||
|
Lempel-Ziv compression algorithms. In particular:
|
||||||
|
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
|
||||||
|
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
|
||||||
|
of the offset. The next byte is bits 0-7 of the offset.
|
||||||
|
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
|
||||||
|
The length is 1 + m. The offset is the little-endian unsigned integer
|
||||||
|
denoted by the next 2 bytes.
|
||||||
|
- For l == 3, the offset ranges in [0, 1<<32) and the length in
|
||||||
|
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
|
||||||
|
integer denoted by the next 4 bytes.
|
||||||
|
*/
|
||||||
|
const (
|
||||||
|
tagLiteral = 0x00
|
||||||
|
tagCopy1 = 0x01
|
||||||
|
tagCopy2 = 0x02
|
||||||
|
tagCopy4 = 0x03
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
checksumSize = 4
|
||||||
|
chunkHeaderSize = 4
|
||||||
|
magicChunk = "\xff\x06\x00\x00" + magicBody
|
||||||
|
magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
|
||||||
|
magicBodySnappy = "sNaPpY"
|
||||||
|
magicBody = "S2sTwO"
|
||||||
|
|
||||||
|
// maxBlockSize is the maximum size of the input to encodeBlock.
|
||||||
|
//
|
||||||
|
// For the framing format (Writer type instead of Encode function),
|
||||||
|
// this is the maximum uncompressed size of a block.
|
||||||
|
maxBlockSize = 4 << 20
|
||||||
|
|
||||||
|
// minBlockSize is the minimum size of block setting when creating a writer.
|
||||||
|
minBlockSize = 4 << 10
|
||||||
|
|
||||||
|
skippableFrameHeader = 4
|
||||||
|
maxChunkSize = 1<<24 - 1 // 16777215
|
||||||
|
|
||||||
|
// Default block size
|
||||||
|
defaultBlockSize = 1 << 20
|
||||||
|
|
||||||
|
// maxSnappyBlockSize is the maximum snappy block size.
|
||||||
|
maxSnappyBlockSize = 1 << 16
|
||||||
|
|
||||||
|
obufHeaderLen = checksumSize + chunkHeaderSize
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
chunkTypeCompressedData = 0x00
|
||||||
|
chunkTypeUncompressedData = 0x01
|
||||||
|
ChunkTypeIndex = 0x99
|
||||||
|
chunkTypePadding = 0xfe
|
||||||
|
chunkTypeStreamIdentifier = 0xff
|
||||||
|
)
|
||||||
|
|
||||||
|
var crcTable = crc32.MakeTable(crc32.Castagnoli)
|
||||||
|
|
||||||
|
// crc implements the checksum specified in section 3 of
|
||||||
|
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||||
|
func crc(b []byte) uint32 {
|
||||||
|
c := crc32.Update(0, crcTable, b)
|
||||||
|
return c>>15 | c<<17 + 0xa282ead8
|
||||||
|
}
|
||||||
|
|
||||||
|
// literalExtraSize returns the extra size of encoding n literals.
|
||||||
|
// n should be >= 0 and <= math.MaxUint32.
|
||||||
|
func literalExtraSize(n int64) int64 {
|
||||||
|
if n == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case n < 60:
|
||||||
|
return 1
|
||||||
|
case n < 1<<8:
|
||||||
|
return 2
|
||||||
|
case n < 1<<16:
|
||||||
|
return 3
|
||||||
|
case n < 1<<24:
|
||||||
|
return 4
|
||||||
|
default:
|
||||||
|
return 5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type byter interface {
|
||||||
|
Bytes() []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ byter = &bytes.Buffer{}
|
3
vendor/modules.txt
vendored
3
vendor/modules.txt
vendored
|
@ -16,7 +16,7 @@ cloud.google.com/go/storage
|
||||||
cloud.google.com/go/storage/internal
|
cloud.google.com/go/storage/internal
|
||||||
cloud.google.com/go/storage/internal/apiv2
|
cloud.google.com/go/storage/internal/apiv2
|
||||||
cloud.google.com/go/storage/internal/apiv2/stubs
|
cloud.google.com/go/storage/internal/apiv2/stubs
|
||||||
# github.com/VictoriaMetrics/fastcache v1.10.0
|
# github.com/VictoriaMetrics/fastcache v1.12.0
|
||||||
## explicit; go 1.13
|
## explicit; go 1.13
|
||||||
github.com/VictoriaMetrics/fastcache
|
github.com/VictoriaMetrics/fastcache
|
||||||
# github.com/VictoriaMetrics/fasthttp v1.1.0
|
# github.com/VictoriaMetrics/fasthttp v1.1.0
|
||||||
|
@ -166,6 +166,7 @@ github.com/klauspost/compress/gzip
|
||||||
github.com/klauspost/compress/huff0
|
github.com/klauspost/compress/huff0
|
||||||
github.com/klauspost/compress/internal/cpuinfo
|
github.com/klauspost/compress/internal/cpuinfo
|
||||||
github.com/klauspost/compress/internal/snapref
|
github.com/klauspost/compress/internal/snapref
|
||||||
|
github.com/klauspost/compress/s2
|
||||||
github.com/klauspost/compress/zlib
|
github.com/klauspost/compress/zlib
|
||||||
github.com/klauspost/compress/zstd
|
github.com/klauspost/compress/zstd
|
||||||
github.com/klauspost/compress/zstd/internal/xxhash
|
github.com/klauspost/compress/zstd/internal/xxhash
|
||||||
|
|
Loading…
Reference in a new issue