vendor: upgrade github.com/klauspost/compress from v1.11.3 to v1.11.4

This commit is contained in:
Aliaksandr Valialkin 2020-12-21 08:53:37 +02:00
parent 0f99c1afb1
commit de89bcddae
9 changed files with 724 additions and 17 deletions

2
go.mod
View file

@ -13,7 +13,7 @@ require (
github.com/aws/aws-sdk-go v1.36.12 github.com/aws/aws-sdk-go v1.36.12
github.com/cespare/xxhash/v2 v2.1.1 github.com/cespare/xxhash/v2 v2.1.1
github.com/golang/snappy v0.0.2 github.com/golang/snappy v0.0.2
github.com/klauspost/compress v1.11.3 github.com/klauspost/compress v1.11.4
github.com/valyala/fastjson v1.6.3 github.com/valyala/fastjson v1.6.3
github.com/valyala/fastrand v1.0.0 github.com/valyala/fastrand v1.0.0
github.com/valyala/fasttemplate v1.2.1 github.com/valyala/fasttemplate v1.2.1

3
go.sum
View file

@ -149,8 +149,9 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.3 h1:dB4Bn0tN3wdCzQxnS8r06kV74qN/TAfaIS0bVE8h3jc=
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=

View file

@ -24,22 +24,21 @@ Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
### Status: ### Status:
STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively
used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates.
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
There may still be specific combinations of data types/size/settings that could lead to edge cases, There may still be specific combinations of data types/size/settings that could lead to edge cases,
so as always, testing is recommended. so as always, testing is recommended.
For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. For now, a high speed (fastest) and medium-fast (default) compressor has been implemented.
The "Fastest" compression ratio is roughly equivalent to zstd level 1. * The "Fastest" compression ratio is roughly equivalent to zstd level 1.
The "Default" compression ratio is roughly equivalent to zstd level 3 (default). * The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
* The "Better" compression ratio is roughly equivalent to zstd level 7.
* The "Best" compression ratio is roughly equivalent to zstd level 11.
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
### Usage ### Usage
@ -140,7 +139,7 @@ I have collected some speed examples to compare speed and compression against ot
* `file` is the input file. * `file` is the input file.
* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library. * `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library.
* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". * `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best".
* `insize`/`outsize` is the input/output size. * `insize`/`outsize` is the input/output size.
* `millis` is the number of milliseconds used for compression. * `millis` is the number of milliseconds used for compression.
* `mb/s` is megabytes (2^20 bytes) per second. * `mb/s` is megabytes (2^20 bytes) per second.
@ -154,11 +153,13 @@ file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38 silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 65177448 1899 106.44 silesia.tar zskp 3 211947520 65177448 1899 106.44
silesia.tar zskp 4 211947520 61381950 8115 24.91
cgo zstd: cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56 silesia.tar zstd 1 211947520 73605392 543 371.56
silesia.tar zstd 3 211947520 66793289 864 233.68 silesia.tar zstd 3 211947520 66793289 864 233.68
silesia.tar zstd 6 211947520 62916450 1913 105.66 silesia.tar zstd 6 211947520 62916450 1913 105.66
silesia.tar zstd 9 211947520 60212393 5063 39.92
gzip, stdlib/this package: gzip, stdlib/this package:
silesia.tar gzstd 1 211947520 80007735 1654 122.21 silesia.tar gzstd 1 211947520 80007735 1654 122.21
@ -171,9 +172,11 @@ file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34 gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 185792019 9324 195.48 gob-stream zskp 3 1911399616 185792019 9324 195.48
gob-stream zskp 4 1911399616 171537212 32113 56.76
gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56 gob-stream zstd 6 1911399616 193632038 6687 272.56
gob-stream zstd 9 1911399616 177620386 16175 112.70
gob-stream gzstd 1 1911399616 357382641 10251 177.82 gob-stream gzstd 1 1911399616 357382641 10251 177.82
gob-stream gzkp 1 1911399616 362156523 5695 320.08 gob-stream gzkp 1 1911399616 362156523 5695 320.08
@ -185,9 +188,11 @@ file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97 enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 294540704 11725 81.34 enwik9 zskp 3 1000000000 294540704 11725 81.34
enwik9 zskp 4 1000000000 276609671 44029 21.66
enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68 enwik9 zstd 6 1000000000 295138875 10290 92.68
enwik9 zstd 9 1000000000 278348700 28549 33.40
enwik9 gzstd 1 1000000000 382578136 9604 99.30 enwik9 gzstd 1 1000000000 382578136 9604 99.30
enwik9 gzkp 1 1000000000 383825945 6544 145.73 enwik9 gzkp 1 1000000000 383825945 6544 145.73
@ -198,9 +203,11 @@ file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54 github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54
github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03 github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
@ -211,9 +218,11 @@ file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75 rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75
rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49 rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
@ -224,9 +233,11 @@ file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53 nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53
nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53 nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
``` ```

View file

@ -613,7 +613,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Decode treeless literal block. // Decode treeless literal block.
if litType == literalsBlockTreeless { if litType == literalsBlockTreeless {
// TODO: We could send the history early WITHOUT the stream history. // TODO: We could send the history early WITHOUT the stream history.
// This would allow decoding treeless literials before the byte history is available. // This would allow decoding treeless literals before the byte history is available.
// Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless. // Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
// So not much obvious gain here. // So not much obvious gain here.

View file

@ -76,6 +76,7 @@ func (b *blockEnc) reset(prev *blockEnc) {
if prev != nil { if prev != nil {
b.recentOffsets = prev.prevRecentOffsets b.recentOffsets = prev.prevRecentOffsets
} }
b.dictLitEnc = nil
} }
// reset will reset the block for a new encode, but in the same stream, // reset will reset the block for a new encode, but in the same stream,

View file

@ -0,0 +1,202 @@
// Copyright 2020+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"bytes"
"errors"
"io"
)
// HeaderMaxSize is the maximum size of a Frame and Block Header.
// If less is sent to Header.Decode it *may* still contain enough information.
const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
type Header struct {
// Window Size the window of data to keep while decoding.
// Will only be set if HasFCS is false.
WindowSize uint64
// Frame content size.
// Expected size of the entire frame.
FrameContentSize uint64
// Dictionary ID.
// If 0, no dictionary.
DictionaryID uint32
// First block information.
FirstBlock struct {
// OK will be set if first block could be decoded.
OK bool
// Is this the last block of a frame?
Last bool
// Is the data compressed?
// If true CompressedSize will be populated.
// Unfortunately DecompressedSize cannot be determined
// without decoding the blocks.
Compressed bool
// DecompressedSize is the expected decompressed size of the block.
// Will be 0 if it cannot be determined.
DecompressedSize int
// CompressedSize of the data in the block.
// Does not include the block header.
// Will be equal to DecompressedSize if not Compressed.
CompressedSize int
}
// Skippable will be true if the frame is meant to be skipped.
// No other information will be populated.
Skippable bool
// If set there is a checksum present for the block content.
HasCheckSum bool
// If this is true FrameContentSize will have a valid value
HasFCS bool
SingleSegment bool
}
// Decode the header from the beginning of the stream.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
b, in := in[:4], in[4:]
if !bytes.Equal(b, frameMagic) {
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
*h = Header{Skippable: true}
return nil
}
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
// Clear output
*h = Header{}
fhd, in := in[0], in[1:]
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("Reserved bit set on frame header")
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
h.WindowSize = windowBase + windowAdd
}
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
}
if len(in) < int(size) {
return io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
if b == nil {
return io.ErrUnexpectedEOF
}
switch size {
case 1:
h.DictionaryID = uint32(b[0])
case 2:
h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
case 4:
h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
}
// Read Frame_Content_Size
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
var fcsSize int
v := fhd >> 6
switch v {
case 0:
if h.SingleSegment {
fcsSize = 1
}
default:
fcsSize = 1 << v
}
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
return io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
if b == nil {
return io.ErrUnexpectedEOF
}
switch fcsSize {
case 1:
h.FrameContentSize = uint64(b[0])
case 2:
// When FCS_Field_Size is 2, the offset of 256 is added.
h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
case 4:
h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
case 8:
d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
}
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
return nil
}
tmp, in := in[:3], in[3:]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
h.FirstBlock.Last = bh&1 != 0
blockType := blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
return nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
h.FirstBlock.CompressedSize = 1
case blockTypeCompressed:
h.FirstBlock.Compressed = true
h.FirstBlock.CompressedSize = cSize
case blockTypeRaw:
h.FirstBlock.DecompressedSize = cSize
h.FirstBlock.CompressedSize = cSize
default:
panic("Invalid block type")
}
h.FirstBlock.OK = true
return nil
}

484
vendor/github.com/klauspost/compress/zstd/enc_best.go generated vendored Normal file
View file

@ -0,0 +1,484 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
package zstd
import (
"fmt"
"math/bits"
)
const (
bestLongTableBits = 20 // Bits used in the long match table
bestLongTableSize = 1 << bestLongTableBits // Size of the table
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table
bestShortTableSize = 1 << bestShortTableBits // Size of the table
)
// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type bestFastEncoder struct {
fastBase
table [bestShortTableSize]prevEntry
longTable [bestLongTableSize]prevEntry
dictTable []prevEntry
dictLongTable []prevEntry
}
// Encode improves compression...
func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
const (
// Input margin is the number of bytes we read (8)
// and the maximum we will read ahead (2)
inputMargin = 8 + 4
minNonLiteralBlockSize = 16
)
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = prevEntry{}
}
for i := range e.longTable[:] {
e.longTable[i] = prevEntry{}
}
e.cur = e.maxMatchOff
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
for i := range e.table[:] {
v := e.table[i].offset
v2 := e.table[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.table[i] = prevEntry{
offset: v,
prev: v2,
}
}
for i := range e.longTable[:] {
v := e.longTable[i].offset
v2 := e.longTable[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.longTable[i] = prevEntry{
offset: v,
prev: v2,
}
}
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
const kSearchStrength = 12
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
offset3 := int32(blk.recentOffsets[2])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
_ = addLiterals
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
type match struct {
offset int32
s int32
length int32
rep int32
}
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{offset: offset, s: s}
}
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
}
bestOf := func(a, b match) match {
aScore := b.s - a.s + a.length
bScore := a.s - b.s + b.length
if a.rep < 0 {
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
}
if b.rep < 0 {
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
}
if aScore >= bScore {
return a
}
return b
}
const goodEnough = 100
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hash4x64(cv, bestShortTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
if canRepeat && best.length < goodEnough {
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
}
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
// No match found, move forward on input, no need to check forward...
if best.length < 4 {
s += 1 + (s-nextEmit)>>(kSearchStrength-1)
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
continue
}
s++
candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
candidateL = e.longTable[hash8(cv, bestLongTableBits)]
candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
}
// We have a match, we can store the forward value
if best.rep > 0 {
s = best.s
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := best.s
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
repIndex := best.offset
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = uint32(best.rep)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
off++
index0++
}
switch best.rep {
case 2:
offset1, offset2 = offset2, offset1
case 3:
offset1, offset2, offset3 = offset3, offset1, offset2
}
cv = load6432(src, s)
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the n-byte match as long as possible.
l := best.length
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
// every entry
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
}
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hash8(cv, bestLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
blk.recentOffsets[2] = uint32(offset3)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.Encode(blk, src)
}
// ResetDict will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
return
}
// Init or copy dict table
if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
if len(e.dictTable) != len(e.table) {
e.dictTable = make([]prevEntry, len(e.table))
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
for i := e.maxMatchOff; i < end; i += 4 {
const hashLog = bestShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash4x64(cv, hashLog) // 0 -> 4
nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
e.dictTable[nextHash] = prevEntry{
prev: e.dictTable[nextHash].offset,
offset: i,
}
e.dictTable[nextHash1] = prevEntry{
prev: e.dictTable[nextHash1].offset,
offset: i + 1,
}
e.dictTable[nextHash2] = prevEntry{
prev: e.dictTable[nextHash2].offset,
offset: i + 2,
}
e.dictTable[nextHash3] = prevEntry{
prev: e.dictTable[nextHash3].offset,
offset: i + 3,
}
}
e.lastDictID = d.id
}
// Init or copy dict table
if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
if len(e.dictLongTable) != len(e.longTable) {
e.dictLongTable = make([]prevEntry, len(e.longTable))
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, bestLongTableBits)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, bestLongTableBits)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,
}
off++
}
}
e.lastDictID = d.id
}
// Reset table to initial state
copy(e.longTable[:], e.dictLongTable)
e.cur = e.maxMatchOff
// Reset table to initial state
copy(e.table[:], e.dictTable)
}

View file

@ -47,6 +47,8 @@ func (o encoderOptions) encoder() encoder {
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
case SpeedBetterCompression: case SpeedBetterCompression:
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
case SpeedBestCompression:
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
case SpeedFastest: case SpeedFastest:
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
} }
@ -143,20 +145,20 @@ const (
// By using this, notice that CPU usage may go up in the future. // By using this, notice that CPU usage may go up in the future.
SpeedBetterCompression SpeedBetterCompression
// SpeedBestCompression will choose the best available compression option.
// This will offer the best compression no matter the CPU cost.
SpeedBestCompression
// speedLast should be kept as the last actual compression option. // speedLast should be kept as the last actual compression option.
// The is not for external usage, but is used to keep track of the valid options. // The is not for external usage, but is used to keep track of the valid options.
speedLast speedLast
// SpeedBestCompression will choose the best available compression option.
// For now this is not implemented.
SpeedBestCompression = SpeedBetterCompression
) )
// EncoderLevelFromString will convert a string representation of an encoding level back // EncoderLevelFromString will convert a string representation of an encoding level back
// to a compression level. The compare is not case sensitive. // to a compression level. The compare is not case sensitive.
// If the string wasn't recognized, (false, SpeedDefault) will be returned. // If the string wasn't recognized, (false, SpeedDefault) will be returned.
func EncoderLevelFromString(s string) (bool, EncoderLevel) { func EncoderLevelFromString(s string) (bool, EncoderLevel) {
for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ { for l := speedNotSet + 1; l < speedLast; l++ {
if strings.EqualFold(s, l.String()) { if strings.EqualFold(s, l.String()) {
return true, l return true, l
} }
@ -173,7 +175,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
return SpeedFastest return SpeedFastest
case level >= 3 && level < 6: case level >= 3 && level < 6:
return SpeedDefault return SpeedDefault
case level > 5: case level >= 6 && level < 10:
return SpeedBetterCompression
case level >= 10:
return SpeedBetterCompression return SpeedBetterCompression
} }
return SpeedDefault return SpeedDefault
@ -188,6 +192,8 @@ func (e EncoderLevel) String() string {
return "default" return "default"
case SpeedBetterCompression: case SpeedBetterCompression:
return "better" return "better"
case SpeedBestCompression:
return "best"
default: default:
return "invalid" return "invalid"
} }
@ -209,6 +215,8 @@ func WithEncoderLevel(l EncoderLevel) EOption {
o.windowSize = 8 << 20 o.windowSize = 8 << 20
case SpeedBetterCompression: case SpeedBetterCompression:
o.windowSize = 16 << 20 o.windowSize = 16 << 20
case SpeedBestCompression:
o.windowSize = 32 << 20
} }
} }
if !o.customALEntropy { if !o.customALEntropy {

2
vendor/modules.txt vendored
View file

@ -88,7 +88,7 @@ github.com/jmespath/go-jmespath
github.com/jstemmer/go-junit-report github.com/jstemmer/go-junit-report
github.com/jstemmer/go-junit-report/formatter github.com/jstemmer/go-junit-report/formatter
github.com/jstemmer/go-junit-report/parser github.com/jstemmer/go-junit-report/parser
# github.com/klauspost/compress v1.11.3 # github.com/klauspost/compress v1.11.4
github.com/klauspost/compress/flate github.com/klauspost/compress/flate
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/gzip github.com/klauspost/compress/gzip