vendor: make vendor-update

This commit is contained in:
Aliaksandr Valialkin 2022-05-31 12:57:04 +03:00
parent a1add5c2c7
commit b6af13ae94
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
30 changed files with 1039 additions and 866 deletions

8
go.mod
View file

@ -11,7 +11,7 @@ require (
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.18.1
github.com/VictoriaMetrics/metricsql v0.43.0
github.com/aws/aws-sdk-go v1.44.22
github.com/aws/aws-sdk-go v1.44.24
github.com/cespare/xxhash/v2 v2.1.2
// TODO: switch back to https://github.com/cheggaaa/pb/v3 when v3-pooling branch
@ -20,7 +20,7 @@ require (
github.com/dmitryk-dk/pb/v3 v3.0.9
github.com/golang/snappy v0.0.4
github.com/influxdata/influxdb v1.9.7
github.com/klauspost/compress v1.15.4
github.com/klauspost/compress v1.15.5
github.com/prometheus/prometheus v1.8.2-0.20201119142752-3ad25a6dc3d9
github.com/urfave/cli/v2 v2.8.1
github.com/valyala/fastjson v1.6.3
@ -28,7 +28,7 @@ require (
github.com/valyala/fasttemplate v1.2.1
github.com/valyala/gozstd v1.17.0
github.com/valyala/quicktemplate v1.7.0
golang.org/x/net v0.0.0-20220524220425-1d687d428aca
golang.org/x/net v0.0.0-20220526153639-5463443f8c37
golang.org/x/oauth2 v0.0.0-20220524215830-622c5d57e401
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a
google.golang.org/api v0.81.0
@ -75,7 +75,7 @@ require (
golang.org/x/text v0.3.7 // indirect
golang.org/x/xerrors v0.0.0-20220517211312-f3a8303e98df // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220525015930-6ca3db687a9d // indirect
google.golang.org/genproto v0.0.0-20220527130721-00d5c0f3be58 // indirect
google.golang.org/grpc v1.46.2 // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect

16
go.sum
View file

@ -142,8 +142,8 @@ github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQ
github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48=
github.com/aws/aws-sdk-go v1.35.31/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/aws/aws-sdk-go v1.44.22 h1:StP+vxaFzl445mSML6KzgiTcqpA+eVwbO5fMNvhVN7c=
github.com/aws/aws-sdk-go v1.44.22/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/aws/aws-sdk-go v1.44.24 h1:3nOkwJBJLiGBmJKWp3z0utyXuBkxyGkRRwWjrTItJaY=
github.com/aws/aws-sdk-go v1.44.24/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
@ -566,8 +566,8 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.15.4 h1:1kn4/7MepF/CHmYub99/nNX8az0IJjfSOU/jbnTVfqQ=
github.com/klauspost/compress v1.15.4/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/compress v1.15.5 h1:qyCLMz2JCrKADihKOh9FxnW3houKeNsp2h5OEz0QSEA=
github.com/klauspost/compress v1.15.5/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
@ -993,8 +993,8 @@ golang.org/x/net v0.0.0-20220325170049-de3da57026de/go.mod h1:CfG3xpIq0wQ8r1q4Su
golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220524220425-1d687d428aca h1:xTaFYiPROfpPhqrfTIDXj0ri1SpfueYT951s4bAuDO8=
golang.org/x/net v0.0.0-20220524220425-1d687d428aca/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220526153639-5463443f8c37 h1:lUkvobShwKsOesNfWWlCS5q7fnbG1MEliIzwu886fn8=
golang.org/x/net v0.0.0-20220526153639-5463443f8c37/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -1358,8 +1358,8 @@ google.golang.org/genproto v0.0.0-20220505152158-f39f71e6c8f3/go.mod h1:RAyBrSAP
google.golang.org/genproto v0.0.0-20220518221133-4f43b3371335/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4=
google.golang.org/genproto v0.0.0-20220519153652-3a47de7e79bd/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4=
google.golang.org/genproto v0.0.0-20220523171625-347a074981d8/go.mod h1:RAyBrSAP7Fh3Nc84ghnVLDPuV51xc9agzmm4Ph6i0Q4=
google.golang.org/genproto v0.0.0-20220525015930-6ca3db687a9d h1:8BnRR08DxAQ+e2pFx64Q3Ltg/AkrrxyG1LLa1WpomyA=
google.golang.org/genproto v0.0.0-20220525015930-6ca3db687a9d/go.mod h1:yKyY4AMRwFiC8yMMNaMi+RkCnjZJt9LoWuvhXjMs+To=
google.golang.org/genproto v0.0.0-20220527130721-00d5c0f3be58 h1:a221mAAEAzq4Lz6ZWRkcS8ptb2mxoxYSt4N68aRyQHM=
google.golang.org/genproto v0.0.0-20220527130721-00d5c0f3be58/go.mod h1:yKyY4AMRwFiC8yMMNaMi+RkCnjZJt9LoWuvhXjMs+To=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM=

View file

@ -3046,6 +3046,73 @@ var awsPartition = partition{
}: endpoint{},
},
},
"backup-gateway": service{
Endpoints: serviceEndpoints{
endpointKey{
Region: "af-south-1",
}: endpoint{},
endpointKey{
Region: "ap-east-1",
}: endpoint{},
endpointKey{
Region: "ap-northeast-1",
}: endpoint{},
endpointKey{
Region: "ap-northeast-2",
}: endpoint{},
endpointKey{
Region: "ap-northeast-3",
}: endpoint{},
endpointKey{
Region: "ap-south-1",
}: endpoint{},
endpointKey{
Region: "ap-southeast-1",
}: endpoint{},
endpointKey{
Region: "ap-southeast-2",
}: endpoint{},
endpointKey{
Region: "ca-central-1",
}: endpoint{},
endpointKey{
Region: "eu-central-1",
}: endpoint{},
endpointKey{
Region: "eu-north-1",
}: endpoint{},
endpointKey{
Region: "eu-south-1",
}: endpoint{},
endpointKey{
Region: "eu-west-1",
}: endpoint{},
endpointKey{
Region: "eu-west-2",
}: endpoint{},
endpointKey{
Region: "eu-west-3",
}: endpoint{},
endpointKey{
Region: "me-south-1",
}: endpoint{},
endpointKey{
Region: "sa-east-1",
}: endpoint{},
endpointKey{
Region: "us-east-1",
}: endpoint{},
endpointKey{
Region: "us-east-2",
}: endpoint{},
endpointKey{
Region: "us-west-1",
}: endpoint{},
endpointKey{
Region: "us-west-2",
}: endpoint{},
},
},
"batch": service{
Defaults: endpointDefaults{
defaultKey{}: endpoint{},
@ -3080,6 +3147,9 @@ var awsPartition = partition{
endpointKey{
Region: "ap-southeast-2",
}: endpoint{},
endpointKey{
Region: "ap-southeast-3",
}: endpoint{},
endpointKey{
Region: "ca-central-1",
}: endpoint{},
@ -8533,6 +8603,52 @@ var awsPartition = partition{
},
},
},
"emr-serverless": service{
Endpoints: serviceEndpoints{
endpointKey{
Region: "ap-northeast-1",
}: endpoint{},
endpointKey{
Region: "eu-west-1",
}: endpoint{},
endpointKey{
Region: "fips-us-east-1",
}: endpoint{
Hostname: "emr-serverless-fips.us-east-1.amazonaws.com",
CredentialScope: credentialScope{
Region: "us-east-1",
},
Deprecated: boxedTrue,
},
endpointKey{
Region: "fips-us-west-2",
}: endpoint{
Hostname: "emr-serverless-fips.us-west-2.amazonaws.com",
CredentialScope: credentialScope{
Region: "us-west-2",
},
Deprecated: boxedTrue,
},
endpointKey{
Region: "us-east-1",
}: endpoint{},
endpointKey{
Region: "us-east-1",
Variant: fipsVariant,
}: endpoint{
Hostname: "emr-serverless-fips.us-east-1.amazonaws.com",
},
endpointKey{
Region: "us-west-2",
}: endpoint{},
endpointKey{
Region: "us-west-2",
Variant: fipsVariant,
}: endpoint{
Hostname: "emr-serverless-fips.us-west-2.amazonaws.com",
},
},
},
"entitlement.marketplace": service{
Defaults: endpointDefaults{
defaultKey{}: endpoint{
@ -10547,14 +10663,6 @@ var awsPartition = partition{
},
},
"health": service{
PartitionEndpoint: "aws-global",
IsRegionalized: boxedFalse,
Defaults: endpointDefaults{
defaultKey{}: endpoint{
SSLCommonName: "health.us-east-1.amazonaws.com",
Protocols: []string{"https"},
},
},
Endpoints: serviceEndpoints{
endpointKey{
Region: "fips-us-east-2",
@ -14800,6 +14908,14 @@ var awsPartition = partition{
Region: "ap-northeast-2",
},
},
endpointKey{
Region: "ap-northeast-3",
}: endpoint{
Hostname: "oidc.ap-northeast-3.amazonaws.com",
CredentialScope: credentialScope{
Region: "ap-northeast-3",
},
},
endpointKey{
Region: "ap-south-1",
}: endpoint{
@ -15495,7 +15611,12 @@ var awsPartition = partition{
},
endpointKey{
Region: "ap-northeast-3",
}: endpoint{},
}: endpoint{
Hostname: "portal.sso.ap-northeast-3.amazonaws.com",
CredentialScope: credentialScope{
Region: "ap-northeast-3",
},
},
endpointKey{
Region: "ap-south-1",
}: endpoint{
@ -25134,6 +25255,16 @@ var awsusgovPartition = partition{
}: endpoint{},
},
},
"backup-gateway": service{
Endpoints: serviceEndpoints{
endpointKey{
Region: "us-gov-east-1",
}: endpoint{},
endpointKey{
Region: "us-gov-west-1",
}: endpoint{},
},
},
"batch": service{
Defaults: endpointDefaults{
defaultKey{}: endpoint{},

View file

@ -5,4 +5,4 @@ package aws
const SDKName = "aws-sdk-go"
// SDKVersion is the version of this SDK
const SDKVersion = "1.44.22"
const SDKVersion = "1.44.24"

View file

@ -17,6 +17,12 @@ This package provides various compression algorithms.
# changelog
* May 11, 2022 (v1.15.4)
* huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577)
* inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581)
* zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583)
* zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580)
* May 5, 2022 (v1.15.3)
* zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572)
* s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575)

View file

@ -84,24 +84,23 @@ type advancedState struct {
length int
offset int
maxInsertIndex int
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
hashOffset int
ii uint16 // position of last match, intended to overflow to reset.
// input window: unprocessed data is window[index:windowEnd]
index int
estBitsPerByte int
hashMatch [maxMatchLength + minMatchLength]uint32
hash uint32
ii uint16 // position of last match, intended to overflow to reset.
// Input hash chains
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
// If hashHead[hashValue] is within the current window, then
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
hashHead [hashSize]uint32
hashPrev [windowSize]uint32
}
type compressor struct {
@ -259,7 +258,6 @@ func (d *compressor) fillWindow(b []byte) {
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
// Update window information.
d.windowEnd += n
@ -403,7 +401,6 @@ func (d *compressor) initDeflate() {
s.hashOffset = 1
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.chainHead = -1
}
@ -432,9 +429,6 @@ func (d *compressor) deflateLazy() {
}
s.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
if s.index < s.maxInsertIndex {
s.hash = hash4(d.window[s.index:])
}
for {
if sanity && s.index > d.windowEnd {
@ -466,11 +460,11 @@ func (d *compressor) deflateLazy() {
}
if s.index < s.maxInsertIndex {
// Update the hash
s.hash = hash4(d.window[s.index:])
ch := s.hashHead[s.hash&hashMask]
hash := hash4(d.window[s.index:])
ch := s.hashHead[hash]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset)
s.hashHead[hash] = uint32(s.index + s.hashOffset)
}
prevLength := s.length
prevOffset := s.offset
@ -503,7 +497,7 @@ func (d *compressor) deflateLazy() {
end += prevIndex
idx := prevIndex + prevLength - (4 - checkOff)
h := hash4(d.window[idx:])
ch2 := int(s.hashHead[h&hashMask]) - s.hashOffset - prevLength + (4 - checkOff)
ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff)
if ch2 > minIndex {
length := matchLen(d.window[prevIndex:end], d.window[ch2:])
// It seems like a pure length metric is best.
@ -547,7 +541,6 @@ func (d *compressor) deflateLazy() {
// Set the head of the hash chain to us.
s.hashHead[newH] = uint32(di + s.hashOffset)
}
s.hash = newH
}
s.index = newIndex
@ -793,7 +786,6 @@ func (d *compressor) reset(w io.Writer) {
d.tokens.Reset()
s.length = minMatchLength - 1
s.offset = 0
s.hash = 0
s.ii = 0
s.maxInsertIndex = 0
}

View file

@ -117,7 +117,7 @@ func (e *fastGen) addBlock(src []byte) int32 {
// hash4u returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4u(u uint32, h uint8) uint32 {
return (u * prime4bytes) >> ((32 - h) & reg8SizeMask32)
return (u * prime4bytes) >> (32 - h)
}
type tableEntryPrev struct {

View file

@ -165,11 +165,6 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
return uint16(b.value >> ((64 - n) & 63))
}
// peekTopBits(n) is equvialent to peekBitFast(64 - n)
func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
return uint16(b.value >> n)
}
func (b *bitReaderShifted) advance(n uint8) {
b.bitsRead += n
b.value <<= n & 63
@ -220,11 +215,6 @@ func (b *bitReaderShifted) fill() {
}
}
// finished returns true if all bits have been read from the bit stream.
func (b *bitReaderShifted) finished() bool {
return b.off == 0 && b.bitsRead >= 64
}
func (b *bitReaderShifted) remaining() uint {
return b.off*8 + uint(64-b.bitsRead)
}

View file

@ -5,8 +5,6 @@
package huff0
import "fmt"
// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
@ -23,14 +21,6 @@ var bitMask16 = [32]uint16{
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
0xFFFF, 0xFFFF} /* up to 16 bits */
// addBits16NC will add up to 16 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
b.nBits += bits
}
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@ -70,104 +60,6 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
b.nBits += encA.nBits + encB.nBits
}
// addBits16ZeroNC will add up to 16 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
// This is fastest if bits can be zero.
func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
if bits == 0 {
return
}
value <<= (16 - bits) & 15
value >>= (16 - bits) & 15
b.bitContainer |= uint64(value) << (b.nBits & 63)
b.nBits += bits
}
// flush will flush all pending full bytes.
// There will be at least 56 bits available for writing when this has been called.
// Using flush32 is faster, but leaves less space for writing.
func (b *bitWriter) flush() {
v := b.nBits >> 3
switch v {
case 0:
return
case 1:
b.out = append(b.out,
byte(b.bitContainer),
)
b.bitContainer >>= 1 << 3
case 2:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
)
b.bitContainer >>= 2 << 3
case 3:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
)
b.bitContainer >>= 3 << 3
case 4:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
)
b.bitContainer >>= 4 << 3
case 5:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
)
b.bitContainer >>= 5 << 3
case 6:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
)
b.bitContainer >>= 6 << 3
case 7:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
)
b.bitContainer >>= 7 << 3
case 8:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
byte(b.bitContainer>>56),
)
b.bitContainer = 0
b.nBits = 0
return
default:
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
}
b.nBits &= 7
}
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {
@ -201,10 +93,3 @@ func (b *bitWriter) close() error {
b.flushAlign()
return nil
}
// reset and continue writing by appending to out.
func (b *bitWriter) reset(out []byte) {
b.bitContainer = 0
b.nBits = 0
b.out = out
}

View file

@ -20,11 +20,6 @@ func (b *byteReader) init(in []byte) {
b.off = 0
}
// advance the stream b n bytes.
func (b *byteReader) advance(n uint) {
b.off += int(n)
}
// Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 {
v3 := int32(b.b[b.off+3])
@ -43,11 +38,6 @@ func (b byteReader) Uint32() uint32 {
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
}
// unread returns the unread portion of the input.
func (b byteReader) unread() []byte {
return b.b[b.off:]
}
// remain will return the number of bytes remaining.
func (b byteReader) remain() int {
return len(b.b) - b.off

View file

@ -404,6 +404,7 @@ func (s *Scratch) canUseTable(c cTable) bool {
return true
}
//lint:ignore U1000 used for debugging
func (s *Scratch) validateTable(c cTable) bool {
if len(c) < int(s.symbolLen) {
return false

View file

@ -11,7 +11,6 @@ import (
type dTable struct {
single []dEntrySingle
double []dEntryDouble
}
// single-symbols decoding
@ -19,13 +18,6 @@ type dEntrySingle struct {
entry uint16
}
// double-symbols decoding
type dEntryDouble struct {
seq [4]byte
nBits uint8
len uint8
}
// Uses special code for all tables that are < 8 bits.
const use8BitTables = true
@ -35,7 +27,7 @@ const use8BitTables = true
// If no Scratch is provided a new one is allocated.
// The returned Scratch can be used for encoding or decoding input using this table.
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
s, err = s.prepare(in)
s, err = s.prepare(nil)
if err != nil {
return s, nil, err
}
@ -236,108 +228,6 @@ func (d *Decoder) buffer() *[4][256]byte {
return &[4][256]byte{}
}
// Decompress1X will decompress a 1X encoded stream.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
if len(d.dt.single) == 0 {
return nil, errors.New("no table loaded")
}
if use8BitTables && d.actualTableLog <= 8 {
return d.decompress1X8Bit(dst, src)
}
var br bitReaderShifted
err := br.init(src)
if err != nil {
return dst, err
}
maxDecodedSize := cap(dst)
dst = dst[:0]
// Avoid bounds check by always having full sized table.
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
dt := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
bufs := d.buffer()
buf := &bufs[0]
var off uint8
for br.off >= 8 {
br.fillFast()
v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
// Refill
br.fillFast()
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
br.close()
d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:]...)
}
}
if len(dst)+int(off) > maxDecodedSize {
d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
dst = append(dst, buf[:off]...)
// br < 8, so uint8 is fine
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
for bitsLeft > 0 {
br.fill()
if false && br.bitsRead >= 32 {
if br.off >= 4 {
v := br.in[br.off-4:]
v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
br.value = (br.value << 32) | uint64(low)
br.bitsRead -= 32
br.off -= 4
} else {
for br.off > 0 {
br.value = (br.value << 8) | uint64(br.in[br.off-1])
br.bitsRead -= 8
br.off--
}
}
}
if len(dst) >= maxDecodedSize {
d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= nBits
dst = append(dst, uint8(v.entry>>8))
}
d.bufs.Put(bufs)
return dst, br.close()
}
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
@ -995,7 +885,6 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
const shift = 56
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.

View file

@ -2,12 +2,14 @@
// +build amd64,!appengine,!noasm,gc
// This file contains the specialisation of Decoder.Decompress4X
// that uses an asm implementation of its main loop.
// and Decoder.Decompress1X that use an asm implementation of their main loops.
package huff0
import (
"errors"
"fmt"
"github.com/klauspost/compress/internal/cpuinfo"
)
// decompress4x_main_loop_x86 is an x86 assembler implementation
@ -146,3 +148,81 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
}
return dst, nil
}
// decompress1x_main_loop_amd64 is an x86 assembler implementation
// of the main loop of Decompress1X.
// It takes all of its inputs from ctx and reports its result in ctx.decoded:
// the number of bytes written, or error_max_decoded_size_exeeded.
//go:noescape
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
// decompress1x_main_loop_bmi2 is an x86 with BMI2 assembler implementation
// of the main loop of Decompress1X.
// It takes all of its inputs from ctx and reports its result in ctx.decoded:
// the number of bytes written, or error_max_decoded_size_exeeded.
//go:noescape
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
// decompress1xContext carries the arguments and the result of the
// decompress1x_main_loop_* assembler routines.
//
// The assembler addresses these fields by fixed byte offsets
// (0, 8, 16, 24, 32 and 40), so fields must not be reordered, added in the
// middle, or have their types changed without updating the asm.
type decompress1xContext struct {
// pbr is the bit reader to decode from; the asm writes the reader state
// back through this pointer on success.
pbr *bitReaderShifted
// peekBits is the right-shift applied to the bit container to obtain a
// table index.
peekBits uint8
// out points at the first byte of the output buffer.
out *byte
// outCap is the number of bytes available at out.
outCap int
// tbl points at the first entry of the single-symbol decoding table.
tbl *dEntrySingle
// decoded is set by the asm to the number of bytes written, or to
// error_max_decoded_size_exeeded on failure.
decoded int
}
// Error reported by asm implementations.
// The decompress1x main loops store this value in decompress1xContext.decoded
// when the output buffer is too small to continue decoding.
const error_max_decoded_size_exeeded = -1
// Decompress1X will decompress a 1X encoded stream.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
//
// Most of the stream is decoded four symbols at a time by an assembler main
// loop (the BMI2 variant is used when the CPU supports it); whatever bits
// remain afterwards are decoded one symbol at a time in Go.
//
// It returns ErrMaxDecodedSizeExceeded when the decoded data does not fit in
// cap(dst), and an error when no table has been loaded or the bit reader
// cannot be initialised from src.
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
	if len(d.dt.single) == 0 {
		return nil, errors.New("no table loaded")
	}

	var br bitReaderShifted
	err := br.init(src)
	if err != nil {
		return dst, err
	}

	maxDecodedSize := cap(dst)
	// Start from an empty slice. When the capacity is too small for the asm
	// loop (< 4 bytes) the tail loop below must still be able to append up to
	// maxDecodedSize bytes; leaving dst at full length would make it report
	// ErrMaxDecodedSizeExceeded before decoding anything.
	dst = dst[:0]

	const tlSize = 1 << tableLogMax
	const tlMask = tlSize - 1

	if maxDecodedSize >= 4 {
		// The asm writes through a raw pointer, so expose the full capacity.
		out := dst[:maxDecodedSize]
		ctx := decompress1xContext{
			pbr:      &br,
			out:      &out[0],
			outCap:   maxDecodedSize,
			peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
			tbl:      &d.dt.single[0],
		}

		if cpuinfo.HasBMI2() {
			decompress1x_main_loop_bmi2(&ctx)
		} else {
			decompress1x_main_loop_amd64(&ctx)
		}
		if ctx.decoded == error_max_decoded_size_exeeded {
			return nil, ErrMaxDecodedSizeExceeded
		}

		// Keep only what the asm loop actually produced.
		dst = dst[:ctx.decoded]
	}

	// br < 8, so uint8 is fine
	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
	for bitsLeft > 0 {
		br.fill()
		if len(dst) >= maxDecodedSize {
			br.close()
			return nil, ErrMaxDecodedSizeExceeded
		}
		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
		nBits := uint8(v.entry)
		br.advance(nBits)
		bitsLeft -= nBits
		dst = append(dst, uint8(v.entry>>8))
	}
	return dst, br.close()
}

View file

@ -660,3 +660,206 @@ skip_fill1003:
SHLQ $0x02, DX
MOVQ DX, 64(AX)
RET
// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
//
// Decodes four symbols per iteration for as long as the reader offset in R9
// is at least 8, then stores the number of bytes written in ctx.decoded
// (offset 40). On lack of output space it stores -1 there instead.
//
// Register use, as established by the loads below:
// DX  - current output pointer (ctx.out, offset 16)
// BX  - end of the output buffer (ctx.out + ctx.outCap)
// SI  - base of the decoding table (ctx.tbl, offset 32)
// DI  - shift count used to peek a table index (ctx.peekBits, offset 8)
// R8, R9, R10, R11 - loaded from offsets 0, 24, 32 and 40 of *ctx.pbr;
// presumably the in pointer, off, value and bitsRead fields of
// bitReaderShifted - TODO confirm against its definition.
// AX  - collects the four decoded bytes before they are stored.
TEXT ·decompress1x_main_loop_amd64(SB), $0-8
MOVQ ctx+0(FP), CX
MOVQ 16(CX), DX
MOVQ 24(CX), BX
// Fewer than 4 bytes of output capacity is reported as an error.
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
MOVQ 24(SI), R9
MOVQ 32(SI), R10
MOVBQZX 40(SI), R11
MOVQ 32(CX), SI
MOVBQZX 8(CX), DI
JMP loop_condition
main_loop:
// Check if we have room for 4 bytes in the output buffer
// NOTE(review): JGE also takes the error path when exactly 4 bytes remain
// (DX+4 == BX) - confirm this is intended.
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
// Decode 4 values
// Refill: when R11 >= 32, read 4 more input bytes below the current offset.
CMPQ R11, $0x20
JL bitReader_fillFast_1_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), R12
MOVQ R11, CX
SHLQ CL, R12
ORQ R12, R10
bitReader_fillFast_1_end:
// Each lookup loads a 16-bit table entry: the high byte is the decoded
// symbol, the low byte is the number of bits to consume.
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
// Move the first two symbols to the top of AX to free AL/AH.
BSWAPL AX
CMPQ R11, $0x20
JL bitReader_fillFast_2_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), R12
MOVQ R11, CX
SHLQ CL, R12
ORQ R12, R10
bitReader_fillFast_2_end:
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
MOVQ DI, CX
MOVQ R10, R12
SHRQ CL, R12
MOVW (SI)(R12*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLQ CL, R10
// The second swap puts the four symbols in decode order for the store.
BSWAPL AX
// Store the decoded values
MOVL AX, (DX)
ADDQ $0x04, DX
loop_condition:
CMPQ R9, $0x08
JGE main_loop
// Update ctx structure
// ctx.decoded = bytes written; then write R9, R10, R11 back to *ctx.pbr.
MOVQ ctx+0(FP), AX
MOVQ DX, CX
MOVQ 16(AX), DX
SUBQ DX, CX
MOVQ CX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)
MOVB R11, 40(AX)
RET
// Report error
// Only ctx.decoded is written; the reader state is not written back.
error_max_decoded_size_exeeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
RET
// func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
// Requires: BMI2
//
// Decodes four symbols per iteration for as long as the reader offset in R9
// is at least 8, then stores the number of bytes written in ctx.decoded
// (offset 40). On lack of output space it stores -1 there instead.
// Shifts use SHLXQ/SHRXQ, which take the count from any register.
//
// Register use, as established by the loads below:
// DX  - current output pointer (ctx.out, offset 16)
// BX  - end of the output buffer (ctx.out + ctx.outCap)
// SI  - base of the decoding table (ctx.tbl, offset 32)
// DI  - shift count used to peek a table index (ctx.peekBits, offset 8)
// R8, R9, R10, R11 - loaded from offsets 0, 24, 32 and 40 of *ctx.pbr;
// presumably the in pointer, off, value and bitsRead fields of
// bitReaderShifted - TODO confirm against its definition.
// AX  - collects the four decoded bytes before they are stored.
TEXT ·decompress1x_main_loop_bmi2(SB), $0-8
MOVQ ctx+0(FP), CX
MOVQ 16(CX), DX
MOVQ 24(CX), BX
// Fewer than 4 bytes of output capacity is reported as an error.
CMPQ BX, $0x04
JB error_max_decoded_size_exeeded
LEAQ (DX)(BX*1), BX
MOVQ (CX), SI
MOVQ (SI), R8
MOVQ 24(SI), R9
MOVQ 32(SI), R10
MOVBQZX 40(SI), R11
MOVQ 32(CX), SI
MOVBQZX 8(CX), DI
JMP loop_condition
main_loop:
// Check if we have room for 4 bytes in the output buffer
// NOTE(review): JGE also takes the error path when exactly 4 bytes remain
// (DX+4 == BX) - confirm this is intended.
LEAQ 4(DX), CX
CMPQ CX, BX
JGE error_max_decoded_size_exeeded
// Decode 4 values
// Refill: when R11 >= 32, read 4 more input bytes below the current offset.
CMPQ R11, $0x20
JL bitReader_fillFast_1_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), CX
SHLXQ R11, CX, CX
ORQ CX, R10
bitReader_fillFast_1_end:
// Each lookup loads a 16-bit table entry: the high byte is the decoded
// symbol, the low byte is the number of bits to consume.
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
// Move the first two symbols to the top of AX to free AL/AH.
BSWAPL AX
CMPQ R11, $0x20
JL bitReader_fillFast_2_end
SUBQ $0x20, R11
SUBQ $0x04, R9
MOVL (R8)(R9*1), CX
SHLXQ R11, CX, CX
ORQ CX, R10
bitReader_fillFast_2_end:
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AH
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
SHRXQ DI, R10, CX
MOVW (SI)(CX*2), CX
MOVB CH, AL
MOVBQZX CL, CX
ADDQ CX, R11
SHLXQ CX, R10, R10
// The second swap puts the four symbols in decode order for the store.
BSWAPL AX
// Store the decoded values
MOVL AX, (DX)
ADDQ $0x04, DX
loop_condition:
CMPQ R9, $0x08
JGE main_loop
// Update ctx structure
// ctx.decoded = bytes written; then write R9, R10, R11 back to *ctx.pbr.
MOVQ ctx+0(FP), AX
MOVQ DX, CX
MOVQ 16(AX), DX
SUBQ DX, CX
MOVQ CX, 40(AX)
MOVQ (AX), AX
MOVQ R9, 24(AX)
MOVQ R10, 32(AX)
MOVB R11, 40(AX)
RET
// Report error
// Only ctx.decoded is written; the reader state is not written back.
error_max_decoded_size_exeeded:
MOVQ ctx+0(FP), AX
MOVQ $-1, CX
MOVQ CX, 40(AX)
RET

View file

@ -191,3 +191,105 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
}
return dst, nil
}
// Decompress1X will decompress a 1X encoded stream.
// The cap of the output buffer will be the maximum decompressed size.
// The length of the supplied input must match the end of a block exactly.
//
// Streams whose table log is at most 8 are handed to decompress1X8Bit.
// Otherwise symbols are decoded four at a time into a 256-byte scratch
// buffer that is flushed to dst whenever it fills, and the remaining bits
// are decoded one symbol at a time.
//
// It returns ErrMaxDecodedSizeExceeded when the decoded data does not fit in
// cap(dst), and an error when no table has been loaded or the bit reader
// cannot be initialised from src.
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
	if len(d.dt.single) == 0 {
		return nil, errors.New("no table loaded")
	}
	if use8BitTables && d.actualTableLog <= 8 {
		return d.decompress1X8Bit(dst, src)
	}
	var br bitReaderShifted
	err := br.init(src)
	if err != nil {
		return dst, err
	}
	maxDecodedSize := cap(dst)
	dst = dst[:0]

	// Avoid bounds check by always having full sized table.
	const tlSize = 1 << tableLogMax
	const tlMask = tlSize - 1
	dt := d.dt.single[:tlSize]

	// Use temp table to avoid bound checks/append penalty.
	bufs := d.buffer()
	buf := &bufs[0]
	// off is a uint8 on purpose: it wraps to 0 after 256 bytes, which is the
	// signal that buf is full and must be flushed.
	var off uint8

	for br.off >= 8 {
		br.fillFast()
		v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
		br.advance(uint8(v.entry))
		buf[off+0] = uint8(v.entry >> 8)

		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
		br.advance(uint8(v.entry))
		buf[off+1] = uint8(v.entry >> 8)

		// Refill
		br.fillFast()

		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
		br.advance(uint8(v.entry))
		buf[off+2] = uint8(v.entry >> 8)

		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
		br.advance(uint8(v.entry))
		buf[off+3] = uint8(v.entry >> 8)

		off += 4
		if off == 0 {
			if len(dst)+256 > maxDecodedSize {
				br.close()
				d.bufs.Put(bufs)
				return nil, ErrMaxDecodedSizeExceeded
			}
			dst = append(dst, buf[:]...)
		}
	}

	// Flush whatever is left in the scratch buffer.
	if len(dst)+int(off) > maxDecodedSize {
		d.bufs.Put(bufs)
		br.close()
		return nil, ErrMaxDecodedSizeExceeded
	}
	dst = append(dst, buf[:off]...)

	// br < 8, so uint8 is fine
	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
	for bitsLeft > 0 {
		br.fill()
		if len(dst) >= maxDecodedSize {
			d.bufs.Put(bufs)
			br.close()
			return nil, ErrMaxDecodedSizeExceeded
		}
		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
		nBits := uint8(v.entry)
		br.advance(nBits)
		bitsLeft -= nBits
		dst = append(dst, uint8(v.entry>>8))
	}
	d.bufs.Put(bufs)
	return dst, br.close()
}

View file

@ -63,13 +63,6 @@ func (b *bitReader) get32BitsFast(n uint8) uint32 {
return v
}
func (b *bitReader) get16BitsFast(n uint8) uint16 {
const regMask = 64 - 1
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return v
}
// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReader) fillFast() {

View file

@ -5,8 +5,6 @@
package zstd
import "fmt"
// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
@ -73,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}
// flush appends every complete byte held in the bit container to b.out,
// least-significant byte first, and keeps only the leftover (fewer than 8)
// bits in the container.
// There will be at least 56 bits available for writing when this has been called.
// Using flush32 is faster, but leaves less space for writing.
// It panics when b.nBits accounts for more than 8 whole bytes.
func (b *bitWriter) flush() {
	nBytes := b.nBits >> 3
	if nBytes > 8 {
		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
	}
	// Stage the bytes in a fixed scratch array so the output is extended
	// with a single append call.
	var scratch [8]byte
	for i := uint8(0); i < nBytes; i++ {
		scratch[i] = byte(b.bitContainer >> (i << 3))
	}
	b.out = append(b.out, scratch[:nBytes]...)
	b.bitContainer >>= nBytes << 3
	b.nBits &= 7
}
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {

View file

@ -49,9 +49,6 @@ const (
// Maximum possible block size (all Raw+Uncompressed).
maxBlockSize = (1 << 21) - 1
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
maxCompressedLiteralSize = 1 << 18
maxRLELiteralSize = 1 << 20
maxMatchLen = 131074
maxSequences = 0x7f00 + 0xffff
@ -105,7 +102,6 @@ type blockDec struct {
// Block is RLE, this is the size.
RLESize uint32
tmp [4]byte
Type blockType
@ -368,14 +364,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
}
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, litRegenSize)
b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
} else {
if litRegenSize > maxCompressedLiteralSize {
// Exceptional
b.literalBuf = make([]byte, litRegenSize)
} else {
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
}
b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
literals = b.literalBuf[:litRegenSize]
@ -405,14 +396,14 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
var err error
// Use our out buffer.
huff.MaxDecodedSize = maxCompressedBlockSize
huff.MaxDecodedSize = litRegenSize
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
@ -437,9 +428,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
huff := hist.huffTree
@ -456,7 +447,7 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
return in, err
}
hist.huffTree = huff
huff.MaxDecodedSize = maxCompressedBlockSize
huff.MaxDecodedSize = litRegenSize
// Use our out buffer.
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
@ -471,6 +462,8 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
if len(literals) != litRegenSize {
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
// Re-cap to get extra size.
literals = b.literalBuf[:len(literals)]
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}

View file

@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
return r, nil
}
// remain returns the slice value that b points to. The result is the
// receiver's own slice value, so it refers to the same underlying bytes
// rather than to a copy.
func (b *byteBuf) remain() []byte {
return *b
}
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {

View file

@ -13,12 +13,6 @@ type byteReader struct {
off int
}
// init rewinds the read offset to zero and makes in the reader's input.
func (b *byteReader) init(in []byte) {
	b.off = 0
	b.b = in
}
// advance the stream by n bytes.
func (b *byteReader) advance(n uint) {
b.off += int(n)

View file

@ -637,60 +637,18 @@ func (d *Decoder) startSyncDecoder(r io.Reader) error {
// Create Decoder:
// ASYNC:
// Spawn 4 go routines.
// 0: Read frames and decode blocks.
// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree.
// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets.
// 3: Wait for stream history, execute sequences, send stream history.
// Spawn 3 go routines.
// 0: Read frames and decode block literals.
// 1: Decode sequences.
// 2: Execute sequences, send to output.
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
defer d.streamWg.Done()
br := readerWrapper{r: r}
var seqPrepare = make(chan *blockDec, d.o.concurrent)
var seqDecode = make(chan *blockDec, d.o.concurrent)
var seqExecute = make(chan *blockDec, d.o.concurrent)
// Async 1: Prepare blocks...
go func() {
var hist history
var hasErr bool
for block := range seqPrepare {
if hasErr {
if block != nil {
seqDecode <- block
}
continue
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 1: new history")
}
hist.reset()
if block.async.newHist.dict != nil {
hist.setDict(block.async.newHist.dict)
}
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqDecode <- block
continue
}
remain, err := block.decodeLiterals(block.data, &hist)
block.err = err
hasErr = block.err != nil
if err == nil {
block.async.literals = hist.decoders.literals
block.async.seqData = remain
} else if debugDecoder {
println("decodeLiterals error:", err)
}
seqDecode <- block
}
close(seqDecode)
}()
// Async 2: Decode sequences...
// Async 1: Decode sequences...
go func() {
var hist history
var hasErr bool
@ -704,7 +662,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 2: new history, recent:", block.async.newHist.recentOffsets)
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
@ -758,7 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 3: new history")
println("Async 2: new history")
}
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
@ -845,6 +803,33 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
decodeStream:
for {
var hist history
var hasErr bool
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
seqDecode <- block
}
return
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqDecode <- block
return
}
remain, err := block.decodeLiterals(block.data, &hist)
block.err = err
hasErr = block.err != nil
if err == nil {
block.async.literals = hist.decoders.literals
block.async.seqData = remain
} else if debugDecoder {
println("decodeLiterals error:", err)
}
seqDecode <- block
}
frame := d.frame
if debugDecoder {
println("New frame...")
@ -871,7 +856,7 @@ decodeStream:
case <-ctx.Done():
case dec := <-d.decoders:
dec.sendErr(err)
seqPrepare <- dec
decodeBlock(dec)
}
break decodeStream
}
@ -891,6 +876,10 @@ decodeStream:
if debugDecoder {
println("Alloc History:", h.allocFrameBuffer)
}
hist.reset()
if h.dict != nil {
hist.setDict(h.dict)
}
dec.async.newHist = &h
dec.async.fcs = frame.FrameContentSize
historySent = true
@ -917,7 +906,7 @@ decodeStream:
}
err = dec.err
last := dec.Last
seqPrepare <- dec
decodeBlock(dec)
if err != nil {
break decodeStream
}
@ -926,7 +915,7 @@ decodeStream:
}
}
}
close(seqPrepare)
close(seqDecode)
wg.Wait()
d.frame.history.b = frameHistCache
}

View file

@ -253,10 +253,11 @@ func (d *frameDec) reset(br byteBuffer) error {
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
if d.o.lowMem && d.history.windowSize < maxBlockSize {
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
// Alloc 2x window size if not low-mem, or very small window size.
d.history.allocFrameBuffer = d.history.windowSize * 2
// TODO: Maybe use FrameContent size
} else {
// Alloc with one additional block
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}

View file

@ -229,18 +229,10 @@ func (d decSymbol) newState() uint16 {
return uint16(d >> 16)
}
// baseline returns the part of d stored from bit 32 upwards, as a uint32.
func (d decSymbol) baseline() uint32 {
	upper := d >> 32
	return uint32(upper)
}
// baselineInt returns the part of d stored from bit 32 upwards, as an int.
func (d decSymbol) baselineInt() int {
	upper := d >> 32
	return int(upper)
}
// set overwrites *d with a value packed from its four arguments:
// nbits in bits 0-7, addBits in bits 8-15, newState in bits 16-31
// and baseline from bit 32 upwards.
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
	packed := decSymbol(baseline) << 32
	packed |= decSymbol(newState) << 16
	packed |= decSymbol(addBits) << 8
	packed |= decSymbol(nbits)
	*d = packed
}
func (d *decSymbol) setNBits(nBits uint8) {
const mask = 0xffffffffffffff00
*d = (*d & mask) | decSymbol(nBits)
@ -256,11 +248,6 @@ func (d *decSymbol) setNewState(state uint16) {
*d = (*d & mask) | decSymbol(state)<<16
}
// setBaseline stores baseline from bit 32 upwards of *d and leaves the
// low 32 bits as they were.
func (d *decSymbol) setBaseline(baseline uint32) {
	const lowHalf = 0xffffffff
	kept := *d & lowHalf
	*d = kept | decSymbol(baseline)<<32
}
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
const mask = 0xffff00ff
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
@ -377,34 +364,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
s.state = dt[br.getBits(tableLog)]
}
// next moves s to its following state: it reads as many bits from br as the
// current state's nbBits reports, adds them to the current state's newState
// value and loads the table entry at that index.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) {
	cur := s.state
	idx := cur.newState() + uint16(br.getBits(cur.nbBits()))
	s.state = s.dt[idx]
}
// finished reports whether the bit reader says it is finished while the
// current state still asks for a non-zero number of bits, i.e. the next
// state could not be read from the input.
func (s *fseState) finished(br *bitReader) bool {
	if !br.finished() {
		return false
	}
	return s.state.nbBits() > 0
}
// final returns the baseline (as int) and addBits of the state s currently
// holds. It does not advance to another state.
func (s *fseState) final() (int, uint8) {
	cur := s.state
	return cur.baselineInt(), cur.addBits()
}
// final returns the baseline (as int) and addBits stored in the symbol s.
// It only reads from s.
func (s decSymbol) final() (int, uint8) {
	base := s.baselineInt()
	extra := s.addBits()
	return base, extra
}
// nextFast advances s to its following state and returns that new state's
// baseline and addBits.
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
	cur := s.state
	read := br.get16BitsFast(cur.nbBits())
	following := s.dt[cur.newState()+read]
	s.state = following
	return following.baseline(), following.addBits()
}

View file

@ -76,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
s.clearCount = maxCount != 0
}
// prepare readies s for use and returns it; the returned error is always nil.
// A nil receiver is replaced by a newly allocated encoder. useRLE is
// cleared, and when clearCount is set while maxCount is zero every entry of
// count is zeroed and clearCount is reset.
func (s *fseEncoder) prepare() (*fseEncoder, error) {
	if s == nil {
		s = new(fseEncoder)
	}
	s.useRLE = false
	if !s.clearCount || s.maxCount != 0 {
		return s, nil
	}
	for idx := range s.count {
		s.count[idx] = 0
	}
	s.clearCount = false
	return s, nil
}
// allocCtable will allocate tables needed for compression.
// If existing tables are big enough, they are simply re-used.
func (s *fseEncoder) allocCtable() {
@ -709,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
c.state = c.stateTable[lu]
}
// encode writes the bits for the supplied symbol transform to the bit
// writer and moves the encoder to its following state, looked up in
// stateTable.
func (c *cState) encode(symbolTT symbolTransform) {
	prev := c.state
	outBits := (uint32(prev) + symbolTT.deltaNbBits) >> 16
	// Emit bits from the state that is being left.
	c.bw.addBits16NC(prev, uint8(outBits))
	following := int32(prev>>(outBits&15)) + int32(symbolTT.deltaFindState)
	c.state = c.stateTable[following]
}
// flush will write the tablelog to the output and flush the remaining full bytes.
func (c *cState) flush(tableLog uint8) {
c.bw.flush32()

View file

@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 {
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
// hash3 hashes the lower 3 bytes of u into a value that fits a hash table
// with h bits.
// Preferably h should be a constant and should always be <32.
func hash3(u uint32, h uint8) uint32 {
	// Shifting left by 8 discards the top byte, so only 3 bytes take part.
	low3 := u << (32 - 24)
	return (low3 * prime3bytes) >> ((32 - h) & 31)
}

View file

@ -188,6 +188,7 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}
}
}
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
@ -203,12 +204,11 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decodeSync(hist []byte) error {
if true {
supported, err := s.decodeSyncSimple(hist)
if supported {
return err
}
}
br := s.br
seqs := s.nSeqs
startSize := len(s.out)
@ -396,6 +396,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@ -418,16 +419,6 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
return br.close()
}
// update states, at least 27 bits must be available.
// It advances the literal-length, match-length and offset states, in that
// order, by calling next(br) on each of them; the calls all read from the
// same bit reader, so their order is significant.
func (s *sequenceDecs) update(br *bitReader) {
// Max 8 bits
s.litLengths.state.next(br)
// Max 9 bits
s.matchLengths.state.next(br)
// Max 8 bits
s.offsets.state.next(br)
}
var bitMask [16]uint16
func init() {
@ -436,87 +427,6 @@ func init() {
}
}
// updateAlt advances the literal-length, match-length and offset states
// using a single read from br. At least 27 bits must be available.
// The original notes this combined update as approx 20% faster.
func (s *sequenceDecs) updateAlt(br *bitReader) {
	llCur := s.litLengths.state.state
	mlCur := s.matchLengths.state.state
	ofCur := s.offsets.state.state
	llN, mlN, ofN := llCur.nbBits(), mlCur.nbBits(), ofCur.nbBits()

	total := llN + mlN + ofN
	if total == 0 {
		// Nothing to read: every state moves straight to its newState entry.
		s.litLengths.state.state = s.litLengths.state.dt[llCur.newState()]
		s.matchLengths.state.state = s.matchLengths.state.dt[mlCur.newState()]
		s.offsets.state.state = s.offsets.state.dt[ofCur.newState()]
		return
	}

	// One read holds, from the top down, the literal-length, match-length
	// and offset bits.
	bits := br.get32BitsFast(total)

	llLow := uint16(bits >> ((ofN + mlN) & 31))
	s.litLengths.state.state = s.litLengths.state.dt[llCur.newState()+llLow]

	mlLow := uint16(bits >> (ofN & 31))
	mlLow &= bitMask[mlN&15]
	s.matchLengths.state.state = s.matchLengths.state.dt[mlCur.newState()+mlLow]

	ofLow := uint16(bits) & bitMask[ofN&15]
	s.offsets.state.state = s.offsets.state.dt[ofCur.newState()+ofLow]
}
// nextFast produces one (ll, mo, ml) triple from the three supplied states
// plus extra bits read from br, and updates s.prevOffset along the way.
// Per the original note it is meant for use when there are at least 4 unused
// bytes left on the stream when done.
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()
ml, mlB := mlState.final()
mo, moB := ofState.final()
// extra bits are stored in reverse order.
// The reads below therefore go offset, then match length, then literal length.
br.fillFast()
mo += br.getBits(moB)
// A second refill happens only when s.maxBits exceeds 32.
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
// With more than one extra offset bit, mo is used as-is and pushed onto
// the front of prevOffset, shifting the older entries back.
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
return
}
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
// mo == 0 reuses the most recent offset and leaves prevOffset untouched.
if mo == 0 {
mo = s.prevOffset[0]
return
}
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
// NOTE(review): this println writes a debug line whenever the condition
// is hit — confirm it is intended outside of debugging.
println("temp was 0")
temp = 1
}
// Move temp to the front of prevOffset; entry 2 is only overwritten when
// mo is not 1.
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
return
}
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()

View file

@ -62,6 +62,10 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
useSafe = true
}
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
useSafe = true
}
br := s.br
maxBlockSize := maxCompressedBlockSize
@ -301,6 +305,10 @@ type executeAsmContext struct {
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
// executeSimple handles cases when dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
@ -327,8 +335,12 @@ func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
literals: s.literals,
windowSize: s.windowSize,
}
ok := sequenceDecs_executeSimple_amd64(&ctx)
var ok bool
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
} else {
ok = sequenceDecs_executeSimple_amd64(&ctx)
}
if !ok {
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))

View file

@ -711,54 +711,49 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ ctx+16(FP), CX
CMPQ 96(CX), $0x00
JZ sequenceDecs_decode_bmi2_skip_update
// Update Literal Length State
MOVBQZX SI, R14
MOVQ $0x00001010, CX
BEXTRQ CX, SI, SI
LEAQ (SI)(DI*1), R14
ADDQ R8, R14
MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, SI
// Load ctx.llTable
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
// Update Match Length State
MOVBQZX DI, R14
MOVQ $0x00001010, CX
BEXTRQ CX, DI, DI
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
// Update Offset State
MOVBQZX R8, R14
MOVQ $0x00001010, CX
BEXTRQ CX, R8, R8
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, R8
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
// Load ctx.ofTable
// Load ctx.llTable
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
sequenceDecs_decode_bmi2_skip_update:
// Adjust offset
@ -971,54 +966,49 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ ctx+16(FP), CX
CMPQ 96(CX), $0x00
JZ sequenceDecs_decode_56_bmi2_skip_update
// Update Literal Length State
MOVBQZX SI, R14
MOVQ $0x00001010, CX
BEXTRQ CX, SI, SI
LEAQ (SI)(DI*1), R14
ADDQ R8, R14
MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, SI
// Load ctx.llTable
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
// Update Match Length State
MOVBQZX DI, R14
MOVQ $0x00001010, CX
BEXTRQ CX, DI, DI
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
// Update Offset State
MOVBQZX R8, R14
MOVQ $0x00001010, CX
BEXTRQ CX, R8, R8
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
ADDQ R15, R8
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
// Load ctx.ofTable
// Load ctx.llTable
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
sequenceDecs_decode_56_bmi2_skip_update:
// Adjust offset
@ -1162,6 +1152,228 @@ TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
// outBase += outPosition
ADDQ DI, BX
main_loop:
MOVQ (AX), R11
MOVQ 16(AX), R12
MOVQ 8(AX), R13
// Copy literals
TESTQ R11, R11
JZ check_offset
XORQ R14, R14
copy_1:
MOVUPS (SI)(R14*1), X0
MOVUPS X0, (BX)(R14*1)
ADDQ $0x10, R14
CMPQ R14, R11
JB copy_1
ADDQ R11, SI
ADDQ R11, BX
ADDQ R11, DI
// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
LEAQ (DI)(R10*1), R11
CMPQ R12, R11
JG error_match_off_too_big
CMPQ R12, R8
JG error_match_off_too_big
// Copy match from history
MOVQ R12, R11
SUBQ DI, R11
JLS copy_match
MOVQ R9, R14
SUBQ R11, R14
CMPQ R13, R11
JGE copy_all_from_history
XORQ R11, R11
TESTQ $0x00000001, R13
JZ copy_4_word
MOVB (R14)(R11*1), R12
MOVB R12, (BX)(R11*1)
ADDQ $0x01, R11
copy_4_word:
TESTQ $0x00000002, R13
JZ copy_4_dword
MOVW (R14)(R11*1), R12
MOVW R12, (BX)(R11*1)
ADDQ $0x02, R11
copy_4_dword:
TESTQ $0x00000004, R13
JZ copy_4_qword
MOVL (R14)(R11*1), R12
MOVL R12, (BX)(R11*1)
ADDQ $0x04, R11
copy_4_qword:
TESTQ $0x00000008, R13
JZ copy_4_test
MOVQ (R14)(R11*1), R12
MOVQ R12, (BX)(R11*1)
ADDQ $0x08, R11
JMP copy_4_test
copy_4:
MOVUPS (R14)(R11*1), X0
MOVUPS X0, (BX)(R11*1)
ADDQ $0x10, R11
copy_4_test:
CMPQ R11, R13
JB copy_4
ADDQ R13, DI
ADDQ R13, BX
ADDQ $0x18, AX
INCQ DX
CMPQ DX, CX
JB main_loop
JMP loop_finished
copy_all_from_history:
XORQ R15, R15
TESTQ $0x00000001, R11
JZ copy_5_word
MOVB (R14)(R15*1), BP
MOVB BP, (BX)(R15*1)
ADDQ $0x01, R15
copy_5_word:
TESTQ $0x00000002, R11
JZ copy_5_dword
MOVW (R14)(R15*1), BP
MOVW BP, (BX)(R15*1)
ADDQ $0x02, R15
copy_5_dword:
TESTQ $0x00000004, R11
JZ copy_5_qword
MOVL (R14)(R15*1), BP
MOVL BP, (BX)(R15*1)
ADDQ $0x04, R15
copy_5_qword:
TESTQ $0x00000008, R11
JZ copy_5_test
MOVQ (R14)(R15*1), BP
MOVQ BP, (BX)(R15*1)
ADDQ $0x08, R15
JMP copy_5_test
copy_5:
MOVUPS (R14)(R15*1), X0
MOVUPS X0, (BX)(R15*1)
ADDQ $0x10, R15
copy_5_test:
CMPQ R15, R11
JB copy_5
ADDQ R11, BX
ADDQ R11, DI
SUBQ R11, R13
// Copy match from the current buffer
copy_match:
TESTQ R13, R13
JZ handle_loop
MOVQ BX, R11
SUBQ R12, R11
// ml <= mo
CMPQ R13, R12
JA copy_overlapping_match
// Copy non-overlapping match
ADDQ R13, DI
MOVQ BX, R12
ADDQ R13, BX
copy_2:
MOVUPS (R11), X0
MOVUPS X0, (R12)
ADDQ $0x10, R11
ADDQ $0x10, R12
SUBQ $0x10, R13
JHI copy_2
JMP handle_loop
// Copy overlapping match
copy_overlapping_match:
ADDQ R13, DI
copy_slow_3:
MOVB (R11), R12
MOVB R12, (BX)
INCQ R11
INCQ BX
DECQ R13
JNZ copy_slow_3
handle_loop:
ADDQ $0x18, AX
INCQ DX
CMPQ DX, CX
JB main_loop
loop_finished:
// Return value
MOVB $0x01, ret+8(FP)
// Update the context
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
MOVQ SI, 112(AX)
RET
error_match_off_too_big:
// Return value
MOVB $0x00, ret+8(FP)
// Update the context
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
MOVQ SI, 112(AX)
RET
empty_seqs:
// Return value
MOVB $0x01, ret+8(FP)
RET
// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
// Requires: SSE
TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9
MOVQ ctx+0(FP), R10
MOVQ 8(R10), CX
TESTQ CX, CX
JZ empty_seqs
MOVQ (R10), AX
MOVQ 24(R10), DX
MOVQ 32(R10), BX
MOVQ 80(R10), SI
MOVQ 104(R10), DI
MOVQ 120(R10), R8
MOVQ 56(R10), R9
MOVQ 64(R10), R10
ADDQ R10, R9
// seqsBase += 24 * seqIndex
LEAQ (DX)(DX*2), R11
SHLQ $0x03, R11
ADDQ R11, AX
// outBase += outPosition
ADDQ DI, BX
main_loop:
MOVQ (AX), R11
MOVQ 16(AX), R12
@ -1327,16 +1539,44 @@ copy_match:
// Copy non-overlapping match
ADDQ R13, DI
MOVQ BX, R12
ADDQ R13, BX
XORQ R12, R12
TESTQ $0x00000001, R13
JZ copy_2_word
MOVB (R11)(R12*1), R14
MOVB R14, (BX)(R12*1)
ADDQ $0x01, R12
copy_2_word:
TESTQ $0x00000002, R13
JZ copy_2_dword
MOVW (R11)(R12*1), R14
MOVW R14, (BX)(R12*1)
ADDQ $0x02, R12
copy_2_dword:
TESTQ $0x00000004, R13
JZ copy_2_qword
MOVL (R11)(R12*1), R14
MOVL R14, (BX)(R12*1)
ADDQ $0x04, R12
copy_2_qword:
TESTQ $0x00000008, R13
JZ copy_2_test
MOVQ (R11)(R12*1), R14
MOVQ R14, (BX)(R12*1)
ADDQ $0x08, R12
JMP copy_2_test
copy_2:
MOVUPS (R11), X0
MOVUPS X0, (R12)
ADDQ $0x10, R11
MOVUPS (R11)(R12*1), X0
MOVUPS X0, (BX)(R12*1)
ADDQ $0x10, R12
SUBQ $0x10, R13
JHI copy_2
copy_2_test:
CMPQ R12, R13
JB copy_2
ADDQ R13, BX
JMP handle_loop
// Copy overlapping match
@ -1673,40 +1913,11 @@ sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
TESTQ AX, AX
JZ check_offset
XORQ R14, R14
TESTQ $0x00000001, AX
JZ copy_1_word
MOVB (R11)(R14*1), R15
MOVB R15, (R10)(R14*1)
ADDQ $0x01, R14
copy_1_word:
TESTQ $0x00000002, AX
JZ copy_1_dword
MOVW (R11)(R14*1), R15
MOVW R15, (R10)(R14*1)
ADDQ $0x02, R14
copy_1_dword:
TESTQ $0x00000004, AX
JZ copy_1_qword
MOVL (R11)(R14*1), R15
MOVL R15, (R10)(R14*1)
ADDQ $0x04, R14
copy_1_qword:
TESTQ $0x00000008, AX
JZ copy_1_test
MOVQ (R11)(R14*1), R15
MOVQ R15, (R10)(R14*1)
ADDQ $0x08, R14
JMP copy_1_test
copy_1:
MOVUPS (R11)(R14*1), X0
MOVUPS X0, (R10)(R14*1)
ADDQ $0x10, R14
copy_1_test:
CMPQ R14, AX
JB copy_1
ADDQ AX, R11
@ -2050,54 +2261,49 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ ctx+16(FP), CX
CMPQ 96(CX), $0x00
JZ sequenceDecs_decodeSync_bmi2_skip_update
// Update Literal Length State
MOVBQZX SI, R13
MOVQ $0x00001010, CX
BEXTRQ CX, SI, SI
LEAQ (SI)(DI*1), R13
ADDQ R8, R13
MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, SI
// Load ctx.llTable
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
// Update Match Length State
MOVBQZX DI, R13
MOVQ $0x00001010, CX
BEXTRQ CX, DI, DI
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
// Update Offset State
MOVBQZX R8, R13
MOVQ $0x00001010, CX
BEXTRQ CX, R8, R8
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, R8
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
// Load ctx.ofTable
// Load ctx.llTable
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_bmi2_skip_update:
// Adjust offset
@ -2180,40 +2386,11 @@ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
TESTQ CX, CX
JZ check_offset
XORQ R14, R14
TESTQ $0x00000001, CX
JZ copy_1_word
MOVB (R10)(R14*1), R15
MOVB R15, (R9)(R14*1)
ADDQ $0x01, R14
copy_1_word:
TESTQ $0x00000002, CX
JZ copy_1_dword
MOVW (R10)(R14*1), R15
MOVW R15, (R9)(R14*1)
ADDQ $0x02, R14
copy_1_dword:
TESTQ $0x00000004, CX
JZ copy_1_qword
MOVL (R10)(R14*1), R15
MOVL R15, (R9)(R14*1)
ADDQ $0x04, R14
copy_1_qword:
TESTQ $0x00000008, CX
JZ copy_1_test
MOVQ (R10)(R14*1), R15
MOVQ R15, (R9)(R14*1)
ADDQ $0x08, R14
JMP copy_1_test
copy_1:
MOVUPS (R10)(R14*1), X0
MOVUPS X0, (R9)(R14*1)
ADDQ $0x10, R14
copy_1_test:
CMPQ R14, CX
JB copy_1
ADDQ CX, R10
@ -3114,54 +3291,49 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ ctx+16(FP), CX
CMPQ 96(CX), $0x00
JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
// Update Literal Length State
MOVBQZX SI, R13
MOVQ $0x00001010, CX
BEXTRQ CX, SI, SI
LEAQ (SI)(DI*1), R13
ADDQ R8, R13
MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, SI
// Load ctx.llTable
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
// Update Match Length State
MOVBQZX DI, R13
MOVQ $0x00001010, CX
BEXTRQ CX, DI, DI
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
// Update Offset State
MOVBQZX R8, R13
MOVQ $0x00001010, CX
BEXTRQ CX, R8, R8
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
ADDQ R14, R8
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
// Load ctx.ofTable
// Load ctx.llTable
MOVQ ctx+16(FP), CX
MOVQ 48(CX), CX
MOVQ (CX)(R8*8), R8
MOVQ (CX), CX
MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_safe_bmi2_skip_update:
// Adjust offset

View file

@ -110,17 +110,6 @@ func printf(format string, a ...interface{}) {
}
}
// matchLenFast returns the number of leading bytes a and b have in common,
// comparing 8 bytes at a time. Only whole 8-byte words of a are examined,
// so up to the last 7 bytes of a are never matched.
func matchLenFast(a, b []byte) int {
	limit := len(a) & (math.MaxInt32 - 7)
	for pos := 0; pos < limit; pos += 8 {
		diff := load64(a, pos) ^ load64(b, pos)
		if diff == 0 {
			continue
		}
		// The lowest set bit of diff identifies the first differing byte.
		return pos + bits.TrailingZeros64(diff)>>3
	}
	return limit
}
// matchLen returns the maximum length.
// a must be the shortest of the two.
// The function also returns whether all bytes matched.

8
vendor/modules.txt vendored
View file

@ -34,7 +34,7 @@ github.com/VictoriaMetrics/metricsql/binaryop
# github.com/VividCortex/ewma v1.2.0
## explicit; go 1.12
github.com/VividCortex/ewma
# github.com/aws/aws-sdk-go v1.44.22
# github.com/aws/aws-sdk-go v1.44.24
## explicit; go 1.11
github.com/aws/aws-sdk-go/aws
github.com/aws/aws-sdk-go/aws/arn
@ -155,7 +155,7 @@ github.com/influxdata/influxdb/pkg/escape
# github.com/jmespath/go-jmespath v0.4.0
## explicit; go 1.14
github.com/jmespath/go-jmespath
# github.com/klauspost/compress v1.15.4
# github.com/klauspost/compress v1.15.5
## explicit; go 1.16
github.com/klauspost/compress
github.com/klauspost/compress/flate
@ -277,7 +277,7 @@ go.opencensus.io/trace/tracestate
go.uber.org/atomic
# go.uber.org/goleak v1.1.11-0.20210813005559-691160354723
## explicit; go 1.13
# golang.org/x/net v0.0.0-20220524220425-1d687d428aca
# golang.org/x/net v0.0.0-20220526153639-5463443f8c37
## explicit; go 1.17
golang.org/x/net/context
golang.org/x/net/context/ctxhttp
@ -350,7 +350,7 @@ google.golang.org/appengine/internal/socket
google.golang.org/appengine/internal/urlfetch
google.golang.org/appengine/socket
google.golang.org/appengine/urlfetch
# google.golang.org/genproto v0.0.0-20220525015930-6ca3db687a9d
# google.golang.org/genproto v0.0.0-20220527130721-00d5c0f3be58
## explicit; go 1.15
google.golang.org/genproto/googleapis/api/annotations
google.golang.org/genproto/googleapis/iam/v1