diff --git a/go.mod b/go.mod index 69aad20d3..ff8e243b8 100644 --- a/go.mod +++ b/go.mod @@ -11,14 +11,14 @@ require ( github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.18.1 github.com/VictoriaMetrics/metricsql v0.43.0 - github.com/aws/aws-sdk-go v1.44.0 + github.com/aws/aws-sdk-go v1.44.4 github.com/cespare/xxhash/v2 v2.1.2 github.com/cheggaaa/pb/v3 v3.0.9-0.20211222075416-90c02fa07ea4 github.com/golang/snappy v0.0.4 github.com/influxdata/influxdb v1.9.6 - github.com/klauspost/compress v1.15.1 + github.com/klauspost/compress v1.15.2 github.com/prometheus/prometheus v1.8.2-0.20201119142752-3ad25a6dc3d9 - github.com/urfave/cli/v2 v2.5.0 + github.com/urfave/cli/v2 v2.5.1 github.com/valyala/fastjson v1.6.3 github.com/valyala/fastrand v1.1.0 github.com/valyala/fasttemplate v1.2.1 @@ -26,8 +26,8 @@ require ( github.com/valyala/quicktemplate v1.7.0 golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4 golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 - golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 - google.golang.org/api v0.75.0 + golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba + google.golang.org/api v0.77.0 gopkg.in/yaml.v2 v2.4.0 ) @@ -44,7 +44,7 @@ require ( github.com/go-logfmt/logfmt v0.5.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.7 // indirect + github.com/google/go-cmp v0.5.8 // indirect github.com/googleapis/gax-go/v2 v2.3.0 // indirect github.com/googleapis/go-type-adapters v1.0.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect @@ -69,7 +69,7 @@ require ( golang.org/x/text v0.3.7 // indirect golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20220422154200-b37d22cd5731 // indirect + google.golang.org/genproto v0.0.0-20220429170224-98d788798c3e // indirect google.golang.org/grpc v1.46.0 // indirect google.golang.org/protobuf v1.28.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect diff --git a/go.sum b/go.sum index 5077e69e2..8e25fa38f 100644 --- a/go.sum +++ b/go.sum @@ -166,8 +166,8 @@ github.com/aws/aws-sdk-go v1.30.12/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZve github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= github.com/aws/aws-sdk-go v1.35.31/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.40.45/go.mod h1:585smgzpB/KqRA+K3y/NL/oYRqQvpNJYvLm+LY1U59Q= -github.com/aws/aws-sdk-go v1.44.0 h1:jwtHuNqfnJxL4DKHBUVUmQlfueQqBW7oXP6yebZR/R0= -github.com/aws/aws-sdk-go v1.44.0/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= +github.com/aws/aws-sdk-go v1.44.4 h1:ePN0CVJMdiz2vYUcJH96eyxRrtKGSDMgyhP6rah2OgE= +github.com/aws/aws-sdk-go v1.44.4/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/aws/aws-sdk-go-v2 v1.9.1/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4= github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.8.1/go.mod h1:CM+19rL1+4dFWnOQKwDc7H1KwXTz+h61oUSHyhV0b3o= @@ -494,8 +494,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -670,8 +671,8 @@ github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8 github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A= -github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw= +github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg= github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= @@ -977,8 +978,8 @@ github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6 github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/cli/v2 v2.5.0 h1:2sqblaW62ebcTIEvwb8eRvDfNHeBAeKxfhdynaanhug= -github.com/urfave/cli/v2 v2.5.0/go.mod h1:oDzoM7pVwz6wHn5ogWgFUU1s4VJayeQS+aEZDqXIEJs= +github.com/urfave/cli/v2 v2.5.1 h1:YKwdkyA0xTBzOaP2G0DVxBnCheHGP+Y9VbKAs4K1Ess= +github.com/urfave/cli/v2 v2.5.1/go.mod h1:oDzoM7pVwz6wHn5ogWgFUU1s4VJayeQS+aEZDqXIEJs= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.30.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus= @@ -1330,8 +1331,8 @@ golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220405052023-b1e9470b6e64/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 h1:xHms4gcpe1YE7A3yIllJXP16CMAGuqwO2lX1mTyyRRc= -golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba h1:AyHWHCBVlIYI5rgEM3o+1PLd0sLPcIAoaUckGQMaWtw= +golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1486,8 +1487,9 @@ google.golang.org/api v0.67.0/go.mod h1:ShHKP8E60yPsKNw/w8w+VYaj9H6buA5UqDp8dhbQ google.golang.org/api v0.70.0/go.mod h1:Bs4ZM2HGifEvXwd50TtW70ovgJffJYw2oRCOFU/SkfA= google.golang.org/api v0.71.0/go.mod h1:4PyU6e6JogV1f9eA4voyrTY2batOLdgZ5qZ5HOCc4j8= google.golang.org/api v0.74.0/go.mod h1:ZpfMZOVRMywNyvJFeqL9HRWBgAuRfSjJFpe9QtRRyDs= -google.golang.org/api v0.75.0 h1:0AYh/ae6l9TDUvIQrDw5QRpM100P6oHgD+o3dYHMzJg= google.golang.org/api v0.75.0/go.mod h1:pU9QmyHLnzlpar1Mjt4IbapUCy8J+6HD6GeELN69ljA= +google.golang.org/api v0.77.0 h1:msijLTxwkJ7Jub5tv9KBVCKtHOQwnvnvkX7ErFFCVxY= +google.golang.org/api v0.77.0/go.mod h1:pU9QmyHLnzlpar1Mjt4IbapUCy8J+6HD6GeELN69ljA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1580,8 +1582,8 @@ google.golang.org/genproto v0.0.0-20220407144326-9054f6ed7bac/go.mod h1:8w6bsBMX google.golang.org/genproto v0.0.0-20220413183235-5e96e2839df9/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= google.golang.org/genproto v0.0.0-20220414192740-2d67ff6cf2b4/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= google.golang.org/genproto v0.0.0-20220421151946-72621c1f0bd3/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= -google.golang.org/genproto v0.0.0-20220422154200-b37d22cd5731 h1:nquqdM9+ps0JZcIiI70+tqoaIFS5Ql4ZuK8UXnz3HfE= -google.golang.org/genproto v0.0.0-20220422154200-b37d22cd5731/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= +google.golang.org/genproto v0.0.0-20220429170224-98d788798c3e h1:gMjH4zLGs9m+dGzR7qHCHaXMOwsJHJKKkHtyXhtOrJk= +google.golang.org/genproto v0.0.0-20220429170224-98d788798c3e/go.mod h1:8w6bsBMX6yCPbAVTeqQHvzxW0EIFigd5lZyahWgyfDo= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= diff --git a/vendor/github.com/aws/aws-sdk-go/aws/version.go b/vendor/github.com/aws/aws-sdk-go/aws/version.go index e32dbb761..b4b2db9a9 100644 --- a/vendor/github.com/aws/aws-sdk-go/aws/version.go +++ b/vendor/github.com/aws/aws-sdk-go/aws/version.go @@ -5,4 +5,4 @@ package aws const SDKName = "aws-sdk-go" // SDKVersion is the version of this SDK -const SDKVersion = "1.44.0" +const SDKVersion = "1.44.4" diff --git a/vendor/github.com/google/go-cmp/cmp/compare.go b/vendor/github.com/google/go-cmp/cmp/compare.go index 2a5446762..fd2b3a42b 100644 --- a/vendor/github.com/google/go-cmp/cmp/compare.go +++ b/vendor/github.com/google/go-cmp/cmp/compare.go @@ -40,6 +40,8 @@ import ( "github.com/google/go-cmp/cmp/internal/value" ) +// TODO(≥go1.18): Use any instead of interface{}. + // Equal reports whether x and y are equal by recursively applying the // following rules in the given order to x and y and all of their sub-values: // diff --git a/vendor/github.com/google/go-cmp/cmp/report_compare.go b/vendor/github.com/google/go-cmp/cmp/report_compare.go index 104bb3053..1ef65ac1d 100644 --- a/vendor/github.com/google/go-cmp/cmp/report_compare.go +++ b/vendor/github.com/google/go-cmp/cmp/report_compare.go @@ -116,7 +116,10 @@ func (opts formatOptions) FormatDiff(v *valueNode, ptrs *pointerReferences) (out } // For leaf nodes, format the value based on the reflect.Values alone. - if v.MaxDepth == 0 { + // As a special case, treat equal []byte as a leaf nodes. + isBytes := v.Type.Kind() == reflect.Slice && v.Type.Elem() == reflect.TypeOf(byte(0)) + isEqualBytes := isBytes && v.NumDiff+v.NumIgnored+v.NumTransformed == 0 + if v.MaxDepth == 0 || isEqualBytes { switch opts.DiffMode { case diffUnknown, diffIdentical: // Format Equal. diff --git a/vendor/github.com/google/go-cmp/cmp/report_reflect.go b/vendor/github.com/google/go-cmp/cmp/report_reflect.go index 76c04fdbd..287b89358 100644 --- a/vendor/github.com/google/go-cmp/cmp/report_reflect.go +++ b/vendor/github.com/google/go-cmp/cmp/report_reflect.go @@ -211,7 +211,7 @@ func (opts formatOptions) FormatValue(v reflect.Value, parentKind reflect.Kind, if len(b) > 0 && utf8.Valid(b) && len(bytes.TrimFunc(b, isPrintSpace)) == 0 { out = opts.formatString("", string(b)) skipType = true - return opts.WithTypeMode(emitType).FormatType(t, out) + return opts.FormatType(t, out) } } @@ -282,7 +282,12 @@ func (opts formatOptions) FormatValue(v reflect.Value, parentKind reflect.Kind, } defer ptrs.Pop() - skipType = true // Let the underlying value print the type instead + // Skip the name only if this is an unnamed pointer type. + // Otherwise taking the address of a value does not reproduce + // the named pointer type. + if v.Type().Name() == "" { + skipType = true // Let the underlying value print the type instead + } out = opts.FormatValue(v.Elem(), t.Kind(), ptrs) out = wrapTrunkReference(ptrRef, opts.PrintAddresses, out) out = &textWrap{Prefix: "&", Value: out} @@ -293,7 +298,6 @@ func (opts formatOptions) FormatValue(v reflect.Value, parentKind reflect.Kind, } // Interfaces accept different concrete types, // so configure the underlying value to explicitly print the type. - skipType = true // Print the concrete type instead return opts.WithTypeMode(emitType).FormatValue(v.Elem(), t.Kind(), ptrs) default: panic(fmt.Sprintf("%v kind not handled", v.Kind())) diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore index b35f8449b..d31b37815 100644 --- a/vendor/github.com/klauspost/compress/.gitignore +++ b/vendor/github.com/klauspost/compress/.gitignore @@ -23,3 +23,10 @@ _testmain.go *.test *.prof /s2/cmd/_s2sx/sfx-exe + +# Linux perf files +perf.data +perf.data.old + +# gdb history +.gdb_history diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 0e2dc116a..5b7cf781a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -17,6 +17,13 @@ This package provides various compression algorithms. # changelog +* Mar 11, 2022 (v1.15.1) + * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512) + * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514) + * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520) + * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521) + * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523) + * Mar 3, 2022 (v1.15.0) * zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498) * zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505) diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go index d47f6644f..ce8e93bcd 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -12,14 +12,14 @@ import ( // decompress4x_main_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog > 8. -// go:noescape +//go:noescape func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 // decompress4x_8b_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog <= 8 which decodes 4 entries // per loop. -// go:noescape +//go:noescape func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go new file mode 100644 index 000000000..3954c5121 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go @@ -0,0 +1,34 @@ +// Package cpuinfo gives runtime info about the current CPU. +// +// This is a very limited module meant for use internally +// in this project. For more versatile solution check +// https://github.com/klauspost/cpuid. +package cpuinfo + +// HasBMI1 checks whether an x86 CPU supports the BMI1 extension. +func HasBMI1() bool { + return hasBMI1 +} + +// HasBMI2 checks whether an x86 CPU supports the BMI2 extension. +func HasBMI2() bool { + return hasBMI2 +} + +// DisableBMI2 will disable BMI2, for testing purposes. +// Call returned function to restore previous state. +func DisableBMI2() func() { + old := hasBMI2 + hasBMI2 = false + return func() { + hasBMI2 = old + } +} + +// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions. +func HasBMI() bool { + return HasBMI1() && HasBMI2() +} + +var hasBMI1 bool +var hasBMI2 bool diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go new file mode 100644 index 000000000..e802579c4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go @@ -0,0 +1,11 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package cpuinfo + +// go:noescape +func x86extensions() (bmi1, bmi2 bool) + +func init() { + hasBMI1, hasBMI2 = x86extensions() +} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s new file mode 100644 index 000000000..4465fbe9e --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s @@ -0,0 +1,36 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" +#include "funcdata.h" +#include "go_asm.h" + +TEXT ·x86extensions(SB), NOSPLIT, $0 + // 1. determine max EAX value + XORQ AX, AX + CPUID + + CMPQ AX, $7 + JB unsupported + + // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction" + MOVQ $7, AX + MOVQ $0, CX + CPUID + + BTQ $3, BX // bit 3 = BMI1 + SETCS AL + + BTQ $8, BX // bit 8 = BMI2 + SETCS AH + + MOVB AL, bmi1+0(FP) + MOVB AH, bmi2+1(FP) + RET + +unsupported: + XORQ AX, AX + MOVB AL, bmi1+0(FP) + MOVB AL, bmi2+1(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index e3445ac19..beb7fa872 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -386,47 +386,31 @@ In practice this means that concurrency is often limited to utilizing about 3 co ### Benchmarks -These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd). - The first two are streaming decodes and the last are smaller inputs. - + +Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used. + ``` -BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op +BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op +BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op -BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op -BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op +Concurrent blocks, performance: -Concurrent performance: - -BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op - -BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op ``` -This reflects the performance around May 2020, but this may be out of date. +This reflects the performance around May 2022, but this may be out of date. ## Zstd inside ZIP files diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 7d567a54a..b2bca3301 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -5,9 +5,14 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "fmt" "io" + "io/ioutil" + "os" + "path/filepath" "sync" "github.com/klauspost/compress/huff0" @@ -38,6 +43,9 @@ const ( // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) maxCompressedBlockSize = 128 << 10 + compressedBlockOverAlloc = 16 + maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc + // Maximum possible block size (all Raw+Uncompressed). maxBlockSize = (1 << 21) - 1 @@ -136,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) - maxSize := maxBlockSize + maxSize := maxCompressedBlockSizeAlloc switch b.Type { case blockTypeReserved: return ErrReservedBlockType @@ -157,9 +165,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { println("Data size on stream:", cSize) } b.RLESize = 0 - maxSize = maxCompressedBlockSize + maxSize = maxCompressedBlockSizeAlloc if windowSize < maxCompressedBlockSize && b.lowMem { - maxSize = int(windowSize) + maxSize = int(windowSize) + compressedBlockOverAlloc } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { if debugDecoder { @@ -190,9 +198,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { // Read block data. if cap(b.dataStorage) < cSize { if b.lowMem || cSize > maxCompressedBlockSize { - b.dataStorage = make([]byte, 0, cSize) + b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) } else { - b.dataStorage = make([]byte, 0, maxCompressedBlockSize) + b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) } } if cap(b.dst) <= maxSize { @@ -486,10 +494,15 @@ func (b *blockDec) decodeCompressed(hist *history) error { b.dst = append(b.dst, hist.decoders.literals...) return nil } - err = hist.decoders.decodeSync(hist) + before := len(hist.decoders.out) + err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) if err != nil { return err } + if hist.decoders.maxSyncLen > 0 { + hist.decoders.maxSyncLen += uint64(before) + hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) + } b.dst = hist.decoders.out hist.recentOffsets = hist.decoders.prevOffset return nil @@ -632,6 +645,22 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { println("initializing sequences:", err) return err } + // Extract blocks... + if false && hist.dict == nil { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize) + var buf bytes.Buffer + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse)) + buf.Write(in) + ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm) + } + return nil } @@ -650,6 +679,7 @@ func (b *blockDec) decodeSequences(hist *history) error { } hist.decoders.windowSize = hist.windowSize hist.decoders.prevOffset = hist.recentOffsets + err := hist.decoders.decode(b.sequence) hist.recentOffsets = hist.decoders.prevOffset return err diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 9fcdaac1d..c65ea9795 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -347,18 +347,20 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } frame.history.setDict(&dict) } - - if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { - return dst, ErrDecoderSizeExceeded + if frame.WindowSize > d.o.maxWindowSize { + return dst, ErrWindowSizeExceeded } - if frame.FrameContentSize < 1<<30 { - // Never preallocate more than 1 GB up front. + if frame.FrameContentSize != fcsUnknown { + if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { + return dst, ErrDecoderSizeExceeded + } if cap(dst)-len(dst) < int(frame.FrameContentSize) { - dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)) + dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc) copy(dst2, dst) dst = dst2 } } + if cap(dst) == 0 { // Allocate len(input) * 2 by default if nothing is provided // and we didn't get frame content size. diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index fd05c9bb0..fc52ebc40 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -31,7 +31,7 @@ func (o *decoderOptions) setDefault() { if o.concurrent > 4 { o.concurrent = 4 } - o.maxDecodedSize = 1 << 63 + o.maxDecodedSize = 64 << 30 } // WithDecoderLowmem will set whether to use a lower amount of memory, @@ -66,7 +66,7 @@ func WithDecoderConcurrency(n int) DOption { // WithDecoderMaxMemory allows to set a maximum decoded size for in-memory // non-streaming operations or maximum window size for streaming operations. // This can be used to control memory usage of potentially hostile content. -// Maximum and default is 1 << 63 bytes. +// Maximum is 1 << 63 bytes. Default is 64GiB. func WithDecoderMaxMemory(n uint64) DOption { return func(o *decoderOptions) error { if n == 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 11089d223..509d5cece 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -326,6 +326,19 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { d.history.ignoreBuffer = len(dst) // Store input length, so we only check new data. crcStart := len(dst) + d.history.decoders.maxSyncLen = 0 + if d.FrameContentSize != fcsUnknown { + d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst)) + if d.history.decoders.maxSyncLen > d.o.maxDecodedSize { + return dst, ErrDecoderSizeExceeded + } + if uint64(cap(dst)) < d.history.decoders.maxSyncLen { + // Alloc for output + dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc) + copy(dst2, dst) + dst = dst2 + } + } var err error for { err = dec.reset(d.rawInput, d.WindowSize) diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index bb3d4fd6c..fde4e6b60 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -5,8 +5,10 @@ package zstd import ( + "encoding/binary" "errors" "fmt" + "io" ) const ( @@ -182,6 +184,29 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { return s.buildDtable() } +func (s *fseDecoder) mustReadFrom(r io.Reader) { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + // dt [maxTablesize]decSymbol // Decompression table. + // symbolLen uint16 // Length of active part of the symbol table. + // actualTableLog uint8 // Selected tablelog. + // maxBits uint8 // Maximum number of additional bits + // // used for table creation to avoid allocations. + // stateTable [256]uint16 + // norm [maxSymbolValue + 1]int16 + // preDefined bool + fatalErr(binary.Read(r, binary.LittleEndian, &s.dt)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.norm)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined)) +} + // decSymbol contains information about a state entry, // Including the state offset base, the output symbol and // the number of bits to read for the low part of the destination state. diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 819f1461b..e80139dd9 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -73,6 +73,7 @@ type sequenceDecs struct { seqSize int windowSize int maxBits uint8 + maxSyncLen uint64 } // initialize all 3 decoders from the stream input. @@ -98,153 +99,13 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro return nil } -// decode sequences from the stream with the provided history. -func (s *sequenceDecs) decode(seqs []seqVals) error { - br := s.br - - // Grab full sizes tables, to avoid bounds checks. - llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] - llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - s.seqSize = 0 - litRemain := len(s.literals) - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - for i := range seqs { - var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { - // inlined function: - // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) - - // Final will not read from stream. - var llB, mlB, moB uint8 - ll, llB = llState.final() - ml, mlB = mlState.final() - mo, moB = ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - } else { - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - } else { - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("WARNING: temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - } - } - br.fillFast() - } else { - if br.overread() { - if debugDecoder { - printf("reading sequence %d, exceeded available data\n", i) - } - return io.ErrUnexpectedEOF - } - ll, mo, ml = s.next(br, llState, mlState, ofState) - br.fill() - } - - if debugSequences { - println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) - } - // Evaluate. - // We might be doing this async, so do it early. - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - } - if ml > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", ml) - } - s.seqSize += ll + ml - if s.seqSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) - } - litRemain -= ll - if litRemain < 0 { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) - } - seqs[i] = seqVals{ - ll: ll, - ml: ml, - mo: mo, - } - if i == len(seqs)-1 { - // This is the last sequence, so we shouldn't update state. - break - } - - // Manually inlined, ~ 5-20% faster - // Update all 3 states at once. Approx 20% faster. - nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() - if nBits == 0 { - llState = llTable[llState.newState()&maxTableMask] - mlState = mlTable[mlState.newState()&maxTableMask] - ofState = ofTable[ofState.newState()&maxTableMask] - } else { - bits := br.get32BitsFast(nBits) - lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) - llState = llTable[(llState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits >> (ofState.nbBits() & 31)) - lowBits &= bitMask[mlState.nbBits()&15] - mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] - ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] - } - } - s.seqSize += litRemain - if s.seqSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) - } - err := br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } - return err -} - // execute will execute the decoded sequence with the provided history. // The sequence must be evaluated before being sent. func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { + if len(s.dict) == 0 { + return s.executeSimple(seqs, hist) + } + // Ensure we have enough output size... if len(s.out)+s.seqSize > cap(s.out) { addBytes := s.seqSize + len(s.out) @@ -341,14 +202,19 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { } // decode sequences from the stream with the provided history. -func (s *sequenceDecs) decodeSync(history *history) error { +func (s *sequenceDecs) decodeSync(hist []byte) error { + if true { + supported, err := s.decodeSyncSimple(hist) + if supported { + return err + } + } br := s.br seqs := s.nSeqs startSize := len(s.out) // Grab full sizes tables, to avoid bounds checks. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - hist := history.b[history.ignoreBuffer:] out := s.out maxBlockSize := maxCompressedBlockSize if s.windowSize < maxBlockSize { @@ -433,7 +299,7 @@ func (s *sequenceDecs) decodeSync(history *history) error { } size := ll + ml + len(out) if size-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize) + return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) } if size > cap(out) { // Not enough size, which can happen under high volume block streaming conditions @@ -463,13 +329,13 @@ func (s *sequenceDecs) decodeSync(history *history) error { if mo > len(out)+len(hist) || mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } // we may be in dictionary. dictO := len(s.dict) - (mo - (len(out) + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } end := dictO + ml if end > len(s.dict) { @@ -543,8 +409,8 @@ func (s *sequenceDecs) decodeSync(history *history) error { } // Check if space for literals - if len(s.literals)+len(s.out)-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize) + if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize) } // Add final literals diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go new file mode 100644 index 000000000..4676b09cc --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -0,0 +1,350 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package zstd + +import ( + "fmt" + + "github.com/klauspost/compress/internal/cpuinfo" +) + +type decodeSyncAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + litRemain int + out []byte + outPosition int + literals []byte + litPosition int + history []byte + windowSize int + ll int // set on error (not for all errors, please refer to _generate/gen.go) + ml int // set on error (not for all errors, please refer to _generate/gen.go) + mo int // set on error (not for all errors, please refer to _generate/gen.go) +} + +// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// decode sequences from the stream with the provided history but without a dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + if len(s.dict) > 0 { + return false, nil + } + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { + return false, nil + } + useSafe := false + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { + useSafe = true + } + if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { + useSafe = true + } + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeSyncAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + iteration: s.nSeqs - 1, + litRemain: len(s.literals), + out: s.out, + outPosition: len(s.out), + literals: s.literals, + windowSize: s.windowSize, + history: hist, + } + + s.seqSize = 0 + startSize := len(s.out) + + var errCode int + if cpuinfo.HasBMI2() { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx) + } + } else { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx) + } + } + switch errCode { + case noError: + break + + case errorMatchLenOfsMismatch: + return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml) + + case errorMatchLenTooBig: + return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml) + + case errorMatchOffTooBig: + return true, fmt.Errorf("match offset (%d) bigger than current history (%d)", + ctx.mo, ctx.outPosition+len(hist)-startSize) + + case errorNotEnoughLiterals: + return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", + ctx.ll, ctx.litRemain+ctx.ll) + + case errorNotEnoughSpace: + size := ctx.outPosition + ctx.ll + ctx.ml + if debugDecoder { + println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize) + } + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) + + default: + return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + return true, err + } + + s.literals = s.literals[ctx.litPosition:] + t := ctx.outPosition + s.out = s.out[:t] + + // Add final literals + s.out = append(s.out, s.literals...) + if debugDecoder { + t += len(s.literals) + if t != len(s.out) { + panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t)) + } + } + + return true, nil +} + +// -------------------------------------------------------------------------------- + +type decodeAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + seqs []seqVals + litRemain int +} + +const noError = 0 + +// error reported when mo == 0 && ml > 0 +const errorMatchLenOfsMismatch = 1 + +// error reported when ml > maxMatchLen +const errorMatchLenTooBig = 2 + +// error reported when mo > available history or mo > s.windowSize +const errorMatchOffTooBig = 3 + +// error reported when the sum of literal lengths exeeceds the literal buffer size +const errorNotEnoughLiterals = 4 + +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + seqs: seqs, + iteration: len(seqs) - 1, + litRemain: len(s.literals), + } + + s.seqSize = 0 + lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 + var errCode int + if cpuinfo.HasBMI2() { + if lte56bits { + errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_bmi2(s, br, &ctx) + } + } else { + if lte56bits { + errCode = sequenceDecs_decode_56_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_amd64(s, br, &ctx) + } + } + if errCode != 0 { + i := len(seqs) - ctx.iteration - 1 + switch errCode { + case errorMatchLenOfsMismatch: + ml := ctx.seqs[i].ml + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + + case errorMatchLenTooBig: + ml := ctx.seqs[i].ml + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + + case errorNotEnoughLiterals: + ll := ctx.seqs[i].ll + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) + } + + return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + } + + if ctx.litRemain < 0 { + return fmt.Errorf("literal count is too big: total available %d, total requested %d", + len(s.literals), len(s.literals)-ctx.litRemain) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// -------------------------------------------------------------------------------- + +type executeAsmContext struct { + seqs []seqVals + seqIndex int + out []byte + history []byte + literals []byte + outPosition int + litPosition int + windowSize int +} + +// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm. +// +// Returns false if a match offset is too big. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool + +// executeSimple handles cases when dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) { + addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + ctx := executeAsmContext{ + seqs: seqs, + seqIndex: 0, + out: out, + history: hist, + outPosition: t, + litPosition: 0, + literals: s.literals, + windowSize: s.windowSize, + } + + ok := sequenceDecs_executeSimple_amd64(&ctx) + if !ok { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", + seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) + } + s.literals = s.literals[ctx.litPosition:] + t = ctx.outPosition + + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s new file mode 100644 index 000000000..01cc23fa8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -0,0 +1,3519 @@ +// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !noasm +// +build !appengine,!noasm,gc,!noasm + +// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_end + +sequenceDecs_decode_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_byte_by_byte + +sequenceDecs_decode_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_2_end + +sequenceDecs_decode_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte + +sequenceDecs_decode_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_amd64_adjust_offset_nonzero + +sequenceDecs_decode_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_amd64_adjust_zero + JEQ sequenceDecs_decode_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_amd64_adjust_three + JMP sequenceDecs_decode_amd64_adjust_two + +sequenceDecs_decode_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_56_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_56_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_56_amd64_fill_end + +sequenceDecs_decode_56_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_56_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_56_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte + +sequenceDecs_decode_56_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_56_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_56_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero + +sequenceDecs_decode_56_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_amd64_adjust_zero + JEQ sequenceDecs_decode_56_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_amd64_adjust_three + JMP sequenceDecs_decode_56_amd64_adjust_two + +sequenceDecs_decode_56_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_56_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_56_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_56_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_56_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_56_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_end + +sequenceDecs_decode_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_byte_by_byte + +sequenceDecs_decode_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_2_end + +sequenceDecs_decode_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte + +sequenceDecs_decode_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_bmi2_adjust_zero + JEQ sequenceDecs_decode_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_bmi2_adjust_three + JMP sequenceDecs_decode_bmi2_adjust_two + +sequenceDecs_decode_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_56_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_56_bmi2_fill_end + +sequenceDecs_decode_56_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_56_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_56_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte + +sequenceDecs_decode_56_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_56_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_bmi2_adjust_zero + JEQ sequenceDecs_decode_56_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_bmi2_adjust_three + JMP sequenceDecs_decode_56_bmi2_adjust_two + +sequenceDecs_decode_56_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_56_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_56_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_56_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_56_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_56_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, R11 + JZ copy_1_word + MOVB (SI)(R14*1), R15 + MOVB R15, (BX)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, R11 + JZ copy_1_dword + MOVW (SI)(R14*1), R15 + MOVW R15, (BX)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, R11 + JZ copy_1_qword + MOVL (SI)(R14*1), R15 + MOVL R15, (BX)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, R11 + JZ copy_1_test + MOVQ (SI)(R14*1), R15 + MOVQ R15, (BX)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (SI)(R14*1), X0 + MOVUPS X0, (BX)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, R11 + JB copy_1 + ADDQ R11, SI + ADDQ R11, BX + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JGE copy_all_from_history + XORQ R11, R11 + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(R11*1), R12 + MOVB R12, (BX)(R11*1) + ADDQ $0x01, R11 + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(R11*1), R12 + MOVW R12, (BX)(R11*1) + ADDQ $0x02, R11 + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(R11*1), R12 + MOVL R12, (BX)(R11*1) + ADDQ $0x04, R11 + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(R11*1), R12 + MOVQ R12, (BX)(R11*1) + ADDQ $0x08, R11 + JMP copy_4_test + +copy_4: + MOVUPS (R14)(R11*1), X0 + MOVUPS X0, (BX)(R11*1) + ADDQ $0x10, R11 + +copy_4_test: + CMPQ R11, R13 + JB copy_4 + ADDQ R13, DI + ADDQ R13, BX + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, R11 + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (BX)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, R11 + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (BX)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, R11 + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (BX)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, R11 + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (BX)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (BX)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, R11 + JB copy_5 + ADDQ R11, BX + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + +copy_2: + MOVUPS (R11)(R12*1), X0 + MOVUPS X0, (BX)(R12*1) + ADDQ $0x10, R12 + CMPQ R12, R13 + JB copy_2 + ADDQ R13, BX + ADDQ R13, DI + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (R11)(R12*1), R14 + MOVB R14, (BX)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, BX + ADDQ R13, DI + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_end + +sequenceDecs_decodeSync_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_2_end + +sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ CX, CX + +copy_2: + MOVUPS (AX)(CX*1), X0 + MOVUPS X0, (R10)(CX*1) + ADDQ $0x10, CX + CMPQ CX, R13 + JB copy_2 + ADDQ R13, R10 + ADDQ R13, R12 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ CX, CX + +copy_slow_3: + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + INCQ CX + CMPQ CX, R13 + JB copy_slow_3 + ADDQ R13, R10 + ADDQ R13, R12 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_end + +sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_2_end + +sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + +copy_2: + MOVUPS (CX)(R12*1), X0 + MOVUPS X0, (R9)(R12*1) + ADDQ $0x10, R12 + CMPQ R12, R13 + JB copy_2 + ADDQ R13, R9 + ADDQ R13, R11 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, R9 + ADDQ R13, R11 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_safe_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_end + +sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end + +sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_safe_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_safe_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + ADDQ $0x01, CX + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (AX)(CX*1), R14 + MOVW R14, (R10)(CX*1) + ADDQ $0x02, CX + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (AX)(CX*1), R14 + MOVL R14, (R10)(CX*1) + ADDQ $0x04, CX + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (AX)(CX*1), R14 + MOVQ R14, (R10)(CX*1) + ADDQ $0x08, CX + JMP copy_2_test + +copy_2: + MOVUPS (AX)(CX*1), X0 + MOVUPS X0, (R10)(CX*1) + ADDQ $0x10, CX + +copy_2_test: + CMPQ CX, R13 + JB copy_2 + ADDQ R13, R10 + ADDQ R13, R12 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ CX, CX + +copy_slow_3: + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + INCQ CX + CMPQ CX, R13 + JB copy_slow_3 + ADDQ R13, R10 + ADDQ R13, R12 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_safe_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_safe_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_end + +sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end + +sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_safe_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_safe_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + ADDQ $0x01, R12 + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (CX)(R12*1), R14 + MOVW R14, (R9)(R12*1) + ADDQ $0x02, R12 + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (CX)(R12*1), R14 + MOVL R14, (R9)(R12*1) + ADDQ $0x04, R12 + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (CX)(R12*1), R14 + MOVQ R14, (R9)(R12*1) + ADDQ $0x08, R12 + JMP copy_2_test + +copy_2: + MOVUPS (CX)(R12*1), X0 + MOVUPS X0, (R9)(R12*1) + ADDQ $0x10, R12 + +copy_2_test: + CMPQ R12, R13 + JB copy_2 + ADDQ R13, R9 + ADDQ R13, R11 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, R9 + ADDQ R13, R11 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_safe_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go new file mode 100644 index 000000000..c3452bc3a --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -0,0 +1,237 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +package zstd + +import ( + "fmt" + "io" +) + +// decode sequences from the stream with the provided history but without dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + return false, nil +} + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + // Grab full sizes tables, to avoid bounds checks. + llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] + llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + s.seqSize = 0 + litRemain := len(s.literals) + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + for i := range seqs { + var ll, mo, ml int + if br.off > 4+((maxOffsetBits+16+16)>>3) { + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("WARNING: temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } + br.fillFast() + } else { + if br.overread() { + if debugDecoder { + printf("reading sequence %d, exceeded available data\n", i) + } + return io.ErrUnexpectedEOF + } + ll, mo, ml = s.next(br, llState, mlState, ofState) + br.fill() + } + + if debugSequences { + println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) + } + // Evaluate. + // We might be doing this async, so do it early. + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + } + s.seqSize += ll + ml + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + litRemain -= ll + if litRemain < 0 { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) + } + seqs[i] = seqVals{ + ll: ll, + ml: ml, + mo: mo, + } + if i == len(seqs)-1 { + // This is the last sequence, so we shouldn't update state. + break + } + + // Manually inlined, ~ 5-20% faster + // Update all 3 states at once. Approx 20% faster. + nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() + if nBits == 0 { + llState = llTable[llState.newState()&maxTableMask] + mlState = mlTable[mlState.newState()&maxTableMask] + ofState = ofTable[ofState.newState()&maxTableMask] + } else { + bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) + llState = llTable[(llState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits >> (ofState.nbBits() & 31)) + lowBits &= bitMask[mlState.nbBits()&15] + mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] + ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] + } + } + s.seqSize += litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// executeSimple handles cases when a dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Malformed input + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into the current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + + // We must be in the current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go index ffffcbc25..b53f606a1 100644 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -21,23 +21,34 @@ const ZipMethodPKWare = 20 var zipReaderPool sync.Pool // newZipReader creates a pooled zip decompressor. -func newZipReader(r io.Reader) io.ReadCloser { - dec, ok := zipReaderPool.Get().(*Decoder) - if ok { - dec.Reset(r) - } else { - d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) - if err != nil { - panic(err) - } - dec = d +func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { + pool := &zipReaderPool + if len(opts) > 0 { + opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...) + // Force concurrency 1 + opts = append(opts, WithDecoderConcurrency(1)) + // Create our own pool + pool = &sync.Pool{} + } + return func(r io.Reader) io.ReadCloser { + dec, ok := pool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, opts...) + if err != nil { + panic(err) + } + dec = d + } + return &pooledZipReader{dec: dec, pool: pool} } - return &pooledZipReader{dec: dec} } type pooledZipReader struct { - mu sync.Mutex // guards Close and Read - dec *Decoder + mu sync.Mutex // guards Close and Read + pool *sync.Pool + dec *Decoder } func (r *pooledZipReader) Read(p []byte) (n int, err error) { @@ -48,8 +59,8 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) { } dec, err := r.dec.Read(p) if err == io.EOF { - err = r.dec.Reset(nil) - zipReaderPool.Put(r.dec) + r.dec.Reset(nil) + r.pool.Put(r.dec) r.dec = nil } return dec, err @@ -61,7 +72,7 @@ func (r *pooledZipReader) Close() error { var err error if r.dec != nil { err = r.dec.Reset(nil) - zipReaderPool.Put(r.dec) + r.pool.Put(r.dec) r.dec = nil } return err @@ -115,6 +126,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { // ZipDecompressor returns a decompressor that can be registered with zip libraries. // See ZipCompressor for example. -func ZipDecompressor() func(r io.Reader) io.ReadCloser { - return newZipReader +// Options can be specified. WithDecoderConcurrency(1) is forced, +// and by default a 128MB maximum decompression window is specified. +// The window size can be overridden if required. +func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser { + return newZipReader(opts...) } diff --git a/vendor/github.com/urfave/cli/v2/LICENSE b/vendor/github.com/urfave/cli/v2/LICENSE index 42a597e29..2c84c78a1 100644 --- a/vendor/github.com/urfave/cli/v2/LICENSE +++ b/vendor/github.com/urfave/cli/v2/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2016 Jeremy Saenz & Contributors +Copyright (c) 2022 urfave/cli maintainers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/github.com/urfave/cli/v2/README.md b/vendor/github.com/urfave/cli/v2/README.md index 134e72e89..6e4d698c2 100644 --- a/vendor/github.com/urfave/cli/v2/README.md +++ b/vendor/github.com/urfave/cli/v2/README.md @@ -78,3 +78,7 @@ export PATH=$PATH:$GOPATH/bin cli is tested against multiple versions of Go on Linux, and against the latest released version of Go on OS X and Windows. This project uses Github Actions for builds. To see our currently supported go versions and platforms, look at the [./.github/workflows/cli.yml](https://github.com/urfave/cli/blob/main/.github/workflows/cli.yml). + +## License + +See [`LICENSE`](./LICENSE) diff --git a/vendor/github.com/urfave/cli/v2/flag.go b/vendor/github.com/urfave/cli/v2/flag.go index b85bafd54..d2a33f01f 100644 --- a/vendor/github.com/urfave/cli/v2/flag.go +++ b/vendor/github.com/urfave/cli/v2/flag.go @@ -402,3 +402,7 @@ func flagFromEnvOrFile(envVars []string, filePath string) (val string, ok bool) } return "", false } + +func flagSplitMultiValues(val string) []string { + return strings.Split(val, ",") +} diff --git a/vendor/github.com/urfave/cli/v2/flag_float64_slice.go b/vendor/github.com/urfave/cli/v2/flag_float64_slice.go index d573ae5f3..219c50150 100644 --- a/vendor/github.com/urfave/cli/v2/flag_float64_slice.go +++ b/vendor/github.com/urfave/cli/v2/flag_float64_slice.go @@ -43,12 +43,14 @@ func (f *Float64Slice) Set(value string) error { return nil } - tmp, err := strconv.ParseFloat(value, 64) - if err != nil { - return err - } + for _, s := range flagSplitMultiValues(value) { + tmp, err := strconv.ParseFloat(strings.TrimSpace(s), 64) + if err != nil { + return err + } - f.slice = append(f.slice, tmp) + f.slice = append(f.slice, tmp) + } return nil } @@ -151,7 +153,7 @@ func (f *Float64SliceFlag) Apply(set *flag.FlagSet) error { if val != "" { f.Value = &Float64Slice{} - for _, s := range strings.Split(val, ",") { + for _, s := range flagSplitMultiValues(val) { if err := f.Value.Set(strings.TrimSpace(s)); err != nil { return fmt.Errorf("could not parse %q as float64 slice value for flag %s: %s", f.Value, f.Name, err) } diff --git a/vendor/github.com/urfave/cli/v2/flag_int64_slice.go b/vendor/github.com/urfave/cli/v2/flag_int64_slice.go index 16f5665b0..6ea7ae8b9 100644 --- a/vendor/github.com/urfave/cli/v2/flag_int64_slice.go +++ b/vendor/github.com/urfave/cli/v2/flag_int64_slice.go @@ -43,12 +43,14 @@ func (i *Int64Slice) Set(value string) error { return nil } - tmp, err := strconv.ParseInt(value, 0, 64) - if err != nil { - return err - } + for _, s := range flagSplitMultiValues(value) { + tmp, err := strconv.ParseInt(strings.TrimSpace(s), 0, 64) + if err != nil { + return err + } - i.slice = append(i.slice, tmp) + i.slice = append(i.slice, tmp) + } return nil } @@ -151,7 +153,7 @@ func (f *Int64SliceFlag) Apply(set *flag.FlagSet) error { if val, ok := flagFromEnvOrFile(f.EnvVars, f.FilePath); ok { f.Value = &Int64Slice{} - for _, s := range strings.Split(val, ",") { + for _, s := range flagSplitMultiValues(val) { if err := f.Value.Set(strings.TrimSpace(s)); err != nil { return fmt.Errorf("could not parse %q as int64 slice value for flag %s: %s", val, f.Name, err) } diff --git a/vendor/github.com/urfave/cli/v2/flag_int_slice.go b/vendor/github.com/urfave/cli/v2/flag_int_slice.go index b0032299a..38f6a33b0 100644 --- a/vendor/github.com/urfave/cli/v2/flag_int_slice.go +++ b/vendor/github.com/urfave/cli/v2/flag_int_slice.go @@ -54,12 +54,14 @@ func (i *IntSlice) Set(value string) error { return nil } - tmp, err := strconv.ParseInt(value, 0, 64) - if err != nil { - return err - } + for _, s := range flagSplitMultiValues(value) { + tmp, err := strconv.ParseInt(strings.TrimSpace(s), 0, 64) + if err != nil { + return err + } - i.slice = append(i.slice, int(tmp)) + i.slice = append(i.slice, int(tmp)) + } return nil } @@ -162,7 +164,7 @@ func (f *IntSliceFlag) Apply(set *flag.FlagSet) error { if val, ok := flagFromEnvOrFile(f.EnvVars, f.FilePath); ok { f.Value = &IntSlice{} - for _, s := range strings.Split(val, ",") { + for _, s := range flagSplitMultiValues(val) { if err := f.Value.Set(strings.TrimSpace(s)); err != nil { return fmt.Errorf("could not parse %q as int slice value for flag %s: %s", val, f.Name, err) } diff --git a/vendor/github.com/urfave/cli/v2/flag_string_slice.go b/vendor/github.com/urfave/cli/v2/flag_string_slice.go index 387923577..ff0a4b7da 100644 --- a/vendor/github.com/urfave/cli/v2/flag_string_slice.go +++ b/vendor/github.com/urfave/cli/v2/flag_string_slice.go @@ -42,7 +42,9 @@ func (s *StringSlice) Set(value string) error { return nil } - s.slice = append(s.slice, value) + for _, t := range flagSplitMultiValues(value) { + s.slice = append(s.slice, strings.TrimSpace(t)) + } return nil } @@ -160,7 +162,7 @@ func (f *StringSliceFlag) Apply(set *flag.FlagSet) error { destination = f.Destination } - for _, s := range strings.Split(val, ",") { + for _, s := range flagSplitMultiValues(val) { if err := destination.Set(strings.TrimSpace(s)); err != nil { return fmt.Errorf("could not parse %q as string value for flag %s: %s", val, f.Name, err) } diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index f2a114fc2..ad22c33db 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -37,6 +37,7 @@ func Creat(path string, mode uint32) (fd int, err error) { } //sys utimes(path string, times *[2]Timeval) (err error) + func Utimes(path string, tv []Timeval) error { if len(tv) != 2 { return EINVAL @@ -45,6 +46,7 @@ func Utimes(path string, tv []Timeval) error { } //sys utimensat(dirfd int, path string, times *[2]Timespec, flag int) (err error) + func UtimesNano(path string, ts []Timespec) error { if len(ts) != 2 { return EINVAL @@ -300,11 +302,13 @@ func direntNamlen(buf []byte) (uint64, bool) { } //sys getdirent(fd int, buf []byte) (n int, err error) + func Getdents(fd int, buf []byte) (n int, err error) { return getdirent(fd, buf) } //sys wait4(pid Pid_t, status *_C_int, options int, rusage *Rusage) (wpid Pid_t, err error) + func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) { var status _C_int var r Pid_t @@ -372,6 +376,7 @@ func (w WaitStatus) TrapCause() int { return -1 } //sys fcntl(fd int, cmd int, arg int) (val int, err error) //sys fsyncRange(fd int, how int, start int64, length int64) (err error) = fsync_range + func Fsync(fd int) error { return fsyncRange(fd, O_SYNC, 0, 0) } @@ -536,6 +541,7 @@ func Poll(fds []PollFd, timeout int) (n int, err error) { //sys Getsystemcfg(label int) (n uint64) //sys umount(target string) (err error) + func Unmount(target string, flags int) (err error) { if flags != 0 { // AIX doesn't have any flags for umount. diff --git a/vendor/golang.org/x/sys/unix/syscall_dragonfly.go b/vendor/golang.org/x/sys/unix/syscall_dragonfly.go index c61e27498..61c0d0de1 100644 --- a/vendor/golang.org/x/sys/unix/syscall_dragonfly.go +++ b/vendor/golang.org/x/sys/unix/syscall_dragonfly.go @@ -125,11 +125,13 @@ func Pipe2(p []int, flags int) (err error) { } //sys extpread(fd int, p []byte, flags int, offset int64) (n int, err error) + func pread(fd int, p []byte, offset int64) (n int, err error) { return extpread(fd, p, 0, offset) } //sys extpwrite(fd int, p []byte, flags int, offset int64) (n int, err error) + func pwrite(fd int, p []byte, offset int64) (n int, err error) { return extpwrite(fd, p, 0, offset) } diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index d251dafae..c8d203212 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -512,24 +512,24 @@ func (sa *SockaddrL2) sockaddr() (unsafe.Pointer, _Socklen, error) { // // Server example: // -// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM) -// _ = unix.Bind(fd, &unix.SockaddrRFCOMM{ -// Channel: 1, -// Addr: [6]uint8{0, 0, 0, 0, 0, 0}, // BDADDR_ANY or 00:00:00:00:00:00 -// }) -// _ = Listen(fd, 1) -// nfd, sa, _ := Accept(fd) -// fmt.Printf("conn addr=%v fd=%d", sa.(*unix.SockaddrRFCOMM).Addr, nfd) -// Read(nfd, buf) +// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM) +// _ = unix.Bind(fd, &unix.SockaddrRFCOMM{ +// Channel: 1, +// Addr: [6]uint8{0, 0, 0, 0, 0, 0}, // BDADDR_ANY or 00:00:00:00:00:00 +// }) +// _ = Listen(fd, 1) +// nfd, sa, _ := Accept(fd) +// fmt.Printf("conn addr=%v fd=%d", sa.(*unix.SockaddrRFCOMM).Addr, nfd) +// Read(nfd, buf) // // Client example: // -// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM) -// _ = Connect(fd, &SockaddrRFCOMM{ -// Channel: 1, -// Addr: [6]byte{0x11, 0x22, 0x33, 0xaa, 0xbb, 0xcc}, // CC:BB:AA:33:22:11 -// }) -// Write(fd, []byte(`hello`)) +// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM) +// _ = Connect(fd, &SockaddrRFCOMM{ +// Channel: 1, +// Addr: [6]byte{0x11, 0x22, 0x33, 0xaa, 0xbb, 0xcc}, // CC:BB:AA:33:22:11 +// }) +// Write(fd, []byte(`hello`)) type SockaddrRFCOMM struct { // Addr represents a bluetooth address, byte ordering is little-endian. Addr [6]uint8 @@ -556,12 +556,12 @@ func (sa *SockaddrRFCOMM) sockaddr() (unsafe.Pointer, _Socklen, error) { // The SockaddrCAN struct must be bound to the socket file descriptor // using Bind before the CAN socket can be used. // -// // Read one raw CAN frame -// fd, _ := Socket(AF_CAN, SOCK_RAW, CAN_RAW) -// addr := &SockaddrCAN{Ifindex: index} -// Bind(fd, addr) -// frame := make([]byte, 16) -// Read(fd, frame) +// // Read one raw CAN frame +// fd, _ := Socket(AF_CAN, SOCK_RAW, CAN_RAW) +// addr := &SockaddrCAN{Ifindex: index} +// Bind(fd, addr) +// frame := make([]byte, 16) +// Read(fd, frame) // // The full SocketCAN documentation can be found in the linux kernel // archives at: https://www.kernel.org/doc/Documentation/networking/can.txt @@ -632,13 +632,13 @@ func (sa *SockaddrCANJ1939) sockaddr() (unsafe.Pointer, _Socklen, error) { // Here is an example of using an AF_ALG socket with SHA1 hashing. // The initial socket setup process is as follows: // -// // Open a socket to perform SHA1 hashing. -// fd, _ := unix.Socket(unix.AF_ALG, unix.SOCK_SEQPACKET, 0) -// addr := &unix.SockaddrALG{Type: "hash", Name: "sha1"} -// unix.Bind(fd, addr) -// // Note: unix.Accept does not work at this time; must invoke accept() -// // manually using unix.Syscall. -// hashfd, _, _ := unix.Syscall(unix.SYS_ACCEPT, uintptr(fd), 0, 0) +// // Open a socket to perform SHA1 hashing. +// fd, _ := unix.Socket(unix.AF_ALG, unix.SOCK_SEQPACKET, 0) +// addr := &unix.SockaddrALG{Type: "hash", Name: "sha1"} +// unix.Bind(fd, addr) +// // Note: unix.Accept does not work at this time; must invoke accept() +// // manually using unix.Syscall. +// hashfd, _, _ := unix.Syscall(unix.SYS_ACCEPT, uintptr(fd), 0, 0) // // Once a file descriptor has been returned from Accept, it may be used to // perform SHA1 hashing. The descriptor is not safe for concurrent use, but @@ -647,39 +647,39 @@ func (sa *SockaddrCANJ1939) sockaddr() (unsafe.Pointer, _Socklen, error) { // When hashing a small byte slice or string, a single Write and Read may // be used: // -// // Assume hashfd is already configured using the setup process. -// hash := os.NewFile(hashfd, "sha1") -// // Hash an input string and read the results. Each Write discards -// // previous hash state. Read always reads the current state. -// b := make([]byte, 20) -// for i := 0; i < 2; i++ { -// io.WriteString(hash, "Hello, world.") -// hash.Read(b) -// fmt.Println(hex.EncodeToString(b)) -// } -// // Output: -// // 2ae01472317d1935a84797ec1983ae243fc6aa28 -// // 2ae01472317d1935a84797ec1983ae243fc6aa28 +// // Assume hashfd is already configured using the setup process. +// hash := os.NewFile(hashfd, "sha1") +// // Hash an input string and read the results. Each Write discards +// // previous hash state. Read always reads the current state. +// b := make([]byte, 20) +// for i := 0; i < 2; i++ { +// io.WriteString(hash, "Hello, world.") +// hash.Read(b) +// fmt.Println(hex.EncodeToString(b)) +// } +// // Output: +// // 2ae01472317d1935a84797ec1983ae243fc6aa28 +// // 2ae01472317d1935a84797ec1983ae243fc6aa28 // // For hashing larger byte slices, or byte streams such as those read from // a file or socket, use Sendto with MSG_MORE to instruct the kernel to update // the hash digest instead of creating a new one for a given chunk and finalizing it. // -// // Assume hashfd and addr are already configured using the setup process. -// hash := os.NewFile(hashfd, "sha1") -// // Hash the contents of a file. -// f, _ := os.Open("/tmp/linux-4.10-rc7.tar.xz") -// b := make([]byte, 4096) -// for { -// n, err := f.Read(b) -// if err == io.EOF { -// break -// } -// unix.Sendto(hashfd, b[:n], unix.MSG_MORE, addr) -// } -// hash.Read(b) -// fmt.Println(hex.EncodeToString(b)) -// // Output: 85cdcad0c06eef66f805ecce353bec9accbeecc5 +// // Assume hashfd and addr are already configured using the setup process. +// hash := os.NewFile(hashfd, "sha1") +// // Hash the contents of a file. +// f, _ := os.Open("/tmp/linux-4.10-rc7.tar.xz") +// b := make([]byte, 4096) +// for { +// n, err := f.Read(b) +// if err == io.EOF { +// break +// } +// unix.Sendto(hashfd, b[:n], unix.MSG_MORE, addr) +// } +// hash.Read(b) +// fmt.Println(hex.EncodeToString(b)) +// // Output: 85cdcad0c06eef66f805ecce353bec9accbeecc5 // // For more information, see: http://www.chronox.de/crypto-API/crypto/userspace-if.html. type SockaddrALG struct { diff --git a/vendor/golang.org/x/sys/unix/syscall_openbsd.go b/vendor/golang.org/x/sys/unix/syscall_openbsd.go index 15d637d63..78daceb33 100644 --- a/vendor/golang.org/x/sys/unix/syscall_openbsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_openbsd.go @@ -81,6 +81,7 @@ func Pipe(p []int) (err error) { } //sysnb pipe2(p *[2]_C_int, flags int) (err error) + func Pipe2(p []int, flags int) error { if len(p) != 2 { return EINVAL @@ -95,6 +96,7 @@ func Pipe2(p []int, flags int) error { } //sys Getdents(fd int, buf []byte) (n int, err error) + func Getdirentries(fd int, buf []byte, basep *uintptr) (n int, err error) { n, err = Getdents(fd, buf) if err != nil || basep == nil { diff --git a/vendor/golang.org/x/sys/windows/exec_windows.go b/vendor/golang.org/x/sys/windows/exec_windows.go index 855698bb2..75980fd44 100644 --- a/vendor/golang.org/x/sys/windows/exec_windows.go +++ b/vendor/golang.org/x/sys/windows/exec_windows.go @@ -15,11 +15,11 @@ import ( // in http://msdn.microsoft.com/en-us/library/ms880421. // This function returns "" (2 double quotes) if s is empty. // Alternatively, these transformations are done: -// - every back slash (\) is doubled, but only if immediately -// followed by double quote ("); -// - every double quote (") is escaped by back slash (\); -// - finally, s is wrapped with double quotes (arg -> "arg"), -// but only if there is space or tab inside s. +// - every back slash (\) is doubled, but only if immediately +// followed by double quote ("); +// - every double quote (") is escaped by back slash (\); +// - finally, s is wrapped with double quotes (arg -> "arg"), +// but only if there is space or tab inside s. func EscapeArg(s string) string { if len(s) == 0 { return "\"\"" diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index ce3075c45..636e5de60 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -623,7 +623,6 @@ var ( func getStdHandle(stdhandle uint32) (fd Handle) { r, _ := GetStdHandle(stdhandle) - CloseOnExec(r) return r } diff --git a/vendor/google.golang.org/api/internal/version.go b/vendor/google.golang.org/api/internal/version.go index 3599baa0f..a69b66d7d 100644 --- a/vendor/google.golang.org/api/internal/version.go +++ b/vendor/google.golang.org/api/internal/version.go @@ -5,4 +5,4 @@ package internal // Version is the current tagged release of the library. -const Version = "0.75.0" +const Version = "0.77.0" diff --git a/vendor/modules.txt b/vendor/modules.txt index 48ad757c5..40b4b6292 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -34,7 +34,7 @@ github.com/VictoriaMetrics/metricsql/binaryop # github.com/VividCortex/ewma v1.2.0 ## explicit; go 1.12 github.com/VividCortex/ewma -# github.com/aws/aws-sdk-go v1.44.0 +# github.com/aws/aws-sdk-go v1.44.4 ## explicit; go 1.11 github.com/aws/aws-sdk-go/aws github.com/aws/aws-sdk-go/aws/arn @@ -128,8 +128,8 @@ github.com/golang/protobuf/ptypes/timestamp # github.com/golang/snappy v0.0.4 ## explicit github.com/golang/snappy -# github.com/google/go-cmp v0.5.7 -## explicit; go 1.11 +# github.com/google/go-cmp v0.5.8 +## explicit; go 1.13 github.com/google/go-cmp/cmp github.com/google/go-cmp/cmp/internal/diff github.com/google/go-cmp/cmp/internal/flags @@ -151,13 +151,14 @@ github.com/influxdata/influxdb/pkg/escape # github.com/jmespath/go-jmespath v0.4.0 ## explicit; go 1.14 github.com/jmespath/go-jmespath -# github.com/klauspost/compress v1.15.1 -## explicit; go 1.15 +# github.com/klauspost/compress v1.15.2 +## explicit; go 1.16 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/gzip github.com/klauspost/compress/huff0 +github.com/klauspost/compress/internal/cpuinfo github.com/klauspost/compress/internal/snapref github.com/klauspost/compress/zlib github.com/klauspost/compress/zstd @@ -220,7 +221,7 @@ github.com/rivo/uniseg # github.com/russross/blackfriday/v2 v2.1.0 ## explicit github.com/russross/blackfriday/v2 -# github.com/urfave/cli/v2 v2.5.0 +# github.com/urfave/cli/v2 v2.5.1 ## explicit; go 1.18 github.com/urfave/cli/v2 # github.com/valyala/bytebufferpool v1.0.0 @@ -294,7 +295,7 @@ golang.org/x/oauth2/jwt # golang.org/x/sync v0.0.0-20210220032951-036812b2e83c ## explicit golang.org/x/sync/errgroup -# golang.org/x/sys v0.0.0-20220422013727-9388b58f7150 +# golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba ## explicit; go 1.17 golang.org/x/sys/internal/unsafeheader golang.org/x/sys/unix @@ -309,7 +310,7 @@ golang.org/x/text/unicode/norm ## explicit; go 1.11 golang.org/x/xerrors golang.org/x/xerrors/internal -# google.golang.org/api v0.75.0 +# google.golang.org/api v0.77.0 ## explicit; go 1.15 google.golang.org/api/googleapi google.golang.org/api/googleapi/transport @@ -342,7 +343,7 @@ google.golang.org/appengine/internal/socket google.golang.org/appengine/internal/urlfetch google.golang.org/appengine/socket google.golang.org/appengine/urlfetch -# google.golang.org/genproto v0.0.0-20220422154200-b37d22cd5731 +# google.golang.org/genproto v0.0.0-20220429170224-98d788798c3e ## explicit; go 1.15 google.golang.org/genproto/googleapis/api/annotations google.golang.org/genproto/googleapis/iam/v1