From 05713469c3a9998f392d2305092f3ac73ec18e8a Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Mon, 5 Aug 2019 10:33:21 +0300
Subject: [PATCH] vendor: `make vendor-update`

---
 go.mod                                        |   4 +-
 go.sum                                        |   8 +-
 vendor/github.com/klauspost/compress/LICENSE  |   1 +
 .../klauspost/compress/huff0/decompress.go    |   6 +-
 .../klauspost/compress/zstd/README.md         |   6 +-
 .../klauspost/compress/zstd/blockenc.go       |  60 ++++++++--
 .../klauspost/compress/zstd/decoder.go        |   8 +-
 .../klauspost/compress/zstd/enc_dfast.go      |  36 ++----
 .../klauspost/compress/zstd/enc_fast.go       |  17 +--
 .../klauspost/compress/zstd/framedec.go       |   3 +
 .../klauspost/compress/zstd/fse_decoder.go    | 113 +++++++++++++-----
 .../klauspost/compress/zstd/seqdec.go         | 103 ++++++++--------
 .../golang.org/x/sys/unix/affinity_linux.go   |   8 +-
 vendor/golang.org/x/sys/unix/dirent.go        |   2 +-
 vendor/golang.org/x/sys/unix/endian_little.go |   2 +-
 vendor/modules.txt                            |   4 +-
 16 files changed, 226 insertions(+), 155 deletions(-)

diff --git a/go.mod b/go.mod
index 43c1637bb4..e0034cf7a2 100644
--- a/go.mod
+++ b/go.mod
@@ -6,13 +6,13 @@ require (
 	github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
 	github.com/golang/snappy v0.0.1
 	github.com/google/go-cmp v0.3.0 // indirect
-	github.com/klauspost/compress v1.7.4
+	github.com/klauspost/compress v1.7.5
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
 	github.com/valyala/fastjson v1.4.1
 	github.com/valyala/gozstd v1.5.1
 	github.com/valyala/histogram v1.0.1
 	github.com/valyala/quicktemplate v1.1.1
-	golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
+	golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
 )
 
 go 1.12
diff --git a/go.sum b/go.sum
index 02b5574977..167b14e30a 100644
--- a/go.sum
+++ b/go.sum
@@ -20,8 +20,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
-github.com/klauspost/compress v1.7.4 h1:4UqAIzZ1Ns2epCTyJ1d2xMWvxtX+FNSCYWeOFogK9nc=
-github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.7.5 h1:NMapGoDIKPKpk2hpcgAU6XHfsREHG2p8PIg7C3f/jpI=
+github.com/klauspost/compress v1.7.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@@ -49,5 +49,5 @@ github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6
 github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
 github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
 golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI=
-golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
+golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE
index 7448756763..1eb75ef68e 100644
--- a/vendor/github.com/klauspost/compress/LICENSE
+++ b/vendor/github.com/klauspost/compress/LICENSE
@@ -1,4 +1,5 @@
 Copyright (c) 2012 The Go Authors. All rights reserved.
+Copyright (c) 2019 Klaus Post. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 261c54274c..7b454c2eb7 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -247,9 +247,13 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
 	dstOut := s.Out
 	dstEvery := (dstSize + 3) / 4
 
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	single := s.dt.single[:tlSize]
+
 	decode := func(br *bitReader) byte {
 		val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
-		v := s.dt.single[val]
+		v := single[val&tlMask]
 		br.bitsRead += v.nBits
 		return v.byte
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index 670f98af44..d9d38b23f1 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im
 The "Fastest" compression ratio is roughly equivalent to zstd level 1.
 The "Default" compression ration is roughly equivalent to zstd level 3 (default).
 
-In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
+In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
+The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
 
 Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
 
@@ -217,7 +218,8 @@ silesia.tar zstd 3 211947520 66793301 1377 146.79
 
 As part of the development process a *Snappy* -> *Zstandard* converter was also built.
 
-This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed.
+This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream.
+Note that a single block is not framed.
 
 Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
 Therefore the compression ratio is much less than what can be done by a full decompression
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index cba24c76d1..9d9151a0ef 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
 }
 
 // setSizes will set the size of a compressed literals section and the input length.
-func (h *literalsHeader) setSizes(compLen, inLen int) {
+func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
 	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
 	// Only retain 2 bits
 	const mask = 3
 	lh := uint64(*h & mask)
 	switch {
 	case compBits <= 10 && inBits <= 10:
-		lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
+		if !single {
+			lh |= 1 << 2
+		}
+		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
 		if debug {
 			const mmask = (1 << 24) - 1
 			n := (lh >> 4) & mmask
@@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
 		}
 	case compBits <= 14 && inBits <= 14:
 		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
+		if single {
+			panic("single stream used with more than 10 bits length.")
+		}
 	case compBits <= 18 && inBits <= 18:
 		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
+		if single {
+			panic("single stream used with more than 10 bits length.")
+		}
 	default:
 		panic("internal error: block too big")
 	}
@@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
 		return nil
 	}
 
-	// TODO: Switch to 1X when less than x bytes.
-	out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
-	// Bail out of compression is too little.
-	if len(out) > (len(b.literals) - len(b.literals)>>4) {
+	var (
+		out            []byte
+		reUsed, single bool
+		err            error
+	)
+	if len(b.literals) >= 1024 {
+		// Use 4 Streams.
+		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			// Bail out of compression is too little.
+			err = huff0.ErrIncompressible
+		}
+	} else if len(b.literals) > 32 {
+		// Use 1 stream
+		single = true
+		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			// Bail out of compression is too little.
+			err = huff0.ErrIncompressible
+		}
+	} else {
 		err = huff0.ErrIncompressible
 	}
+
 	switch err {
 	case huff0.ErrIncompressible:
 		if debug {
@@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
 		lh.setType(literalsBlockCompressed)
 	}
 	// Set sizes
-	lh.setSizes(len(out), len(b.literals))
+	lh.setSizes(len(out), len(b.literals), single)
 	bh.setSize(uint32(len(out) + lh.size() + 1))
 
 	// Write block headers.
@@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
 	b.output = bh.appendTo(b.output)
 
 	var (
-		out    []byte
-		reUsed bool
-		err    error
+		out            []byte
+		reUsed, single bool
+		err            error
 	)
-	if len(b.literals) > 32 {
-		// TODO: Switch to 1X on small blocks.
+	if len(b.literals) >= 1024 {
+		// Use 4 Streams.
 		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
 		if len(out) > len(b.literals)-len(b.literals)>>4 {
 			err = huff0.ErrIncompressible
 		}
+	} else if len(b.literals) > 32 {
+		// Use 1 stream
+		single = true
+		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			err = huff0.ErrIncompressible
+		}
 	} else {
 		err = huff0.ErrIncompressible
 	}
@@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
 			}
 		}
 	}
-	lh.setSizes(len(out), len(b.literals))
+	lh.setSizes(len(out), len(b.literals), single)
 	if debug {
 		printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
 		println("Adding literal header:", lh)
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 2bd2a1300c..a32a34cb6c 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -281,17 +281,17 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		}
 		d.decoders <- block
 		frame.rawInput = nil
+		frame.bBuf = nil
 		d.frames <- frame
 	}()
 
+	frame.bBuf = input
 	if cap(dst) == 0 {
 		// Allocate 1MB by default if nothing is provided.
 		dst = make([]byte, 0, 1<<20)
 	}
-	// Allocation here:
-	br := byteBuf(input)
 	for {
-		err := frame.reset(&br)
+		err := frame.reset(&frame.bBuf)
 		if err == io.EOF {
 			return dst, nil
 		}
@@ -313,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		if err != nil {
 			return dst, err
 		}
-		if len(br) == 0 {
+		if len(frame.bBuf) == 0 {
 			break
 		}
 	}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index fb50e877f8..e120625d85 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 		stepSize++
 	}
 
-	// TEMPLATE
-
 	const kSearchStrength = 8
 
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
 	cv := load6432(src, s)
-	// nextHash is the hash at s
-	nextHashS := hash5(cv, dFastShortTableBits)
-	nextHashL := hash8(cv, dFastLongTableBits)
 
 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -119,8 +114,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHashS = nextHashS & dFastShortTableMask
-		nextHashL = nextHashL & dFastLongTableMask
+		nextHashS := hash5(cv, dFastShortTableBits)
+		nextHashL := hash8(cv, dFastLongTableBits)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
 
@@ -172,8 +167,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHashS = hash5(cv, dFastShortTableBits)
-			nextHashL = hash8(cv, dFastLongTableBits)
 			continue
 		}
 		const repOff2 = 1
@@ -221,8 +214,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHashS = hash5(cv, dFastShortTableBits)
-			nextHashL = hash8(cv, dFastLongTableBits)
 			// Swap offsets
 			offset1, offset2 = offset2, offset1
 			continue
@@ -296,8 +287,6 @@ encodeLoop:
 			break encodeLoop
 		}
 		cv = load6432(src, s)
-		nextHashS = hash5(cv, dFastShortTableBits)
-		nextHashL = hash8(cv, dFastLongTableBits)
 	}
 
 	// A 4-byte match has been found. Update recent offsets.
@@ -354,20 +343,18 @@ encodeLoop:
 		cv1 := load6432(src, index1)
 		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-		e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0
-		e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1
+		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
+		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
 		cv0 >>= 8
 		cv1 >>= 8
 		te0.offset++
 		te1.offset++
 		te0.val = uint32(cv0)
 		te1.val = uint32(cv1)
-		e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0
-		e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1
+		e.table[hash5(cv0, dFastShortTableBits)] = te0
+		e.table[hash5(cv1, dFastShortTableBits)] = te1
 
 		cv = load6432(src, s)
-		nextHashS = hash5(cv1>>8, dFastShortTableBits)
-		nextHashL = hash8(cv, dFastLongTableBits)
 
 		if !canRepeat {
 			continue
@@ -381,14 +368,17 @@ encodeLoop:
 			break
 		}
 
+		// Store this, since we have it.
+		nextHashS := hash5(cv1>>8, dFastShortTableBits)
+		nextHashL := hash8(cv, dFastLongTableBits)
+
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
 		l := 4 + e.matchlen(s+4, o2+4, src)
 
-		// Store this, since we have it.
 		entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
-		e.longTable[nextHashL&dFastLongTableMask] = entry
-		e.table[nextHashS&dFastShortTableMask] = entry
+		e.longTable[nextHashL] = entry
+		e.table[nextHashS] = entry
 		seq.matchLen = uint32(l) - zstdMinMatch
 		seq.litLen = 0
 
@@ -408,8 +398,6 @@ encodeLoop:
 			break encodeLoop
 		}
 		cv = load6432(src, s)
-		nextHashS = hash5(cv, dFastShortTableBits)
-		nextHashL = hash8(cv, dFastLongTableBits)
 	}
 }
 
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index a8edaa8885..6f388de041 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := s
 	cv := load6432(src, s)
-	// nextHash is the hash at s
-	nextHash := hash6(cv, hashLog)
 
 	// Relative offsets
 	offset1 := int32(blk.recentOffsets[0])
@@ -157,8 +155,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHash2 := hash6(cv>>8, hashLog) & tableMask
-		nextHash = nextHash & tableMask
+		nextHash := hash6(cv, hashLog)
+		nextHash2 := hash6(cv>>8, hashLog)
 		candidate := e.table[nextHash]
 		candidate2 := e.table[nextHash2]
 		repIndex := s - offset1 + 2
@@ -207,8 +205,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			//nextHash = hashLen(cv, hashLog, mls)
-			nextHash = hash6(cv, hashLog)
 			continue
 		}
 		coffset0 := s - (candidate.offset - e.cur)
@@ -245,7 +241,6 @@ encodeLoop:
 				break encodeLoop
 			}
 			cv = load6432(src, s)
-			nextHash = hash6(cv, hashLog)
 		}
 		// A 4-byte match has been found. We'll later see if more than 4 bytes.
 		offset2 = offset1
@@ -292,15 +287,16 @@ encodeLoop:
 			break encodeLoop
 		}
 		cv = load6432(src, s)
-		nextHash = hash6(cv, hashLog)
 
 		// Check offset 2
-		if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
+		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
 			// We have at least 4 byte match.
 			// No need to check backwards. We come straight from a match
 			l := 4 + e.matchlen(s+4, o2+4, src)
 
+			// Store this, since we have it.
-			e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+			nextHash := hash6(cv, hashLog)
+			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 			seq.matchLen = uint32(l) - zstdMinMatch
 			seq.litLen = 0
 			// Since litlen is always 0, this is offset 1.
@@ -319,7 +315,6 @@ encodeLoop:
 		}
 		// Prepare next loop.
 		cv = load6432(src, s)
-		nextHash = hash6(cv, hashLog)
 	}
 }
 
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 7f1e225d72..0c2a623074 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -39,6 +39,9 @@ type frameDec struct {
 
 	rawInput byteBuffer
 
+	// Byte buffer that can be reused for small input blocks.
+	bBuf byteBuf
+
 	// asyncRunning indicates whether the async routine processes input on 'decoding'.
 	asyncRunning   bool
 	asyncRunningMu sync.Mutex
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index a86d00bc35..9efe34feb3 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
 // decSymbol contains information about a state entry,
 // Including the state offset base, the output symbol and
 // the number of bits to read for the low part of the destination state.
-type decSymbol struct {
-	newState uint16
-	addBits  uint8 // Used for symbols until transformed.
-	nbBits   uint8
-	baseline uint32
+// Using a composite uint64 is faster than a struct with separate members.
+type decSymbol uint64
+
+func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
+	return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d decSymbol) nbBits() uint8 {
+	return uint8(d)
+}
+
+func (d decSymbol) addBits() uint8 {
+	return uint8(d >> 8)
+}
+
+func (d decSymbol) newState() uint16 {
+	return uint16(d >> 16)
+}
+
+func (d decSymbol) baseline() uint32 {
+	return uint32(d >> 32)
+}
+
+func (d decSymbol) baselineInt() int {
+	return int(d >> 32)
+}
+
+func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
+	*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d *decSymbol) setNBits(nBits uint8) {
+	const mask = 0xffffffffffffff00
+	*d = (*d & mask) | decSymbol(nBits)
+}
+
+func (d *decSymbol) setAddBits(addBits uint8) {
+	const mask = 0xffffffffffff00ff
+	*d = (*d & mask) | (decSymbol(addBits) << 8)
+}
+
+func (d *decSymbol) setNewState(state uint16) {
+	const mask = 0xffffffff0000ffff
+	*d = (*d & mask) | decSymbol(state)<<16
+}
+
+func (d *decSymbol) setBaseline(baseline uint32) {
+	const mask = 0xffffffff
+	*d = (*d & mask) | decSymbol(baseline)<<32
+}
+
+func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
+	const mask = 0xffff00ff
+	*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
 }
 
 // decSymbolValue returns the transformed decSymbol for the given symbol.
 func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
 	if int(symb) >= len(t) {
-		return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
+		return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
 	}
 	lu := t[symb]
-	return decSymbol{
-		addBits:  lu.addBits,
-		baseline: lu.baseLine,
-	}, nil
+	return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
 }
 
 // setRLE will set the decoder til RLE mode.
 func (s *fseDecoder) setRLE(symbol decSymbol) {
 	s.actualTableLog = 0
-	s.maxBits = symbol.addBits
+	s.maxBits = symbol.addBits()
 	s.dt[0] = symbol
 }
 
@@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
 	{
 		for i, v := range s.norm[:s.symbolLen] {
 			if v == -1 {
-				s.dt[highThreshold].addBits = uint8(i)
+				s.dt[highThreshold].setAddBits(uint8(i))
 				highThreshold--
 				symbolNext[i] = 1
 			} else {
@@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
 		position := uint32(0)
 		for ss, v := range s.norm[:s.symbolLen] {
 			for i := 0; i < int(v); i++ {
-				s.dt[position].addBits = uint8(ss)
+				s.dt[position].setAddBits(uint8(ss))
 				position = (position + step) & tableMask
 				for position > highThreshold {
 					// lowprob area
@@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
 	{
 		tableSize := uint16(1 << s.actualTableLog)
 		for u, v := range s.dt[:tableSize] {
-			symbol := v.addBits
+			symbol := v.addBits()
 			nextState := symbolNext[symbol]
 			symbolNext[symbol] = nextState + 1
 			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
-			s.dt[u&maxTableMask].nbBits = nBits
+			s.dt[u&maxTableMask].setNBits(nBits)
 			newState := (nextState << nBits) - tableSize
 			if newState > tableSize {
 				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
 				// Seems weird that this is possible with nbits > 0.
 				return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
 			}
-			s.dt[u&maxTableMask].newState = newState
+			s.dt[u&maxTableMask].setNewState(newState)
 		}
 	}
 	return nil
@@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
 	tableSize := uint16(1 << s.actualTableLog)
 	s.maxBits = 0
 	for i, v := range s.dt[:tableSize] {
-		if int(v.addBits) >= len(t) {
-			return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t))
+		add := v.addBits()
+		if int(add) >= len(t) {
+			return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
 		}
-		lu := t[v.addBits]
+		lu := t[add]
 		if lu.addBits > s.maxBits {
 			s.maxBits = lu.addBits
 		}
-		s.dt[i&maxTableMask] = decSymbol{
-			newState: v.newState,
-			nbBits:   v.nbBits,
-			addBits:  lu.addBits,
-			baseline: lu.baseLine,
-		}
+		v.setExt(lu.addBits, lu.baseLine)
+		s.dt[i] = v
 	}
 	return nil
 }
 
 type fseState struct {
-	// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
 	dt    []decSymbol
 	state decSymbol
 }
@@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
 // next returns the current symbol and sets the next state.
 // At least tablelog bits must be available in the bit reader.
 func (s *fseState) next(br *bitReader) {
-	lowBits := uint16(br.getBits(s.state.nbBits))
-	s.state = s.dt[s.state.newState+lowBits]
+	lowBits := uint16(br.getBits(s.state.nbBits()))
+	s.state = s.dt[s.state.newState()+lowBits]
 }
 
 // finished returns true if all bits have been read from the bitstream
 // and the next state would require reading bits from the input.
 func (s *fseState) finished(br *bitReader) bool {
-	return br.finished() && s.state.nbBits > 0
+	return br.finished() && s.state.nbBits() > 0
 }
 
 // final returns the current state symbol without decoding the next.
 func (s *fseState) final() (int, uint8) {
-	return int(s.state.baseline), s.state.addBits
+	return s.state.baselineInt(), s.state.addBits()
+}
+
+// final returns the current state symbol without decoding the next.
+func (s decSymbol) final() (int, uint8) {
+	return s.baselineInt(), s.addBits()
 }
 
 // nextFast returns the next symbol and sets the next state.
 // This can only be used if no symbols are 0 bits.
 // At least tablelog bits must be available in the bit reader.
 func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
-	lowBits := uint16(br.getBitsFast(s.state.nbBits))
-	s.state = s.dt[s.state.newState+lowBits]
-	return s.state.baseline, s.state.addBits
+	lowBits := uint16(br.getBitsFast(s.state.nbBits()))
+	s.state = s.dt[s.state.newState()+lowBits]
+	return s.state.baseline(), s.state.addBits()
 }
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index cef69e35b5..15a45f7b50 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -89,6 +89,10 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
 // decode sequences from the stream with the provided history.
 func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 	startSize := len(s.out)
+	// Grab full sizes tables, to avoid bounds checks.
+	llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
+	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
+
 	for i := seqs - 1; i >= 0; i-- {
 		if br.overread() {
 			printf("reading sequence %d, exceeded available data\n", seqs-i)
@@ -96,10 +100,10 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 		}
 		var litLen, matchOff, matchLen int
 		if br.off > 4+((maxOffsetBits+16+16)>>3) {
-			litLen, matchOff, matchLen = s.nextFast(br)
+			litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
 			br.fillFast()
 		} else {
-			litLen, matchOff, matchLen = s.next(br)
+			litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
 			br.fill()
 		}
 
@@ -175,30 +179,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
 			// This is the last sequence, so we shouldn't update state.
 			break
 		}
-		if true {
-			// Manually inlined, ~ 5-20% faster
-			// Update all 3 states at once. Approx 20% faster.
-			a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
-			nBits := a.nbBits + b.nbBits + c.nbBits
-			if nBits == 0 {
-				s.litLengths.state.state = s.litLengths.state.dt[a.newState]
-				s.matchLengths.state.state = s.matchLengths.state.dt[b.newState]
-				s.offsets.state.state = s.offsets.state.dt[c.newState]
-			} else {
-				bits := br.getBitsFast(nBits)
-				lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31))
-				s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits]
-
-				lowBits = uint16(bits >> (c.nbBits & 31))
-				lowBits &= bitMask[b.nbBits&15]
-				s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits]
-
-				lowBits = uint16(bits) & bitMask[c.nbBits&15]
-				s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits]
-			}
+		// Manually inlined, ~ 5-20% faster
+		// Update all 3 states at once. Approx 20% faster.
+		nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
+		if nBits == 0 {
+			llState = llTable[llState.newState()&maxTableMask]
+			mlState = mlTable[mlState.newState()&maxTableMask]
+			ofState = ofTable[ofState.newState()&maxTableMask]
 		} else {
-			s.updateAlt(br)
+			bits := br.getBitsFast(nBits)
+			lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
+			llState = llTable[(llState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits >> (ofState.nbBits() & 31))
+			lowBits &= bitMask[mlState.nbBits()&15]
+			mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
+			ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
 		}
 	}
 
@@ -230,55 +229,49 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
 	// Update all 3 states at once. Approx 20% faster.
 	a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
 
-	nBits := a.nbBits + b.nbBits + c.nbBits
+	nBits := a.nbBits() + b.nbBits() + c.nbBits()
 	if nBits == 0 {
-		s.litLengths.state.state = s.litLengths.state.dt[a.newState]
-		s.matchLengths.state.state = s.matchLengths.state.dt[b.newState]
-		s.offsets.state.state = s.offsets.state.dt[c.newState]
+		s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
+		s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
+		s.offsets.state.state = s.offsets.state.dt[c.newState()]
 		return
 	}
 	bits := br.getBitsFast(nBits)
-	lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31))
-	s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits]
+	lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
+	s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
 
-	lowBits = uint16(bits >> (c.nbBits & 31))
-	lowBits &= bitMask[b.nbBits&15]
-	s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits]
+	lowBits = uint16(bits >> (c.nbBits() & 31))
+	lowBits &= bitMask[b.nbBits()&15]
+	s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
 
-	lowBits = uint16(bits) & bitMask[c.nbBits&15]
-	s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits]
+	lowBits = uint16(bits) & bitMask[c.nbBits()&15]
+	s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
 }
 
 // nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
-func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) {
+func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
 	// Final will not read from stream.
-	ll, llB := s.litLengths.state.final()
-	ml, mlB := s.matchLengths.state.final()
-	mo, moB := s.offsets.state.final()
+	ll, llB := llState.final()
+	ml, mlB := mlState.final()
+	mo, moB := ofState.final()
 
 	// extra bits are stored in reverse order.
 	br.fillFast()
-	if s.maxBits <= 32 {
-		mo += br.getBits(moB)
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
-	} else {
-		mo += br.getBits(moB)
+	mo += br.getBits(moB)
+	if s.maxBits > 32 {
 		br.fillFast()
-		// matchlength+literal length, max 32 bits
-		ml += br.getBits(mlB)
-		ll += br.getBits(llB)
 	}
+	ml += br.getBits(mlB)
+	ll += br.getBits(llB)
 
-	// mo = s.adjustOffset(mo, ll, moB)
-	// Inlined for rather big speedup
 	if moB > 1 {
 		s.prevOffset[2] = s.prevOffset[1]
 		s.prevOffset[1] = s.prevOffset[0]
 		s.prevOffset[0] = mo
 		return
 	}
-
+	// mo = s.adjustOffset(mo, ll, moB)
+	// Inlined for rather big speedup
 	if ll == 0 {
 		// There is an exception though, when current sequence's literals_length = 0.
 		// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
@@ -312,11 +305,11 @@ func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) {
 	return
 }
 
-func (s *sequenceDecs) next(br *bitReader) (ll, mo, ml int) {
+func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
 	// Final will not read from stream.
-	ll, llB := s.litLengths.state.final()
-	ml, mlB := s.matchLengths.state.final()
-	mo, moB := s.offsets.state.final()
+	ll, llB := llState.final()
+	ml, mlB := mlState.final()
+	mo, moB := ofState.final()
 
 	// extra bits are stored in reverse order.
 	br.fill()
diff --git a/vendor/golang.org/x/sys/unix/affinity_linux.go b/vendor/golang.org/x/sys/unix/affinity_linux.go
index 72afe3338c..14e4d5caa3 100644
--- a/vendor/golang.org/x/sys/unix/affinity_linux.go
+++ b/vendor/golang.org/x/sys/unix/affinity_linux.go
@@ -91,9 +91,13 @@ func onesCount64(x uint64) int {
 	const m0 = 0x5555555555555555 // 01010101 ...
 	const m1 = 0x3333333333333333 // 00110011 ...
 	const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
-	const m3 = 0x00ff00ff00ff00ff // etc.
-	const m4 = 0x0000ffff0000ffff
 
+	// Unused in this function, but definitions preserved for
+	// documentation purposes:
+	//
+	// const m3 = 0x00ff00ff00ff00ff // etc.
+	// const m4 = 0x0000ffff0000ffff
+	//
 	// Implementation: Parallel summing of adjacent bits.
 	// See "Hacker's Delight", Chap. 5: Counting Bits.
 	// The following pattern shows the general approach:
diff --git a/vendor/golang.org/x/sys/unix/dirent.go b/vendor/golang.org/x/sys/unix/dirent.go
index 6f3460e69c..304016b688 100644
--- a/vendor/golang.org/x/sys/unix/dirent.go
+++ b/vendor/golang.org/x/sys/unix/dirent.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
 
 package unix
 
diff --git a/vendor/golang.org/x/sys/unix/endian_little.go b/vendor/golang.org/x/sys/unix/endian_little.go
index 085df2d8dd..bcdb5d30eb 100644
--- a/vendor/golang.org/x/sys/unix/endian_little.go
+++ b/vendor/golang.org/x/sys/unix/endian_little.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //
-// +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le
+// +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le riscv64
 
 package unix
 
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 2b707a3b07..a02b1e408d 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -6,7 +6,7 @@ github.com/VictoriaMetrics/metrics
 github.com/cespare/xxhash/v2
 # github.com/golang/snappy v0.0.1
 github.com/golang/snappy
-# github.com/klauspost/compress v1.7.4
+# github.com/klauspost/compress v1.7.5
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0
 github.com/klauspost/compress/snappy
@@ -24,5 +24,5 @@ github.com/valyala/gozstd
 github.com/valyala/histogram
 # github.com/valyala/quicktemplate v1.1.1
 github.com/valyala/quicktemplate
-# golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
+# golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
 golang.org/x/sys/unix
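
The klauspost/compress changes above lean on a handful of standalone Go techniques. The sketches that follow illustrate them outside the library; names, sizes, and constants in them are illustrative unless they are quoted from the patch. The first technique is the bounds-check elimination in huff0/decompress.go (`single := s.dt.single[:tlSize]` plus `val&tlMask`) and in seqdec.go ("Grab full sizes tables, to avoid bounds checks"): re-slice the table to a constant length and mask every index, so the compiler can prove each access is in range and drop the per-iteration check. A minimal sketch:

package main

import "fmt"

const tableLog = 11 // illustrative table size: 2^11 entries
const tableSize = 1 << tableLog
const tableMask = tableSize - 1

// lookupAll sums the table entries selected by idxs. Re-slicing to the
// constant length and masking each index lets the compiler prove
// 0 <= i&tableMask < len(t), eliminating bounds checks in the hot loop.
func lookupAll(table []uint16, idxs []uint32) (sum uint64) {
	t := table[:tableSize] // one check here, none inside the loop
	for _, i := range idxs {
		sum += uint64(t[i&tableMask])
	}
	return sum
}

func main() {
	table := make([]uint16, tableSize)
	for i := range table {
		table[i] = uint16(i)
	}
	fmt.Println(lookupAll(table, []uint32{1, 2, 4093}))
}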
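blockenc.go replaces the old `// TODO: Switch to 1X when less than x bytes` with concrete thresholds: four-stream `huff0.Compress4X` at 1024 literals and up, single-stream `huff0.Compress1X` above 32, raw storage below that, and in either compressed case the result is rejected unless it saves at least a sixteenth of the input. A sketch of that decision against the public huff0 API; the helper name is hypothetical:

package main

import (
	"fmt"

	"github.com/klauspost/compress/huff0"
)

// compressLiterals mirrors the size-based stream selection added in
// blockenc.go: 4 streams for large inputs, 1 stream for medium ones,
// and huff0.ErrIncompressible for inputs too small to repay a table.
func compressLiterals(lits []byte, s *huff0.Scratch) (out []byte, single bool, err error) {
	switch {
	case len(lits) >= 1024:
		out, _, err = huff0.Compress4X(lits, s)
	case len(lits) > 32:
		single = true
		out, _, err = huff0.Compress1X(lits, s)
	default:
		return nil, false, huff0.ErrIncompressible
	}
	// Reject the result if it saves less than 1/16th of the input.
	if err == nil && len(out) > len(lits)-len(lits)>>4 {
		return nil, single, huff0.ErrIncompressible
	}
	return out, single, err
}

func main() {
	lits := []byte("aaaaaaaaaaaaaaaaaaaabbbbbbbbbbcccccccccc dddddddddd")
	out, single, err := compressLiterals(lits, nil)
	fmt.Println(len(out), single, err)
}

The `single` flag matters because the zstd literals header encodes one-stream and four-stream layouts differently, which is what the new third parameter of `setSizes` carries; the panics there guard the spec rule that the single-stream form only exists for the small (10-bit) size format.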
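The decoder.go change stores the input wrapper in the reusable `frame.bBuf` instead of allocating a fresh `byteBuf` per `DecodeAll` call. The payoff appears when one Decoder is kept alive and handed an output buffer to append to; a usage sketch against the public API, assuming in-memory round-tripping:

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// One long-lived decoder; a nil reader is fine when only
	// DecodeAll is used.
	dec, err := zstd.NewReader(nil)
	if err != nil {
		panic(err)
	}
	defer dec.Close()

	enc, _ := zstd.NewWriter(nil)
	compressed := enc.EncodeAll([]byte("hello zstd"), nil)
	enc.Close()

	// Reuse buf across calls; DecodeAll appends to it, so with the
	// frame.bBuf change a steady-state call should allocate little
	// or nothing.
	var buf []byte
	buf, err = dec.DecodeAll(compressed, buf[:0])
	fmt.Println(string(buf), err)
}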
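In enc_fast.go and enc_dfast.go the `& tableMask` disappears and `nextHash` is recomputed at each use rather than carried across loop iterations: the hash functions already return values below the table size, so the mask was dead work. The underlying scheme is multiplicative hashing, where the top bits of a product with a large odd constant are well mixed. A sketch with an illustrative constant, not the package's internal prime:

package main

import "fmt"

const tableBits = 15 // illustrative; the fast encoders use similar sizes
const tableSize = 1 << tableBits

// hash6 hashes the low 6 bytes of u into tableBits bits: shift the
// 48 useful bits to the top, multiply, and keep the high tableBits
// bits of the product. The result is < tableSize by construction,
// so no mask is needed at the use site. The constant is an arbitrary
// large odd number chosen for this sketch.
func hash6(u uint64) uint32 {
	const prime = 0x9E3779B185EBCA87
	return uint32(((u << 16) * prime) >> (64 - tableBits))
}

func main() {
	var table [tableSize]uint32
	h := hash6(0x0000_1234_5678_9abc)
	table[h] = 42 // safe: h < tableSize by construction
	fmt.Println(h, table[h])
}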
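The largest change is in fse_decoder.go: decSymbol turns from a four-field struct into a packed uint64 with nbBits, addBits, newState, and baseline at fixed offsets, so a table entry is one aligned load and moves through the sequence decoder in a register. A reduced sketch of the same layout and setter discipline, with a hypothetical type name:

package main

import "fmt"

// packedSym packs {nbBits:8, addBits:8, newState:16, baseline:32}
// into one word, mirroring the decSymbol layout in the patch.
type packedSym uint64

func newPackedSym(nbBits, addBits uint8, newState uint16, baseline uint32) packedSym {
	return packedSym(nbBits) |
		packedSym(addBits)<<8 |
		packedSym(newState)<<16 |
		packedSym(baseline)<<32
}

func (p packedSym) nbBits() uint8    { return uint8(p) }
func (p packedSym) addBits() uint8   { return uint8(p >> 8) }
func (p packedSym) newState() uint16 { return uint16(p >> 16) }
func (p packedSym) baseline() uint32 { return uint32(p >> 32) }

// setNewState clears bits 16..31 and writes the new value: the same
// mask-then-or pattern the patch uses for its setters.
func (p *packedSym) setNewState(s uint16) {
	const mask = ^packedSym(0xffff << 16)
	*p = (*p & mask) | packedSym(s)<<16
}

func main() {
	s := newPackedSym(5, 3, 1000, 1<<20)
	s.setNewState(2000)
	fmt.Println(s.nbBits(), s.addBits(), s.newState(), s.baseline())
	// prints: 5 3 2000 1048576
}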
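The packed representation feeds directly into the seqdec.go hot loop, which now refills all three FSE states from a single `br.getBitsFast(nBits)` read and then carves the offset, match-length, and literal-length fields out of it with shifts and `bitMask`. A sketch of just that field-splitting step, assuming the same low-to-high field order as the patch (offset bits lowest, then match length, then literal length):

package main

import "fmt"

// bitMask[n] has the low n bits set, as in the zstd decoder.
var bitMask [16]uint16

func init() {
	for i := range bitMask {
		bitMask[i] = uint16(1<<uint(i) - 1)
	}
}

// splitBits carves three little-endian bit fields out of one combined
// read, mirroring the manually inlined three-state update.
func splitBits(bits uint32, llBits, mlBits, ofBits uint8) (ll, ml, of uint16) {
	ll = uint16(bits>>((ofBits+mlBits)&31)) & bitMask[llBits&15]
	ml = uint16(bits>>(ofBits&31)) & bitMask[mlBits&15]
	of = uint16(bits) & bitMask[ofBits&15]
	return
}

func main() {
	// Fields packed as ll=0b101, ml=0b0011, of=0b11.
	const sample = 0b101_0011_11
	fmt.Println(splitBits(sample, 3, 4, 2)) // prints: 5 3 3
}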
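Finally, the affinity_linux.go hunk only comments out the unused m3/m4 masks of onesCount64, the pre-math/bits popcount fallback, but it is worth spelling out why they are unneeded: after the 8-bit sums, plain shift-adds fold the byte counts together, and the closing `& (1<<7 - 1)` strips the garbage that re-masking with m3/m4 would otherwise have removed. The same reduction, checked against the stdlib:

package main

import (
	"fmt"
	"math/bits"
)

// popcount64 counts set bits by parallel summing, following the
// onesCount64 fallback in affinity_linux.go.
func popcount64(x uint64) int {
	const m0 = 0x5555555555555555 // 01010101 ...
	const m1 = 0x3333333333333333 // 00110011 ...
	const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
	x = x>>1&m0 + x&m0  // pairwise 2-bit sums
	x = x>>2&m1 + x&m1  // 4-bit sums
	x = (x>>4 + x) & m2 // 8-bit sums, one count per byte
	x += x >> 8         // fold byte counts together...
	x += x >> 16
	x += x >> 32 // ...total now sits in the low byte
	// Max total is 64, which fits in 7 bits; the mask discards the
	// unmasked high-byte garbage the folds left behind.
	return int(x) & (1<<7 - 1)
}

func main() {
	for _, v := range []uint64{0, 1, 0xff, ^uint64(0)} {
		fmt.Println(popcount64(v), bits.OnesCount64(v))
	}
}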