vendor: make vendor-update

This commit is contained in:
Aliaksandr Valialkin 2019-08-05 10:33:21 +03:00
parent 8822079b77
commit 05713469c3
16 changed files with 226 additions and 155 deletions

4
go.mod
View file

@ -6,13 +6,13 @@ require (
github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
github.com/golang/snappy v0.0.1
github.com/google/go-cmp v0.3.0 // indirect
github.com/klauspost/compress v1.7.4
github.com/klauspost/compress v1.7.5
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/valyala/fastjson v1.4.1
github.com/valyala/gozstd v1.5.1
github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.1.1
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
)
go 1.12

8
go.sum
View file

@ -20,8 +20,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.7.4 h1:4UqAIzZ1Ns2epCTyJ1d2xMWvxtX+FNSCYWeOFogK9nc=
github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.7.5 h1:NMapGoDIKPKpk2hpcgAU6XHfsREHG2p8PIg7C3f/jpI=
github.com/klauspost/compress v1.7.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@ -49,5 +49,5 @@ github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6
github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View file

@ -1,4 +1,5 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are

View file

@ -247,9 +247,13 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
dstOut := s.Out
dstEvery := (dstSize + 3) / 4
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
single := s.dt.single[:tlSize]
decode := func(br *bitReader) byte {
val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
v := s.dt.single[val]
v := single[val&tlMask]
br.bitsRead += v.nBits
return v.byte
}

View file

@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im
The "Fastest" compression ratio is roughly equivalent to zstd level 1.
The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
@ -217,7 +218,8 @@ silesia.tar zstd 3 211947520 66793301 1377 146.79
As part of the development process a *Snappy* -> *Zstandard* converter was also built.
This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed.
This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream.
Note that a single block is not framed.
Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
Therefore the compression ratio is much less than what can be done by a full decompression

View file

@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
}
// setSizes will set the size of a compressed literals section and the input length.
func (h *literalsHeader) setSizes(compLen, inLen int) {
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
// Only retain 2 bits
const mask = 3
lh := uint64(*h & mask)
switch {
case compBits <= 10 && inBits <= 10:
lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if !single {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
}
case compBits <= 14 && inBits <= 14:
lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
case compBits <= 18 && inBits <= 18:
lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
default:
panic("internal error: block too big")
}
@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
return nil
}
// TODO: Switch to 1X when less than x bytes.
out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
// Bail out if compression is too little.
if len(out) > (len(b.literals) - len(b.literals)>>4) {
var (
out []byte
reUsed, single bool
err error
)
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out if compression is too little.
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out if compression is too little.
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}
switch err {
case huff0.ErrIncompressible:
if debug {
@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
lh.setType(literalsBlockCompressed)
}
// Set sizes
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
bh.setSize(uint32(len(out) + lh.size() + 1))
// Write block headers.
@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
b.output = bh.appendTo(b.output)
var (
out []byte
reUsed bool
err error
out []byte
reUsed, single bool
err error
)
if len(b.literals) > 32 {
// TODO: Switch to 1X on small blocks.
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}
@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
}
}
}
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
if debug {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)

View file

@ -281,17 +281,17 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
d.decoders <- block
frame.rawInput = nil
frame.bBuf = nil
d.frames <- frame
}()
frame.bBuf = input
if cap(dst) == 0 {
// Allocate 1MB by default if nothing is provided.
dst = make([]byte, 0, 1<<20)
}
// Allocation here:
br := byteBuf(input)
for {
err := frame.reset(&br)
err := frame.reset(&frame.bBuf)
if err == io.EOF {
return dst, nil
}
@ -313,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if err != nil {
return dst, err
}
if len(br) == 0 {
if len(frame.bBuf) == 0 {
break
}
}

View file

@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
stepSize++
}
// TEMPLATE
const kSearchStrength = 8
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@ -119,8 +114,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS = nextHashS & dFastShortTableMask
nextHashL = nextHashL & dFastLongTableMask
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -172,8 +167,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
continue
}
const repOff2 = 1
@ -221,8 +214,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
@ -296,8 +287,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}
// A 4-byte match has been found. Update recent offsets.
@ -354,20 +343,18 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0
e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0
e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1
cv = load6432(src, s)
nextHashS = hash5(cv1>>8, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
if !canRepeat {
continue
@ -381,14 +368,17 @@ encodeLoop:
break
}
// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL&dFastLongTableMask] = entry
e.table[nextHashS&dFastShortTableMask] = entry
e.longTable[nextHashL] = entry
e.table[nextHashS] = entry
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
@ -408,8 +398,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}
}

View file

@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHash := hash6(cv, hashLog)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
@ -157,8 +155,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHash2 := hash6(cv>>8, hashLog) & tableMask
nextHash = nextHash & tableMask
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
@ -207,8 +205,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
//nextHash = hashLen(cv, hashLog, mls)
nextHash = hash6(cv, hashLog)
continue
}
coffset0 := s - (candidate.offset - e.cur)
@ -245,7 +241,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes.
offset2 = offset1
@ -292,15 +287,16 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
// Check offset 2
if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)}
nextHash := hash6(cv, hashLog)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
@ -319,7 +315,6 @@ encodeLoop:
}
// Prepare next loop.
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
}

View file

@ -39,6 +39,9 @@ type frameDec struct {
rawInput byteBuffer
// Byte buffer that can be reused for small input blocks.
bBuf byteBuf
// asyncRunning indicates whether the async routine processes input on 'decoding'.
asyncRunning bool
asyncRunningMu sync.Mutex

View file

@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
// decSymbol contains information about a state entry,
// including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
type decSymbol struct {
newState uint16
addBits uint8 // Used for symbols until transformed.
nbBits uint8
baseline uint32
// Using a composite uint64 is faster than a struct with separate members.
// Bit layout, from least to most significant:
//
//	bits  0-7   nbBits
//	bits  8-15  addBits
//	bits 16-31  newState
//	bits 32-63  baseline
type decSymbol uint64

// newDecSymbol packs the four fields into a single decSymbol value.
func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
	packed := decSymbol(baseline) << 32
	packed |= decSymbol(newState) << 16
	packed |= decSymbol(addBits) << 8
	packed |= decSymbol(nbits)
	return packed
}

// nbBits returns the value stored in bits 0-7.
func (d decSymbol) nbBits() uint8 {
	return uint8(d & 0xff)
}

// addBits returns the value stored in bits 8-15.
func (d decSymbol) addBits() uint8 {
	return uint8((d >> 8) & 0xff)
}

// newState returns the value stored in bits 16-31.
func (d decSymbol) newState() uint16 {
	return uint16((d >> 16) & 0xffff)
}

// baseline returns the value stored in bits 32-63.
func (d decSymbol) baseline() uint32 {
	return uint32(d >> 32)
}

// baselineInt returns the baseline field converted to an int.
func (d decSymbol) baselineInt() int {
	return int(d.baseline())
}

// set overwrites every field of d at once.
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
	*d = newDecSymbol(nbits, addBits, newState, baseline)
}

// setNBits replaces bits 0-7 and leaves the other fields untouched.
func (d *decSymbol) setNBits(nBits uint8) {
	*d = (*d &^ 0xff) | decSymbol(nBits)
}

// setAddBits replaces bits 8-15 and leaves the other fields untouched.
func (d *decSymbol) setAddBits(addBits uint8) {
	*d = (*d &^ (0xff << 8)) | decSymbol(addBits)<<8
}

// setNewState replaces bits 16-31 and leaves the other fields untouched.
func (d *decSymbol) setNewState(state uint16) {
	*d = (*d &^ (0xffff << 16)) | decSymbol(state)<<16
}

// setBaseline replaces bits 32-63 and leaves the low 32 bits untouched.
func (d *decSymbol) setBaseline(baseline uint32) {
	low := *d & (1<<32 - 1)
	*d = low | decSymbol(baseline)<<32
}

// setExt replaces addBits and baseline in one step; nbBits and newState
// are carried over unchanged.
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
	kept := decSymbol(d.nbBits()) | decSymbol(d.newState())<<16
	*d = kept | decSymbol(addBits)<<8 | decSymbol(baseline)<<32
}
// decSymbolValue returns the transformed decSymbol for the given symbol.
func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
if int(symb) >= len(t) {
return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
}
lu := t[symb]
return decSymbol{
addBits: lu.addBits,
baseline: lu.baseLine,
}, nil
return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
}
// setRLE will set the decoder to RLE mode.
func (s *fseDecoder) setRLE(symbol decSymbol) {
s.actualTableLog = 0
s.maxBits = symbol.addBits
s.maxBits = symbol.addBits()
s.dt[0] = symbol
}
@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
{
for i, v := range s.norm[:s.symbolLen] {
if v == -1 {
s.dt[highThreshold].addBits = uint8(i)
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].addBits = uint8(ss)
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
// lowprob area
@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
{
tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] {
symbol := v.addBits
symbol := v.addBits()
nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].nbBits = nBits
s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize
if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
// Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
}
s.dt[u&maxTableMask].newState = newState
s.dt[u&maxTableMask].setNewState(newState)
}
}
return nil
@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
tableSize := uint16(1 << s.actualTableLog)
s.maxBits = 0
for i, v := range s.dt[:tableSize] {
if int(v.addBits) >= len(t) {
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t))
add := v.addBits()
if int(add) >= len(t) {
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
}
lu := t[v.addBits]
lu := t[add]
if lu.addBits > s.maxBits {
s.maxBits = lu.addBits
}
s.dt[i&maxTableMask] = decSymbol{
newState: v.newState,
nbBits: v.nbBits,
addBits: lu.addBits,
baseline: lu.baseLine,
}
v.setExt(lu.addBits, lu.baseLine)
s.dt[i] = v
}
return nil
}
type fseState struct {
// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
dt []decSymbol
state decSymbol
}
@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
// next returns the current symbol and sets the next state.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) {
lowBits := uint16(br.getBits(s.state.nbBits))
s.state = s.dt[s.state.newState+lowBits]
lowBits := uint16(br.getBits(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
}
// finished returns true if all bits have been read from the bitstream
// and the next state would require reading bits from the input.
func (s *fseState) finished(br *bitReader) bool {
return br.finished() && s.state.nbBits > 0
return br.finished() && s.state.nbBits() > 0
}
// final returns the current state symbol without decoding the next.
func (s *fseState) final() (int, uint8) {
return int(s.state.baseline), s.state.addBits
return s.state.baselineInt(), s.state.addBits()
}
// final unpacks the symbol into its baseline (as an int) and its addBits
// value. It only reads the packed fields of d.
func (d decSymbol) final() (int, uint8) {
	base := d.baselineInt()
	extra := d.addBits()
	return base, extra
}
// nextFast returns the next symbol and sets the next state.
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits))
s.state = s.dt[s.state.newState+lowBits]
return s.state.baseline, s.state.addBits
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

View file

@ -89,6 +89,10 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
startSize := len(s.out)
// Grab full-size tables to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
for i := seqs - 1; i >= 0; i-- {
if br.overread() {
printf("reading sequence %d, exceeded available data\n", seqs-i)
@ -96,10 +100,10 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
}
var litLen, matchOff, matchLen int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
litLen, matchOff, matchLen = s.nextFast(br)
litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
br.fillFast()
} else {
litLen, matchOff, matchLen = s.next(br)
litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
br.fill()
}
@ -175,30 +179,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
// This is the last sequence, so we shouldn't update state.
break
}
if true {
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits + b.nbBits + c.nbBits
if nBits == 0 {
s.litLengths.state.state = s.litLengths.state.dt[a.newState]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState]
s.offsets.state.state = s.offsets.state.dt[c.newState]
} else {
bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits]
lowBits = uint16(bits >> (c.nbBits & 31))
lowBits &= bitMask[b.nbBits&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits&15]
s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits]
}
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
if nBits == 0 {
llState = llTable[llState.newState()&maxTableMask]
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
s.updateAlt(br)
bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}
}
@ -230,55 +229,49 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
// Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits + b.nbBits + c.nbBits
nBits := a.nbBits() + b.nbBits() + c.nbBits()
if nBits == 0 {
s.litLengths.state.state = s.litLengths.state.dt[a.newState]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState]
s.offsets.state.state = s.offsets.state.dt[c.newState]
s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits]
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
lowBits = uint16(bits >> (c.nbBits & 31))
lowBits &= bitMask[b.nbBits&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits]
lowBits = uint16(bits >> (c.nbBits() & 31))
lowBits &= bitMask[b.nbBits()&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits&15]
s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits()&15]
s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
}
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) {
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := s.litLengths.state.final()
ml, mlB := s.matchLengths.state.final()
mo, moB := s.offsets.state.final()
ll, llB := llState.final()
ml, mlB := mlState.final()
mo, moB := ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
if s.maxBits <= 32 {
mo += br.getBits(moB)
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
return
}
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
@ -312,11 +305,11 @@ func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) {
return
}
func (s *sequenceDecs) next(br *bitReader) (ll, mo, ml int) {
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := s.litLengths.state.final()
ml, mlB := s.matchLengths.state.final()
mo, moB := s.offsets.state.final()
ll, llB := llState.final()
ml, mlB := mlState.final()
mo, moB := ofState.final()
// extra bits are stored in reverse order.
br.fill()

View file

@ -91,9 +91,13 @@ func onesCount64(x uint64) int {
const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
const m3 = 0x00ff00ff00ff00ff // etc.
const m4 = 0x0000ffff0000ffff
// Unused in this function, but definitions preserved for
// documentation purposes:
//
// const m3 = 0x00ff00ff00ff00ff // etc.
// const m4 = 0x0000ffff0000ffff
//
// Implementation: Parallel summing of adjacent bits.
// See "Hacker's Delight", Chap. 5: Counting Bits.
// The following pattern shows the general approach:

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
package unix

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le
// +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le riscv64
package unix

4
vendor/modules.txt vendored
View file

@ -6,7 +6,7 @@ github.com/VictoriaMetrics/metrics
github.com/cespare/xxhash/v2
# github.com/golang/snappy v0.0.1
github.com/golang/snappy
# github.com/klauspost/compress v1.7.4
# github.com/klauspost/compress v1.7.5
github.com/klauspost/compress/fse
github.com/klauspost/compress/huff0
github.com/klauspost/compress/snappy
@ -24,5 +24,5 @@ github.com/valyala/gozstd
github.com/valyala/histogram
# github.com/valyala/quicktemplate v1.1.1
github.com/valyala/quicktemplate
# golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7
# golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
golang.org/x/sys/unix