vendor: make vendor-update

This commit is contained in:
Aliaksandr Valialkin 2019-08-05 10:33:21 +03:00
parent 8822079b77
commit 05713469c3
16 changed files with 226 additions and 155 deletions

4
go.mod
View file

@ -6,13 +6,13 @@ require (
github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18 github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
github.com/golang/snappy v0.0.1 github.com/golang/snappy v0.0.1
github.com/google/go-cmp v0.3.0 // indirect github.com/google/go-cmp v0.3.0 // indirect
github.com/klauspost/compress v1.7.4 github.com/klauspost/compress v1.7.5
github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/valyala/fastjson v1.4.1 github.com/valyala/fastjson v1.4.1
github.com/valyala/gozstd v1.5.1 github.com/valyala/gozstd v1.5.1
github.com/valyala/histogram v1.0.1 github.com/valyala/histogram v1.0.1
github.com/valyala/quicktemplate v1.1.1 github.com/valyala/quicktemplate v1.1.1
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
) )
go 1.12 go 1.12

8
go.sum
View file

@ -20,8 +20,8 @@ github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.7.4 h1:4UqAIzZ1Ns2epCTyJ1d2xMWvxtX+FNSCYWeOFogK9nc= github.com/klauspost/compress v1.7.5 h1:NMapGoDIKPKpk2hpcgAU6XHfsREHG2p8PIg7C3f/jpI=
github.com/klauspost/compress v1.7.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.7.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE= github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@ -49,5 +49,5 @@ github.com/valyala/quicktemplate v1.1.1 h1:C58y/wN0FMTi2PR0n3onltemfFabany53j7M6
github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4= github.com/valyala/quicktemplate v1.1.1/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180911220305-26e67e76b6c3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 h1:LepdCS8Gf/MVejFIt8lsiexZATdoGVyp5bcyS+rYoUI= golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View file

@ -1,4 +1,5 @@
Copyright (c) 2012 The Go Authors. All rights reserved. Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are

View file

@ -247,9 +247,13 @@ func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
dstOut := s.Out dstOut := s.Out
dstEvery := (dstSize + 3) / 4 dstEvery := (dstSize + 3) / 4
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
single := s.dt.single[:tlSize]
decode := func(br *bitReader) byte { decode := func(br *bitReader) byte {
val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
v := s.dt.single[val] v := single[val&tlMask]
br.bitsRead += v.nBits br.bitsRead += v.nBits
return v.byte return v.byte
} }

View file

@ -34,7 +34,8 @@ For now, a high speed (fastest) and medium-fast (default) compressor has been im
The "Fastest" compression ratio is roughly equivalent to zstd level 1. The "Fastest" compression ratio is roughly equivalent to zstd level 1.
The "Default" compression ratio is roughly equivalent to zstd level 3 (default). The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast. In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2. Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
@ -217,7 +218,8 @@ silesia.tar zstd 3 211947520 66793301 1377 146.79
As part of the development process a *Snappy* -> *Zstandard* converter was also built. As part of the development process a *Snappy* -> *Zstandard* converter was also built.
This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed. This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream.
Note that a single block is not framed.
Conversion is done by converting the stream directly from Snappy without intermediate full decoding. Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
Therefore the compression ratio is much less than what can be done by a full decompression Therefore the compression ratio is much less than what can be done by a full decompression

View file

@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
} }
// setSizes will set the size of a compressed literals section and the input length. // setSizes will set the size of a compressed literals section and the input length.
func (h *literalsHeader) setSizes(compLen, inLen int) { func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen)) compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
// Only retain 2 bits // Only retain 2 bits
const mask = 3 const mask = 3
lh := uint64(*h & mask) lh := uint64(*h & mask)
switch { switch {
case compBits <= 10 && inBits <= 10: case compBits <= 10 && inBits <= 10:
lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) if !single {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug { if debug {
const mmask = (1 << 24) - 1 const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask n := (lh >> 4) & mmask
@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
} }
case compBits <= 14 && inBits <= 14: case compBits <= 14 && inBits <= 14:
lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60) lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
case compBits <= 18 && inBits <= 18: case compBits <= 18 && inBits <= 18:
lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60) lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
default: default:
panic("internal error: block too big") panic("internal error: block too big")
} }
@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
return nil return nil
} }
// TODO: Switch to 1X when less than x bytes. var (
out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc) out []byte
// Bail out of compression is too little. reUsed, single bool
if len(out) > (len(b.literals) - len(b.literals)>>4) { err error
)
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible err = huff0.ErrIncompressible
} }
switch err { switch err {
case huff0.ErrIncompressible: case huff0.ErrIncompressible:
if debug { if debug {
@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
lh.setType(literalsBlockCompressed) lh.setType(literalsBlockCompressed)
} }
// Set sizes // Set sizes
lh.setSizes(len(out), len(b.literals)) lh.setSizes(len(out), len(b.literals), single)
bh.setSize(uint32(len(out) + lh.size() + 1)) bh.setSize(uint32(len(out) + lh.size() + 1))
// Write block headers. // Write block headers.
@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
b.output = bh.appendTo(b.output) b.output = bh.appendTo(b.output)
var ( var (
out []byte out []byte
reUsed bool reUsed, single bool
err error err error
) )
if len(b.literals) > 32 { if len(b.literals) >= 1024 {
// TODO: Switch to 1X on small blocks. // Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 { if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible err = huff0.ErrIncompressible
} }
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else { } else {
err = huff0.ErrIncompressible err = huff0.ErrIncompressible
} }
@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
} }
} }
} }
lh.setSizes(len(out), len(b.literals)) lh.setSizes(len(out), len(b.literals), single)
if debug { if debug {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh) println("Adding literal header:", lh)

View file

@ -281,17 +281,17 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
} }
d.decoders <- block d.decoders <- block
frame.rawInput = nil frame.rawInput = nil
frame.bBuf = nil
d.frames <- frame d.frames <- frame
}() }()
frame.bBuf = input
if cap(dst) == 0 { if cap(dst) == 0 {
// Allocate 1MB by default if nothing is provided. // Allocate 1MB by default if nothing is provided.
dst = make([]byte, 0, 1<<20) dst = make([]byte, 0, 1<<20)
} }
// Allocation here:
br := byteBuf(input)
for { for {
err := frame.reset(&br) err := frame.reset(&frame.bBuf)
if err == io.EOF { if err == io.EOF {
return dst, nil return dst, nil
} }
@ -313,7 +313,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if err != nil { if err != nil {
return dst, err return dst, err
} }
if len(br) == 0 { if len(frame.bBuf) == 0 {
break break
} }
} }

View file

@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
stepSize++ stepSize++
} }
// TEMPLATE
const kSearchStrength = 8 const kSearchStrength = 8
// nextEmit is where in src the next emitLiteral should start from. // nextEmit is where in src the next emitLiteral should start from.
nextEmit := s nextEmit := s
cv := load6432(src, s) cv := load6432(src, s)
// nextHash is the hash at s
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// Relative offsets // Relative offsets
offset1 := int32(blk.recentOffsets[0]) offset1 := int32(blk.recentOffsets[0])
@ -119,8 +114,8 @@ encodeLoop:
panic("offset0 was 0") panic("offset0 was 0")
} }
nextHashS = nextHashS & dFastShortTableMask nextHashS := hash5(cv, dFastShortTableBits)
nextHashL = nextHashL & dFastLongTableMask nextHashL := hash8(cv, dFastLongTableBits)
candidateL := e.longTable[nextHashL] candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS] candidateS := e.table[nextHashS]
@ -172,8 +167,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
continue continue
} }
const repOff2 = 1 const repOff2 = 1
@ -221,8 +214,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
// Swap offsets // Swap offsets
offset1, offset2 = offset2, offset1 offset1, offset2 = offset2, offset1
continue continue
@ -296,8 +287,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
} }
// A 4-byte match has been found. Update recent offsets. // A 4-byte match has been found. Update recent offsets.
@ -354,20 +343,18 @@ encodeLoop:
cv1 := load6432(src, index1) cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0 e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1 e.longTable[hash8(cv1, dFastLongTableBits)] = te1
cv0 >>= 8 cv0 >>= 8
cv1 >>= 8 cv1 >>= 8
te0.offset++ te0.offset++
te1.offset++ te1.offset++
te0.val = uint32(cv0) te0.val = uint32(cv0)
te1.val = uint32(cv1) te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0 e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1 e.table[hash5(cv1, dFastShortTableBits)] = te1
cv = load6432(src, s) cv = load6432(src, s)
nextHashS = hash5(cv1>>8, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
if !canRepeat { if !canRepeat {
continue continue
@ -381,14 +368,17 @@ encodeLoop:
break break
} }
// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match. // We have at least 4 byte match.
// No need to check backwards. We come straight from a match // No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src) l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
entry := tableEntry{offset: s + e.cur, val: uint32(cv)} entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL&dFastLongTableMask] = entry e.longTable[nextHashL] = entry
e.table[nextHashS&dFastShortTableMask] = entry e.table[nextHashS] = entry
seq.matchLen = uint32(l) - zstdMinMatch seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0 seq.litLen = 0
@ -408,8 +398,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
} }
} }

View file

@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// nextEmit is where in src the next emitLiteral should start from. // nextEmit is where in src the next emitLiteral should start from.
nextEmit := s nextEmit := s
cv := load6432(src, s) cv := load6432(src, s)
// nextHash is the hash at s
nextHash := hash6(cv, hashLog)
// Relative offsets // Relative offsets
offset1 := int32(blk.recentOffsets[0]) offset1 := int32(blk.recentOffsets[0])
@ -157,8 +155,8 @@ encodeLoop:
panic("offset0 was 0") panic("offset0 was 0")
} }
nextHash2 := hash6(cv>>8, hashLog) & tableMask nextHash := hash6(cv, hashLog)
nextHash = nextHash & tableMask nextHash2 := hash6(cv>>8, hashLog)
candidate := e.table[nextHash] candidate := e.table[nextHash]
candidate2 := e.table[nextHash2] candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2 repIndex := s - offset1 + 2
@ -207,8 +205,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
//nextHash = hashLen(cv, hashLog, mls)
nextHash = hash6(cv, hashLog)
continue continue
} }
coffset0 := s - (candidate.offset - e.cur) coffset0 := s - (candidate.offset - e.cur)
@ -245,7 +241,6 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
} }
// A 4-byte match has been found. We'll later see if more than 4 bytes. // A 4-byte match has been found. We'll later see if more than 4 bytes.
offset2 = offset1 offset2 = offset1
@ -292,15 +287,16 @@ encodeLoop:
break encodeLoop break encodeLoop
} }
cv = load6432(src, s) cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
// Check offset 2 // Check offset 2
if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) { if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match. // We have at least 4 byte match.
// No need to check backwards. We come straight from a match // No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src) l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it. // Store this, since we have it.
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)} nextHash := hash6(cv, hashLog)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0 seq.litLen = 0
// Since litlen is always 0, this is offset 1. // Since litlen is always 0, this is offset 1.
@ -319,7 +315,6 @@ encodeLoop:
} }
// Prepare next loop. // Prepare next loop.
cv = load6432(src, s) cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
} }
} }

View file

@ -39,6 +39,9 @@ type frameDec struct {
rawInput byteBuffer rawInput byteBuffer
// Byte buffer that can be reused for small input blocks.
bBuf byteBuf
// asyncRunning indicates whether the async routine processes input on 'decoding'. // asyncRunning indicates whether the async routine processes input on 'decoding'.
asyncRunning bool asyncRunning bool
asyncRunningMu sync.Mutex asyncRunningMu sync.Mutex

View file

@ -184,29 +184,75 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
// decSymbol contains information about a state entry, // decSymbol contains information about a state entry,
// Including the state offset base, the output symbol and // Including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state. // the number of bits to read for the low part of the destination state.
type decSymbol struct { // Using a composite uint64 is faster than a struct with separate members.
newState uint16 type decSymbol uint64
addBits uint8 // Used for symbols until transformed.
nbBits uint8 func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
baseline uint32 return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
}
// nbBits returns the value held in the lowest 8 bits of d.
func (d decSymbol) nbBits() uint8 {
	low := d & 0xff
	return uint8(low)
}
// addBits returns the value held in bits 8-15 of d.
func (d decSymbol) addBits() uint8 {
	shifted := d >> 8
	return uint8(shifted & 0xff)
}
// newState returns the value held in bits 16-31 of d.
func (d decSymbol) newState() uint16 {
	shifted := d >> 16
	return uint16(shifted & 0xffff)
}
// baseline returns the upper 32 bits of d as a uint32.
func (d decSymbol) baseline() uint32 {
	upper := uint64(d) >> 32
	return uint32(upper)
}
// baselineInt returns the upper 32 bits of d converted to an int.
func (d decSymbol) baselineInt() int {
	upper := d >> 32
	return int(upper)
}
// set overwrites d with a value packed from the four fields:
// nbits in bits 0-7, addBits in bits 8-15, newState in bits 16-31
// and baseline in bits 32-63.
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
	packed := decSymbol(baseline) << 32
	packed |= decSymbol(newState) << 16
	packed |= decSymbol(addBits) << 8
	packed |= decSymbol(nbits)
	*d = packed
}
// setNBits replaces the lowest 8 bits of d with nBits and keeps all other bits.
func (d *decSymbol) setNBits(nBits uint8) {
	kept := *d &^ 0xff
	*d = kept | decSymbol(nBits)
}
// setAddBits replaces bits 8-15 of d with addBits and keeps all other bits.
func (d *decSymbol) setAddBits(addBits uint8) {
	kept := *d &^ (0xff << 8)
	*d = kept | (decSymbol(addBits) << 8)
}
// setNewState replaces bits 16-31 of d with state and keeps all other bits.
func (d *decSymbol) setNewState(state uint16) {
	kept := *d &^ (0xffff << 16)
	*d = kept | (decSymbol(state) << 16)
}
// setBaseline replaces the upper 32 bits of d with baseline and keeps the
// lower 32 bits.
func (d *decSymbol) setBaseline(baseline uint32) {
	lower := decSymbol(uint32(*d))
	upper := decSymbol(baseline) << 32
	*d = lower | upper
}
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
const mask = 0xffff00ff
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
} }
// decSymbolValue returns the transformed decSymbol for the given symbol. // decSymbolValue returns the transformed decSymbol for the given symbol.
func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) { func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
if int(symb) >= len(t) { if int(symb) >= len(t) {
return decSymbol{}, fmt.Errorf("rle symbol %d >= max %d", symb, len(t)) return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
} }
lu := t[symb] lu := t[symb]
return decSymbol{ return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
addBits: lu.addBits,
baseline: lu.baseLine,
}, nil
} }
// setRLE will set the decoder to RLE mode. // setRLE will set the decoder to RLE mode.
func (s *fseDecoder) setRLE(symbol decSymbol) { func (s *fseDecoder) setRLE(symbol decSymbol) {
s.actualTableLog = 0 s.actualTableLog = 0
s.maxBits = symbol.addBits s.maxBits = symbol.addBits()
s.dt[0] = symbol s.dt[0] = symbol
} }
@ -220,7 +266,7 @@ func (s *fseDecoder) buildDtable() error {
{ {
for i, v := range s.norm[:s.symbolLen] { for i, v := range s.norm[:s.symbolLen] {
if v == -1 { if v == -1 {
s.dt[highThreshold].addBits = uint8(i) s.dt[highThreshold].setAddBits(uint8(i))
highThreshold-- highThreshold--
symbolNext[i] = 1 symbolNext[i] = 1
} else { } else {
@ -235,7 +281,7 @@ func (s *fseDecoder) buildDtable() error {
position := uint32(0) position := uint32(0)
for ss, v := range s.norm[:s.symbolLen] { for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ { for i := 0; i < int(v); i++ {
s.dt[position].addBits = uint8(ss) s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask position = (position + step) & tableMask
for position > highThreshold { for position > highThreshold {
// lowprob area // lowprob area
@ -253,11 +299,11 @@ func (s *fseDecoder) buildDtable() error {
{ {
tableSize := uint16(1 << s.actualTableLog) tableSize := uint16(1 << s.actualTableLog)
for u, v := range s.dt[:tableSize] { for u, v := range s.dt[:tableSize] {
symbol := v.addBits symbol := v.addBits()
nextState := symbolNext[symbol] nextState := symbolNext[symbol]
symbolNext[symbol] = nextState + 1 symbolNext[symbol] = nextState + 1
nBits := s.actualTableLog - byte(highBits(uint32(nextState))) nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
s.dt[u&maxTableMask].nbBits = nBits s.dt[u&maxTableMask].setNBits(nBits)
newState := (nextState << nBits) - tableSize newState := (nextState << nBits) - tableSize
if newState > tableSize { if newState > tableSize {
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
@ -266,7 +312,7 @@ func (s *fseDecoder) buildDtable() error {
// Seems weird that this is possible with nbits > 0. // Seems weird that this is possible with nbits > 0.
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
} }
s.dt[u&maxTableMask].newState = newState s.dt[u&maxTableMask].setNewState(newState)
} }
} }
return nil return nil
@ -279,25 +325,21 @@ func (s *fseDecoder) transform(t []baseOffset) error {
tableSize := uint16(1 << s.actualTableLog) tableSize := uint16(1 << s.actualTableLog)
s.maxBits = 0 s.maxBits = 0
for i, v := range s.dt[:tableSize] { for i, v := range s.dt[:tableSize] {
if int(v.addBits) >= len(t) { add := v.addBits()
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits, len(t)) if int(add) >= len(t) {
return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
} }
lu := t[v.addBits] lu := t[add]
if lu.addBits > s.maxBits { if lu.addBits > s.maxBits {
s.maxBits = lu.addBits s.maxBits = lu.addBits
} }
s.dt[i&maxTableMask] = decSymbol{ v.setExt(lu.addBits, lu.baseLine)
newState: v.newState, s.dt[i] = v
nbBits: v.nbBits,
addBits: lu.addBits,
baseline: lu.baseLine,
}
} }
return nil return nil
} }
type fseState struct { type fseState struct {
// TODO: Check if *[1 << maxTablelog]decSymbol is faster.
dt []decSymbol dt []decSymbol
state decSymbol state decSymbol
} }
@ -312,26 +354,31 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
// next returns the current symbol and sets the next state. // next returns the current symbol and sets the next state.
// At least tablelog bits must be available in the bit reader. // At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) { func (s *fseState) next(br *bitReader) {
lowBits := uint16(br.getBits(s.state.nbBits)) lowBits := uint16(br.getBits(s.state.nbBits()))
s.state = s.dt[s.state.newState+lowBits] s.state = s.dt[s.state.newState()+lowBits]
} }
// finished returns true if all bits have been read from the bitstream // finished returns true if all bits have been read from the bitstream
// and the next state would require reading bits from the input. // and the next state would require reading bits from the input.
func (s *fseState) finished(br *bitReader) bool { func (s *fseState) finished(br *bitReader) bool {
return br.finished() && s.state.nbBits > 0 return br.finished() && s.state.nbBits() > 0
} }
// final returns the current state symbol without decoding the next. // final returns the current state symbol without decoding the next.
func (s *fseState) final() (int, uint8) { func (s *fseState) final() (int, uint8) {
return int(s.state.baseline), s.state.addBits return s.state.baselineInt(), s.state.addBits()
}
// final returns the current state symbol without decoding the next.
func (s decSymbol) final() (int, uint8) {
return s.baselineInt(), s.addBits()
} }
// nextFast returns the next symbol and sets the next state. // nextFast returns the next symbol and sets the next state.
// This can only be used if no symbols are 0 bits. // This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader. // At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits)) lowBits := uint16(br.getBitsFast(s.state.nbBits()))
s.state = s.dt[s.state.newState+lowBits] s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline, s.state.addBits return s.state.baseline(), s.state.addBits()
} }

View file

@ -89,6 +89,10 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []
// decode sequences from the stream with the provided history. // decode sequences from the stream with the provided history.
func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error { func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
startSize := len(s.out) startSize := len(s.out)
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
for i := seqs - 1; i >= 0; i-- { for i := seqs - 1; i >= 0; i-- {
if br.overread() { if br.overread() {
printf("reading sequence %d, exceeded available data\n", seqs-i) printf("reading sequence %d, exceeded available data\n", seqs-i)
@ -96,10 +100,10 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
} }
var litLen, matchOff, matchLen int var litLen, matchOff, matchLen int
if br.off > 4+((maxOffsetBits+16+16)>>3) { if br.off > 4+((maxOffsetBits+16+16)>>3) {
litLen, matchOff, matchLen = s.nextFast(br) litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
br.fillFast() br.fillFast()
} else { } else {
litLen, matchOff, matchLen = s.next(br) litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
br.fill() br.fill()
} }
@ -175,30 +179,25 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
// This is the last sequence, so we shouldn't update state. // This is the last sequence, so we shouldn't update state.
break break
} }
if true {
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits + b.nbBits + c.nbBits // Manually inlined, ~ 5-20% faster
if nBits == 0 { // Update all 3 states at once. Approx 20% faster.
s.litLengths.state.state = s.litLengths.state.dt[a.newState] nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState] if nBits == 0 {
s.offsets.state.state = s.offsets.state.dt[c.newState] llState = llTable[llState.newState()&maxTableMask]
} else { mlState = mlTable[mlState.newState()&maxTableMask]
bits := br.getBitsFast(nBits) ofState = ofTable[ofState.newState()&maxTableMask]
lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits]
lowBits = uint16(bits >> (c.nbBits & 31))
lowBits &= bitMask[b.nbBits&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits&15]
s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits]
}
} else { } else {
s.updateAlt(br) bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
} }
} }
@ -230,55 +229,49 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
// Update all 3 states at once. Approx 20% faster. // Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits + b.nbBits + c.nbBits nBits := a.nbBits() + b.nbBits() + c.nbBits()
if nBits == 0 { if nBits == 0 {
s.litLengths.state.state = s.litLengths.state.dt[a.newState] s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState] s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
s.offsets.state.state = s.offsets.state.dt[c.newState] s.offsets.state.state = s.offsets.state.dt[c.newState()]
return return
} }
bits := br.getBitsFast(nBits) bits := br.getBitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits + b.nbBits) & 31)) lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState+lowBits] s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
lowBits = uint16(bits >> (c.nbBits & 31)) lowBits = uint16(bits >> (c.nbBits() & 31))
lowBits &= bitMask[b.nbBits&15] lowBits &= bitMask[b.nbBits()&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState+lowBits] s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits&15] lowBits = uint16(bits) & bitMask[c.nbBits()&15]
s.offsets.state.state = s.offsets.state.dt[c.newState+lowBits] s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
} }
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done. // nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) { func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream. // Final will not read from stream.
ll, llB := s.litLengths.state.final() ll, llB := llState.final()
ml, mlB := s.matchLengths.state.final() ml, mlB := mlState.final()
mo, moB := s.offsets.state.final() mo, moB := ofState.final()
// extra bits are stored in reverse order. // extra bits are stored in reverse order.
br.fillFast() br.fillFast()
if s.maxBits <= 32 { mo += br.getBits(moB)
mo += br.getBits(moB) if s.maxBits > 32 {
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
br.fillFast() br.fillFast()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
} }
ml += br.getBits(mlB)
ll += br.getBits(llB)
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if moB > 1 { if moB > 1 {
s.prevOffset[2] = s.prevOffset[1] s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0] s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo s.prevOffset[0] = mo
return return
} }
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 { if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0. // There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
@ -312,11 +305,11 @@ func (s *sequenceDecs) nextFast(br *bitReader) (ll, mo, ml int) {
return return
} }
func (s *sequenceDecs) next(br *bitReader) (ll, mo, ml int) { func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream. // Final will not read from stream.
ll, llB := s.litLengths.state.final() ll, llB := llState.final()
ml, mlB := s.matchLengths.state.final() ml, mlB := mlState.final()
mo, moB := s.offsets.state.final() mo, moB := ofState.final()
// extra bits are stored in reverse order. // extra bits are stored in reverse order.
br.fill() br.fill()

View file

@ -91,9 +91,13 @@ func onesCount64(x uint64) int {
const m0 = 0x5555555555555555 // 01010101 ... const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ... const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
const m3 = 0x00ff00ff00ff00ff // etc.
const m4 = 0x0000ffff0000ffff
// Unused in this function, but definitions preserved for
// documentation purposes:
//
// const m3 = 0x00ff00ff00ff00ff // etc.
// const m4 = 0x0000ffff0000ffff
//
// Implementation: Parallel summing of adjacent bits. // Implementation: Parallel summing of adjacent bits.
// See "Hacker's Delight", Chap. 5: Counting Bits. // See "Hacker's Delight", Chap. 5: Counting Bits.
// The following pattern shows the general approach: // The following pattern shows the general approach:

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris // +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
package unix package unix

View file

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// //
// +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le // +build 386 amd64 amd64p32 arm arm64 ppc64le mipsle mips64le riscv64
package unix package unix

4
vendor/modules.txt vendored
View file

@ -6,7 +6,7 @@ github.com/VictoriaMetrics/metrics
github.com/cespare/xxhash/v2 github.com/cespare/xxhash/v2
# github.com/golang/snappy v0.0.1 # github.com/golang/snappy v0.0.1
github.com/golang/snappy github.com/golang/snappy
# github.com/klauspost/compress v1.7.4 # github.com/klauspost/compress v1.7.5
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0
github.com/klauspost/compress/snappy github.com/klauspost/compress/snappy
@ -24,5 +24,5 @@ github.com/valyala/gozstd
github.com/valyala/histogram github.com/valyala/histogram
# github.com/valyala/quicktemplate v1.1.1 # github.com/valyala/quicktemplate v1.1.1
github.com/valyala/quicktemplate github.com/valyala/quicktemplate
# golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7 # golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
golang.org/x/sys/unix golang.org/x/sys/unix