make encoding.MarshalVarInt64s faster (#5721)

* make encoding.MarshalVarInt64s faster

* add fast path for MarshalVarInt64s

* make UnmarshalVarUint64s faster

* remove comment
This commit is contained in:
Fuchun Zhang 2024-02-01 11:33:59 +08:00 committed by Aliaksandr Valialkin
parent c91614b626
commit 49e3665d6d
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@ -3,6 +3,7 @@ package encoding
import (
"encoding/binary"
"fmt"
"math/bits"
"sync"
)
@ -91,21 +92,35 @@ func MarshalVarInt64(dst []byte, v int64) []byte {
// MarshalVarInt64s appends marshaled vs to dst and returns the result.
func MarshalVarInt64s(dst []byte, vs []int64) []byte {
for _, v := range vs {
if v < 0x40 && v > -0x40 {
// Fast path
c := int8(v)
v := (c << 1) ^ (c >> 7) // zig-zag encoding without branching.
dst = append(dst, byte(v))
n := uint64((v << 1) ^ (v >> 63))
if n < (1 << 7) {
dst = append(dst, byte(n))
continue
}
v = (v << 1) ^ (v >> 63) // zig-zag encoding without branching.
u := uint64(v)
for u > 0x7f {
dst = append(dst, 0x80|byte(u))
u >>= 7
switch (64 - bits.LeadingZeros64(n>>1)) / 7 {
case 0:
dst = append(dst, byte(n))
case 1:
dst = append(dst, byte(n|0x80), byte(n>>7))
case 2:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte(n>>14))
case 3:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte(n>>21))
case 4:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28))
case 5:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28|0x80), byte(n>>35))
case 6:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28|0x80), byte(n>>35|0x80), byte(n>>42))
case 7:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28|0x80), byte(n>>35|0x80), byte(n>>42|0x80), byte(n>>49))
case 8:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28|0x80), byte(n>>35|0x80), byte(n>>42|0x80), byte(n>>49|0x80), byte(n>>56))
case 9:
fallthrough
default:
dst = append(dst, byte(n|0x80), byte((n>>7)|0x80), byte((n>>14)|0x80), byte((n>>21)|0x80), byte(n>>28|0x80), byte(n>>35|0x80), byte(n>>42|0x80), byte(n>>49|0x80), byte(n>>56|0x80), byte(n>>63))
}
dst = append(dst, byte(u))
}
return dst
}
@ -134,30 +149,57 @@ func UnmarshalVarInt64s(dst []int64, src []byte) ([]byte, error) {
dst[i] = int64(v)
continue
}
// Slow path
u := uint64(c & 0x7f)
startIdx := idx - 1
shift := uint8(0)
for c >= 0x80 {
if idx >= uint(len(src)) {
return nil, fmt.Errorf("unexpected end of encoded varint at byte %d; src=%x", idx-startIdx, src[startIdx:])
}
if idx-startIdx > 9 {
return src[idx:], fmt.Errorf("too long encoded varint; the maximum allowed length is 10 bytes; got %d bytes; src=%x",
(idx-startIdx)+1, src[startIdx:])
}
c = src[idx]
if idx < uint(len(src)) && src[idx] < 0x80 {
// Fast path, for 2 bytes
n := uint64(c&0x7f) | (uint64(src[idx]&0x7f) << 7)
dst[i] = int64(n>>1) ^ (int64(n<<63) >> 63)
idx++
shift += 7
u |= uint64(c&0x7f) << shift
continue
}
v := int64(u>>1) ^ (int64(u<<63) >> 63) // zig-zag decoding without branching.
dst[i] = v
j := idx + 1
for ; j < uint(len(src)); j++ { // find end loc
if src[j] < 0x80 {
break
}
}
if j-idx > 10 {
return nil, fmt.Errorf("cannot unmarshal varint, buffer too long, len=%d", j-idx)
}
dst[i] = unmarshalVarInt64ForOne(src[idx-1 : j+1])
idx = j + 1
}
return src[idx:], nil
}
func unmarshalVarInt64ForOne(buf []byte) int64 {
var n uint64
switch len(buf) {
case 1:
n = uint64(buf[0] & 0x7F)
case 2:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7)
case 3:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14)
case 4:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21)
case 5:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28)
case 6:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35)
case 7:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42)
case 8:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49)
case 9:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49) | (uint64(buf[8]&0x7F) << 56)
case 10:
n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49) | (uint64(buf[8]&0x7F) << 56) | (uint64(buf[9]&0x7F) << 63)
default:
panic("impossible error: buf length must be 1 to 10")
}
return int64(n>>1) ^ (int64(n<<63) >> 63)
}
// MarshalVarUint64 appends marshaled u to dst and returns the result.
func MarshalVarUint64(dst []byte, u uint64) []byte {
var tmp [1]uint64