lib/encoding: optimizing UnmarshalVarUint64 and UnmarshalVarInt64 a bit

This commit is contained in:
Aliaksandr Valialkin 2024-05-12 16:32:11 +02:00
parent 51de9f30fc
commit e66465cb03
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
2 changed files with 192 additions and 21 deletions

View file

@ -85,9 +85,22 @@ func UnmarshalInt64(src []byte) int64 {
// MarshalVarInt64 appends marshalsed v to dst and returns the result.
func MarshalVarInt64(dst []byte, v int64) []byte {
var tmp [1]int64
tmp[0] = v
return MarshalVarInt64s(dst, tmp[:])
u := uint64((v << 1) ^ (v >> 63))
if v < (1<<6) && v > (-1<<6) {
return append(dst, byte(u))
}
if u < (1 << (2 * 7)) {
return append(dst, byte(u|0x80), byte(u>>7))
}
if u < (1 << (3 * 7)) {
return append(dst, byte(u|0x80), byte((u>>7)|0x80), byte(u>>(2*7)))
}
// Slow path for big integers
var tmp [1]uint64
tmp[0] = u
return MarshalVarUint64s(dst, tmp[:])
}
// MarshalVarInt64s appends marshaled vs to dst and returns the result.
@ -154,16 +167,19 @@ func marshalVarInt64sSlow(dst []byte, vs []int64) []byte {
return dst
}
// UnmarshalVarInt64 returns unmarshaled int64 from src and returns the remaining tail from src.
//
// The value is read as an unsigned varint with binary.Uvarint and then
// zig-zag decoded into a signed integer.
//
// If src doesn't start with a valid varint, then src is returned unchanged
// together with zero value and a non-nil error.
func UnmarshalVarInt64(src []byte) ([]byte, int64, error) {
	// TODO substitute binary.Uvarint with binary.Varint when benchmark results will show it is faster.
	// It is slower on amd64/linux Go1.22.
	u64, offset := binary.Uvarint(src)
	if offset <= 0 {
		// binary.Uvarint returns 0 when src is too small and a negative number on 64-bit overflow.
		return src, 0, fmt.Errorf("cannot unmarshal varint")
	}

	// Zig-zag decoding: the lowest bit of u64 holds the sign, the remaining bits hold the magnitude.
	i64 := int64(u64>>1) ^ (int64(u64<<63) >> 63)
	return src[offset:], i64, nil
}
// UnmarshalVarInt64s unmarshals len(dst) int64 values from src to dst
// and returns the remaining tail from src.
// UnmarshalVarInt64s unmarshals len(dst) int64 values from src to dst and returns the remaining tail from src.
func UnmarshalVarInt64s(dst []int64, src []byte) ([]byte, error) {
if len(src) < len(dst) {
return src, fmt.Errorf("too small len(src)=%d; it must be bigger or equal to len(dst)=%d", len(src), len(dst))
@ -270,6 +286,17 @@ func unmarshalVarInt64sSlow(dst []int64, src []byte) ([]byte, error) {
// MarshalVarUint64 appends marshaled u to dst and returns the result.
func MarshalVarUint64(dst []byte, u uint64) []byte {
if u < (1 << 7) {
return append(dst, byte(u))
}
if u < (1 << (2 * 7)) {
return append(dst, byte(u|0x80), byte(u>>7))
}
if u < (1 << (3 * 7)) {
return append(dst, byte(u|0x80), byte((u>>7)|0x80), byte(u>>(2*7)))
}
// Slow path for big integers.
var tmp [1]uint64
tmp[0] = u
return MarshalVarUint64s(dst, tmp[:])
@ -336,16 +363,16 @@ func marshalVarUint64sSlow(dst []byte, us []uint64) []byte {
return dst
}
// UnmarshalVarUint64 returns unmarshaled uint64 from src and returns the remaining tail from src.
//
// The value is read with binary.Uvarint.
//
// If src doesn't start with a valid varint, then src is returned unchanged
// together with zero value and a non-nil error.
func UnmarshalVarUint64(src []byte) ([]byte, uint64, error) {
	u64, offset := binary.Uvarint(src)
	if offset <= 0 {
		// binary.Uvarint returns 0 when src is too small and a negative number on 64-bit overflow.
		return src, 0, fmt.Errorf("cannot read varuint")
	}
	return src[offset:], u64, nil
}
// UnmarshalVarUint64s unmarshals len(dst) uint64 values from src to dst
// and returns the remaining tail from src.
// UnmarshalVarUint64s unmarshals len(dst) uint64 values from src to dst and returns the remaining tail from src.
func UnmarshalVarUint64s(dst []uint64, src []byte) ([]byte, error) {
if len(src) < len(dst) {
return src, fmt.Errorf("too small len(src)=%d; it must be bigger or equal to len(dst)=%d", len(src), len(dst))

View file

@ -79,7 +79,7 @@ func benchmarkMarshalVarUint64s(b *testing.B, maxValue uint64) {
var data []uint64
n := maxValue
for i := 0; i < numsCount; i++ {
if n <= 0 {
if n > maxValue {
n = maxValue
}
data = append(data, n)
@ -119,7 +119,7 @@ func benchmarkMarshalVarInt64s(b *testing.B, maxValue int64) {
var data []int64
n := maxValue
for i := 0; i < numsCount; i++ {
if n <= 0 {
if n < -maxValue {
n = maxValue
}
data = append(data, n)
@ -139,6 +139,52 @@ func benchmarkMarshalVarInt64s(b *testing.B, maxValue int64) {
})
}
// BenchmarkUnmarshalVarUint64 runs benchmarkUnmarshalVarUint64 as a set of
// sub-benchmarks, each with a different upper bound on the encoded values.
func BenchmarkUnmarshalVarUint64(b *testing.B) {
	benchCases := []struct {
		name     string
		maxValue uint64
	}{
		{"up-to-(1<<7)-1", (1 << 7) - 1},
		{"up-to-(1<<14)-1", (1 << 14) - 1},
		{"up-to-(1<<28)-1", (1 << 28) - 1},
		{"up-to-(1<<64)-1", (1 << 64) - 1},
	}
	for _, bc := range benchCases {
		limit := bc.maxValue
		b.Run(bc.name, func(b *testing.B) {
			benchmarkUnmarshalVarUint64(b, limit)
		})
	}
}
// benchmarkUnmarshalVarUint64 measures UnmarshalVarUint64 on a buffer with
// numsCount values encoded via MarshalVarUint64.
//
// The encoded values count down from maxValue; once the counter wraps around
// below zero it exceeds maxValue and is reset back to maxValue.
func benchmarkUnmarshalVarUint64(b *testing.B, maxValue uint64) {
	const numsCount = 8000

	var encoded []byte
	for i, v := 0, maxValue; i < numsCount; i, v = i+1, v-1 {
		if v > maxValue {
			v = maxValue
		}
		encoded = MarshalVarUint64(encoded, v)
	}

	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(numsCount)
	b.RunParallel(func(pb *testing.PB) {
		// Accumulate decoded values locally and publish them once via Sink.
		var total uint64
		for pb.Next() {
			rest := encoded
			for len(rest) > 0 {
				tail, v, err := UnmarshalVarUint64(rest)
				if err != nil {
					panic(fmt.Errorf("unexpected error: %w", err))
				}
				total += v
				rest = tail
			}
		}
		Sink.Add(total)
	})
}
func BenchmarkUnmarshalVarUint64s(b *testing.B) {
b.Run("up-to-(1<<7)-1", func(b *testing.B) {
benchmarkUnmarshalVarUint64s(b, (1<<7)-1)
@ -159,7 +205,7 @@ func benchmarkUnmarshalVarUint64s(b *testing.B, maxValue uint64) {
var data []byte
n := maxValue
for i := 0; i < numsCount; i++ {
if n <= 0 {
if n > maxValue {
n = maxValue
}
data = MarshalVarUint64(data, n)
@ -185,6 +231,52 @@ func benchmarkUnmarshalVarUint64s(b *testing.B, maxValue uint64) {
})
}
// BenchmarkUnmarshalVarInt64 runs benchmarkUnmarshalVarInt64 as a set of
// sub-benchmarks, each with a different bound on the magnitude of the encoded values.
func BenchmarkUnmarshalVarInt64(b *testing.B) {
	benchCases := []struct {
		name     string
		maxValue int64
	}{
		{"up-to-(1<<6)-1", (1 << 6) - 1},
		{"up-to-(1<<13)-1", (1 << 13) - 1},
		{"up-to-(1<<27)-1", (1 << 27) - 1},
		{"up-to-(1<<63)-1", (1 << 63) - 1},
	}
	for _, bc := range benchCases {
		limit := bc.maxValue
		b.Run(bc.name, func(b *testing.B) {
			benchmarkUnmarshalVarInt64(b, limit)
		})
	}
}
// benchmarkUnmarshalVarInt64 measures UnmarshalVarInt64 on a buffer with
// numsCount values encoded via MarshalVarInt64.
//
// The encoded values count down from maxValue to -maxValue and then restart
// from maxValue, so both positive and negative numbers are covered.
func benchmarkUnmarshalVarInt64(b *testing.B, maxValue int64) {
	const numsCount = 8000

	var encoded []byte
	for i, v := 0, maxValue; i < numsCount; i, v = i+1, v-1 {
		if v < -maxValue {
			v = maxValue
		}
		encoded = MarshalVarInt64(encoded, v)
	}

	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(numsCount)
	b.RunParallel(func(pb *testing.PB) {
		// Accumulate decoded values locally and publish them once via Sink.
		var total uint64
		for pb.Next() {
			rest := encoded
			for len(rest) > 0 {
				tail, v, err := UnmarshalVarInt64(rest)
				if err != nil {
					panic(fmt.Errorf("unexpected error: %w", err))
				}
				total += uint64(v)
				rest = tail
			}
		}
		Sink.Add(total)
	})
}
func BenchmarkUnmarshalVarInt64s(b *testing.B) {
b.Run("up-to-(1<<6)-1", func(b *testing.B) {
benchmarkUnmarshalVarInt64s(b, (1<<6)-1)
@ -205,7 +297,7 @@ func benchmarkUnmarshalVarInt64s(b *testing.B, maxValue int64) {
var data []byte
n := maxValue
for i := 0; i < numsCount; i++ {
if n <= 0 {
if n < -maxValue {
n = maxValue
}
data = MarshalVarInt64(data, n)
@ -231,5 +323,57 @@ func benchmarkUnmarshalVarInt64s(b *testing.B, maxValue int64) {
})
}
// BenchmarkMarshalVarUint64 runs benchmarkMarshalVarUint64 on two fixed inputs:
// values below 1<<7 ("small-ints") and bigger values ("big-ints").
func BenchmarkMarshalVarUint64(b *testing.B) {
	benchCases := []struct {
		name   string
		values []uint64
	}{
		{"small-ints", []uint64{1, 2, 3, 4, 5, 67, 127}},
		{"big-ints", []uint64{12355, 89832432, 8989843, 8989989, 883443, 9891233, 8232434342}},
	}
	for _, bc := range benchCases {
		values := bc.values
		b.Run(bc.name, func(b *testing.B) {
			benchmarkMarshalVarUint64(b, values)
		})
	}
}
// benchmarkMarshalVarUint64 measures MarshalVarUint64 by repeatedly encoding
// every item of a into a buffer, which is reused across iterations.
func benchmarkMarshalVarUint64(b *testing.B, a []uint64) {
	b.ReportAllocs()
	b.SetBytes(int64(len(a)))
	b.RunParallel(func(pb *testing.PB) {
		var (
			scratch []byte
			total   uint64
		)
		for pb.Next() {
			// Truncate instead of reallocating in order to keep the backing array.
			scratch = scratch[:0]
			for i := range a {
				scratch = MarshalVarUint64(scratch, a[i])
			}
			total += uint64(len(scratch))
		}
		// Publish the accumulated length via Sink once per goroutine.
		Sink.Add(total)
	})
}
// BenchmarkMarshalVarInt64 runs benchmarkMarshalVarInt64 on two fixed inputs:
// values with magnitude below 1<<6 ("small-ints") and bigger values ("big-ints").
// Both inputs mix positive and negative numbers.
func BenchmarkMarshalVarInt64(b *testing.B) {
	benchCases := []struct {
		name   string
		values []int64
	}{
		{"small-ints", []int64{1, 2, 3, -4, 5, -60, 63}},
		{"big-ints", []int64{12355, -89832432, 8989843, -8989989, 883443, -9891233, 8232434342}},
	}
	for _, bc := range benchCases {
		values := bc.values
		b.Run(bc.name, func(b *testing.B) {
			benchmarkMarshalVarInt64(b, values)
		})
	}
}
// benchmarkMarshalVarInt64 measures MarshalVarInt64 by repeatedly encoding
// every item of a into a buffer, which is reused across iterations.
func benchmarkMarshalVarInt64(b *testing.B, a []int64) {
	b.ReportAllocs()
	b.SetBytes(int64(len(a)))
	b.RunParallel(func(pb *testing.PB) {
		var (
			scratch []byte
			total   uint64
		)
		for pb.Next() {
			// Truncate instead of reallocating in order to keep the backing array.
			scratch = scratch[:0]
			for i := range a {
				scratch = MarshalVarInt64(scratch, a[i])
			}
			total += uint64(len(scratch))
		}
		// Publish the accumulated length via Sink once per goroutine.
		Sink.Add(total)
	})
}
// Pre-marshaled fixtures: the value 1234567890 encoded once at package init
// with MarshalInt64 and MarshalUint64 respectively.
// NOTE(review): presumably shared by the unmarshal benchmarks in this file - confirm against callers.
var (
	testMarshaledInt64Data  = MarshalInt64(nil, 1234567890)
	testMarshaledUint64Data = MarshalUint64(nil, 1234567890)
)