From 6b90570ed3d43edfa5b902dbb725e416a87abbb1 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 25 May 2021 14:13:36 +0300
Subject: [PATCH] lib/uint64set: store pointers to bucket16 instead of bucket16
 objects in bucket32

This speeds up bucket32.addBucketAtPos() when bucket32.buckets contains a large number of items,
since copying bucket16 pointers is much faster than copying bucket16 objects.

This is the CPU profile of bucket32.addBucketAtPos() before this change, when it copies bucket16 objects:

      10ms     13.43s (flat, cum) 32.01% of Total
      10ms      120ms    650:	b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
         .          .    651:	b.b16his[pos] = hi
         .     13.31s    652:	b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
         .          .    653:	b16 := &b.buckets[pos]
         .          .    654:	*b16 = bucket16{}
         .          .    655:	return b16
         .          .    656:}

This is the CPU profile of bucket32.addBucketAtPos() after this change, when it copies pointers to bucket16:

      10ms      1.14s (flat, cum)  2.19% of Total
         .      100ms    647:	b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
         .          .    648:	b.b16his[pos] = hi
      10ms      700ms    649:	b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
         .      330ms    650:	b16 := &bucket16{}
         .          .    651:	b.buckets[pos] = b16
         .          .    652:	return b16
         .          .    653:}
---
 lib/uint64set/uint64set.go | 53 ++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/lib/uint64set/uint64set.go b/lib/uint64set/uint64set.go
index 4096c0aac5..37c5417ac7 100644
--- a/lib/uint64set/uint64set.go
+++ b/lib/uint64set/uint64set.go
@@ -79,9 +79,7 @@ func (s *Set) SizeBytes() uint64 {
 	}
 	n := uint64(unsafe.Sizeof(*s))
 	for i := range s.buckets {
-		b32 := &s.buckets[i]
-		n += uint64(unsafe.Sizeof(b32))
-		n += b32.sizeBytes()
+		n += s.buckets[i].sizeBytes()
 	}
 	return n
 }
@@ -411,7 +409,7 @@ type bucket32 struct {
 	b16his []uint16
 
 	// buckets are sorted by b16his
-	buckets []bucket16
+	buckets []*bucket16
 }
 
 func (b *bucket32) getLen() int {
@@ -434,7 +432,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
 			for j < len(a.b16his) {
 				b16 := b.addBucket16(a.b16his[j])
 				if mayOwn {
-					*b16 = a.buckets[j]
+					*b16 = *a.buckets[j]
 				} else {
 					a.buckets[j].copyTo(b16)
 				}
@@ -445,7 +443,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
 		for j < len(a.b16his) && a.b16his[j] < b.b16his[i] {
 			b16 := b.addBucket16(a.b16his[j])
 			if mayOwn {
-				*b16 = a.buckets[j]
+				*b16 = *a.buckets[j]
 			} else {
 				a.buckets[j].copyTo(b16)
 			}
@@ -455,7 +453,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
 			break
 		}
 		if b.b16his[i] == a.b16his[j] {
-			b.buckets[i].union(&a.buckets[j])
+			b.buckets[i].union(a.buckets[j])
 			i++
 			j++
 		}
@@ -481,7 +479,7 @@ func (b *bucket32) intersect(a *bucket32) {
 	j := 0
 	for {
 		for i < len(b.b16his) && j < len(a.b16his) && b.b16his[i] < a.b16his[j] {
-			b.buckets[i] = bucket16{}
+			*b.buckets[i] = bucket16{}
 			i++
 		}
 		if i >= len(b.b16his) {
@@ -492,13 +490,13 @@ func (b *bucket32) intersect(a *bucket32) {
 		}
 		if j >= len(a.b16his) {
 			for i < len(b.b16his) {
-				b.buckets[i] = bucket16{}
+				*b.buckets[i] = bucket16{}
 				i++
 			}
 			break
 		}
 		if b.b16his[i] == a.b16his[j] {
-			b.buckets[i].intersect(&a.buckets[j])
+			b.buckets[i].intersect(a.buckets[j])
 			i++
 			j++
 		}
@@ -506,16 +504,15 @@ func (b *bucket32) intersect(a *bucket32) {
 	// Remove zero buckets
 	b16his := b.b16his[:0]
 	bs := b.buckets[:0]
-	for i := range b.buckets {
-		b32 := &b.buckets[i]
-		if b32.isZero() {
+	for i, b16 := range b.buckets {
+		if b16.isZero() {
 			continue
 		}
 		b16his = append(b16his, b.b16his[i])
-		bs = append(bs, *b32)
+		bs = append(bs, b16)
 	}
 	for i := len(bs); i < len(b.buckets); i++ {
-		b.buckets[i] = bucket16{}
+		b.buckets[i] = nil
 	}
 	b.hint = 0
 	b.b16his = b16his
@@ -525,9 +522,9 @@ func (b *bucket32) intersect(a *bucket32) {
 func (b *bucket32) forEach(f func(part []uint64) bool) bool {
 	xbuf := partBufPool.Get().(*[]uint64)
 	buf := *xbuf
-	for i := range b.buckets {
+	for i, b16 := range b.buckets {
 		hi16 := b.b16his[i]
-		buf = b.buckets[i].appendTo(buf[:0], b.hi, hi16)
+		buf = b16.appendTo(buf[:0], b.hi, hi16)
 		if !f(buf) {
 			return false
 		}
@@ -547,9 +544,7 @@ var partBufPool = &sync.Pool{
 func (b *bucket32) sizeBytes() uint64 {
 	n := uint64(unsafe.Sizeof(*b))
 	n += 2 * uint64(len(b.b16his))
-	for i := range b.buckets {
-		b16 := &b.buckets[i]
-		n += uint64(unsafe.Sizeof(b16))
+	for _, b16 := range b.buckets {
 		n += b16.sizeBytes()
 	}
 	return n
@@ -561,9 +556,11 @@ func (b *bucket32) copyTo(dst *bucket32) {
 	// Do not reuse dst.buckets, since it may be used in other places.
 	dst.buckets = nil
 	if len(b.buckets) > 0 {
-		dst.buckets = make([]bucket16, len(b.buckets))
-		for i := range b.buckets {
-			b.buckets[i].copyTo(&dst.buckets[i])
+		dst.buckets = make([]*bucket16, len(b.buckets))
+		for i, b16 := range b.buckets {
+			b16Dst := &bucket16{}
+			b16.copyTo(b16Dst)
+			dst.buckets[i] = b16Dst
 		}
 	}
 }
@@ -617,7 +614,7 @@ func (b *bucket32) getOrCreateBucket16(hi uint16) *bucket16 {
 	if n < 0 || n >= len(his) || his[n] != hi {
 		return b.addBucketAtPos(hi, n)
 	}
-	return &bs[n]
+	return bs[n]
 }
 
 func (b *bucket32) addSlow(hi, lo uint16) bool {
@@ -635,8 +632,8 @@ func (b *bucket32) addSlow(hi, lo uint16) bool {
 
 func (b *bucket32) addBucket16(hi uint16) *bucket16 {
 	b.b16his = append(b.b16his, hi)
-	b.buckets = append(b.buckets, bucket16{})
-	return &b.buckets[len(b.buckets)-1]
+	b.buckets = append(b.buckets, &bucket16{})
+	return b.buckets[len(b.buckets)-1]
 }
 
 func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 {
@@ -650,8 +647,8 @@ func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 {
 	b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
 	b.b16his[pos] = hi
 	b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
-	b16 := &b.buckets[pos]
-	*b16 = bucket16{}
+	b16 := &bucket16{}
+	b.buckets[pos] = b16
 	return b16
 }