From 6b90570ed3d43edfa5b902dbb725e416a87abbb1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@gmail.com> Date: Tue, 25 May 2021 14:13:36 +0300 Subject: [PATCH] lib/uint64set: store pointers to bucket16 instead of bucket16 objects in bucket32 This speeds up bucket32.addBucketAtPos() when bucket32.buckets contains a big number of items, since copying bucket16 pointers is much faster than copying bucket16 objects. This is a CPU profile for copying bucket16 objects: 10ms 13.43s (flat, cum) 32.01% of Total 10ms 120ms 650: b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...) . . 651: b.b16his[pos] = hi . 13.31s 652: b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...) . . 653: b16 := &b.buckets[pos] . . 654: *b16 = bucket16{} . . 655: return b16 . . 656:} This is a CPU profile for copying pointers to bucket16: 10ms 1.14s (flat, cum) 2.19% of Total . 100ms 647: b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...) . . 648: b.b16his[pos] = hi 10ms 700ms 649: b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...) . 330ms 650: b16 := &bucket16{} . . 651: b.buckets[pos] = b16 . . 652: return b16 . . 
653:} --- lib/uint64set/uint64set.go | 53 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/lib/uint64set/uint64set.go b/lib/uint64set/uint64set.go index 4096c0aac5..37c5417ac7 100644 --- a/lib/uint64set/uint64set.go +++ b/lib/uint64set/uint64set.go @@ -79,9 +79,7 @@ func (s *Set) SizeBytes() uint64 { } n := uint64(unsafe.Sizeof(*s)) for i := range s.buckets { - b32 := &s.buckets[i] - n += uint64(unsafe.Sizeof(b32)) - n += b32.sizeBytes() + n += s.buckets[i].sizeBytes() } return n } @@ -411,7 +409,7 @@ type bucket32 struct { b16his []uint16 // buckets are sorted by b16his - buckets []bucket16 + buckets []*bucket16 } func (b *bucket32) getLen() int { @@ -434,7 +432,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) { for j < len(a.b16his) { b16 := b.addBucket16(a.b16his[j]) if mayOwn { - *b16 = a.buckets[j] + *b16 = *a.buckets[j] } else { a.buckets[j].copyTo(b16) } @@ -445,7 +443,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) { for j < len(a.b16his) && a.b16his[j] < b.b16his[i] { b16 := b.addBucket16(a.b16his[j]) if mayOwn { - *b16 = a.buckets[j] + *b16 = *a.buckets[j] } else { a.buckets[j].copyTo(b16) } @@ -455,7 +453,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) { break } if b.b16his[i] == a.b16his[j] { - b.buckets[i].union(&a.buckets[j]) + b.buckets[i].union(a.buckets[j]) i++ j++ } @@ -481,7 +479,7 @@ func (b *bucket32) intersect(a *bucket32) { j := 0 for { for i < len(b.b16his) && j < len(a.b16his) && b.b16his[i] < a.b16his[j] { - b.buckets[i] = bucket16{} + *b.buckets[i] = bucket16{} i++ } if i >= len(b.b16his) { @@ -492,13 +490,13 @@ func (b *bucket32) intersect(a *bucket32) { } if j >= len(a.b16his) { for i < len(b.b16his) { - b.buckets[i] = bucket16{} + *b.buckets[i] = bucket16{} i++ } break } if b.b16his[i] == a.b16his[j] { - b.buckets[i].intersect(&a.buckets[j]) + b.buckets[i].intersect(a.buckets[j]) i++ j++ } @@ -506,16 +504,15 @@ func (b *bucket32) intersect(a *bucket32) { // 
Remove zero buckets b16his := b.b16his[:0] bs := b.buckets[:0] - for i := range b.buckets { - b32 := &b.buckets[i] - if b32.isZero() { + for i, b16 := range b.buckets { + if b16.isZero() { continue } b16his = append(b16his, b.b16his[i]) - bs = append(bs, *b32) + bs = append(bs, b16) } for i := len(bs); i < len(b.buckets); i++ { - b.buckets[i] = bucket16{} + b.buckets[i] = nil } b.hint = 0 b.b16his = b16his @@ -525,9 +522,9 @@ func (b *bucket32) intersect(a *bucket32) { func (b *bucket32) forEach(f func(part []uint64) bool) bool { xbuf := partBufPool.Get().(*[]uint64) buf := *xbuf - for i := range b.buckets { + for i, b16 := range b.buckets { hi16 := b.b16his[i] - buf = b.buckets[i].appendTo(buf[:0], b.hi, hi16) + buf = b16.appendTo(buf[:0], b.hi, hi16) if !f(buf) { return false } @@ -547,9 +544,7 @@ var partBufPool = &sync.Pool{ func (b *bucket32) sizeBytes() uint64 { n := uint64(unsafe.Sizeof(*b)) n += 2 * uint64(len(b.b16his)) - for i := range b.buckets { - b16 := &b.buckets[i] - n += uint64(unsafe.Sizeof(b16)) + for _, b16 := range b.buckets { n += b16.sizeBytes() } return n @@ -561,9 +556,11 @@ func (b *bucket32) copyTo(dst *bucket32) { // Do not reuse dst.buckets, since it may be used in other places. 
dst.buckets = nil if len(b.buckets) > 0 { - dst.buckets = make([]bucket16, len(b.buckets)) - for i := range b.buckets { - b.buckets[i].copyTo(&dst.buckets[i]) + dst.buckets = make([]*bucket16, len(b.buckets)) + for i, b16 := range b.buckets { + b16Dst := &bucket16{} + b16.copyTo(b16Dst) + dst.buckets[i] = b16Dst } } } @@ -617,7 +614,7 @@ func (b *bucket32) getOrCreateBucket16(hi uint16) *bucket16 { if n < 0 || n >= len(his) || his[n] != hi { return b.addBucketAtPos(hi, n) } - return &bs[n] + return bs[n] } func (b *bucket32) addSlow(hi, lo uint16) bool { @@ -635,8 +632,8 @@ func (b *bucket32) addSlow(hi, lo uint16) bool { func (b *bucket32) addBucket16(hi uint16) *bucket16 { b.b16his = append(b.b16his, hi) - b.buckets = append(b.buckets, bucket16{}) - return &b.buckets[len(b.buckets)-1] + b.buckets = append(b.buckets, &bucket16{}) + return b.buckets[len(b.buckets)-1] } func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 { @@ -650,8 +647,8 @@ func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 { b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...) b.b16his[pos] = hi b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...) - b16 := &b.buckets[pos] - *b16 = bucket16{} + b16 := &bucket16{} + b.buckets[pos] = b16 return b16 }