lib/uint64set: store pointers to bucket16 instead of bucket16 objects in bucket32

This speeds up bucket32.addBucketAtPos() when bucket32.buckets contains a large number of items.
Inserting a bucket at an arbitrary position shifts the tail of the slice, and
copying bucket16 pointers is much faster than copying whole bucket16 objects.

This is the CPU profile before the change, when bucket16 objects were copied:

      10ms     13.43s (flat, cum) 32.01% of Total
      10ms      120ms    650:	b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
         .          .    651:	b.b16his[pos] = hi
         .     13.31s    652:	b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
         .          .    653:	b16 := &b.buckets[pos]
         .          .    654:	*b16 = bucket16{}
         .          .    655:	return b16
         .          .    656:}

This is the CPU profile after the change, when only pointers to bucket16 are copied:

      10ms      1.14s (flat, cum)  2.19% of Total
         .      100ms    647:	b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
         .          .    648:	b.b16his[pos] = hi
      10ms      700ms    649:	b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
         .      330ms    650:	b16 := &bucket16{}
         .          .    651:	b.buckets[pos] = b16
         .          .    652:	return b16
         .          .    653:}
This commit is contained in:
Aliaksandr Valialkin 2021-05-25 14:13:36 +03:00
parent fd264477bf
commit 2233d6ed8a

View file

@ -79,9 +79,7 @@ func (s *Set) SizeBytes() uint64 {
} }
n := uint64(unsafe.Sizeof(*s)) n := uint64(unsafe.Sizeof(*s))
for i := range s.buckets { for i := range s.buckets {
b32 := &s.buckets[i] n += s.buckets[i].sizeBytes()
n += uint64(unsafe.Sizeof(b32))
n += b32.sizeBytes()
} }
return n return n
} }
@ -411,7 +409,7 @@ type bucket32 struct {
b16his []uint16 b16his []uint16
// buckets are sorted by b16his // buckets are sorted by b16his
buckets []bucket16 buckets []*bucket16
} }
func (b *bucket32) getLen() int { func (b *bucket32) getLen() int {
@ -434,7 +432,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
for j < len(a.b16his) { for j < len(a.b16his) {
b16 := b.addBucket16(a.b16his[j]) b16 := b.addBucket16(a.b16his[j])
if mayOwn { if mayOwn {
*b16 = a.buckets[j] *b16 = *a.buckets[j]
} else { } else {
a.buckets[j].copyTo(b16) a.buckets[j].copyTo(b16)
} }
@ -445,7 +443,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
for j < len(a.b16his) && a.b16his[j] < b.b16his[i] { for j < len(a.b16his) && a.b16his[j] < b.b16his[i] {
b16 := b.addBucket16(a.b16his[j]) b16 := b.addBucket16(a.b16his[j])
if mayOwn { if mayOwn {
*b16 = a.buckets[j] *b16 = *a.buckets[j]
} else { } else {
a.buckets[j].copyTo(b16) a.buckets[j].copyTo(b16)
} }
@ -455,7 +453,7 @@ func (b *bucket32) union(a *bucket32, mayOwn bool) {
break break
} }
if b.b16his[i] == a.b16his[j] { if b.b16his[i] == a.b16his[j] {
b.buckets[i].union(&a.buckets[j]) b.buckets[i].union(a.buckets[j])
i++ i++
j++ j++
} }
@ -481,7 +479,7 @@ func (b *bucket32) intersect(a *bucket32) {
j := 0 j := 0
for { for {
for i < len(b.b16his) && j < len(a.b16his) && b.b16his[i] < a.b16his[j] { for i < len(b.b16his) && j < len(a.b16his) && b.b16his[i] < a.b16his[j] {
b.buckets[i] = bucket16{} *b.buckets[i] = bucket16{}
i++ i++
} }
if i >= len(b.b16his) { if i >= len(b.b16his) {
@ -492,13 +490,13 @@ func (b *bucket32) intersect(a *bucket32) {
} }
if j >= len(a.b16his) { if j >= len(a.b16his) {
for i < len(b.b16his) { for i < len(b.b16his) {
b.buckets[i] = bucket16{} *b.buckets[i] = bucket16{}
i++ i++
} }
break break
} }
if b.b16his[i] == a.b16his[j] { if b.b16his[i] == a.b16his[j] {
b.buckets[i].intersect(&a.buckets[j]) b.buckets[i].intersect(a.buckets[j])
i++ i++
j++ j++
} }
@ -506,16 +504,15 @@ func (b *bucket32) intersect(a *bucket32) {
// Remove zero buckets // Remove zero buckets
b16his := b.b16his[:0] b16his := b.b16his[:0]
bs := b.buckets[:0] bs := b.buckets[:0]
for i := range b.buckets { for i, b16 := range b.buckets {
b32 := &b.buckets[i] if b16.isZero() {
if b32.isZero() {
continue continue
} }
b16his = append(b16his, b.b16his[i]) b16his = append(b16his, b.b16his[i])
bs = append(bs, *b32) bs = append(bs, b16)
} }
for i := len(bs); i < len(b.buckets); i++ { for i := len(bs); i < len(b.buckets); i++ {
b.buckets[i] = bucket16{} b.buckets[i] = nil
} }
b.hint = 0 b.hint = 0
b.b16his = b16his b.b16his = b16his
@ -525,9 +522,9 @@ func (b *bucket32) intersect(a *bucket32) {
func (b *bucket32) forEach(f func(part []uint64) bool) bool { func (b *bucket32) forEach(f func(part []uint64) bool) bool {
xbuf := partBufPool.Get().(*[]uint64) xbuf := partBufPool.Get().(*[]uint64)
buf := *xbuf buf := *xbuf
for i := range b.buckets { for i, b16 := range b.buckets {
hi16 := b.b16his[i] hi16 := b.b16his[i]
buf = b.buckets[i].appendTo(buf[:0], b.hi, hi16) buf = b16.appendTo(buf[:0], b.hi, hi16)
if !f(buf) { if !f(buf) {
return false return false
} }
@ -547,9 +544,7 @@ var partBufPool = &sync.Pool{
func (b *bucket32) sizeBytes() uint64 { func (b *bucket32) sizeBytes() uint64 {
n := uint64(unsafe.Sizeof(*b)) n := uint64(unsafe.Sizeof(*b))
n += 2 * uint64(len(b.b16his)) n += 2 * uint64(len(b.b16his))
for i := range b.buckets { for _, b16 := range b.buckets {
b16 := &b.buckets[i]
n += uint64(unsafe.Sizeof(b16))
n += b16.sizeBytes() n += b16.sizeBytes()
} }
return n return n
@ -561,9 +556,11 @@ func (b *bucket32) copyTo(dst *bucket32) {
// Do not reuse dst.buckets, since it may be used in other places. // Do not reuse dst.buckets, since it may be used in other places.
dst.buckets = nil dst.buckets = nil
if len(b.buckets) > 0 { if len(b.buckets) > 0 {
dst.buckets = make([]bucket16, len(b.buckets)) dst.buckets = make([]*bucket16, len(b.buckets))
for i := range b.buckets { for i, b16 := range b.buckets {
b.buckets[i].copyTo(&dst.buckets[i]) b16Dst := &bucket16{}
b16.copyTo(b16Dst)
dst.buckets[i] = b16Dst
} }
} }
} }
@ -617,7 +614,7 @@ func (b *bucket32) getOrCreateBucket16(hi uint16) *bucket16 {
if n < 0 || n >= len(his) || his[n] != hi { if n < 0 || n >= len(his) || his[n] != hi {
return b.addBucketAtPos(hi, n) return b.addBucketAtPos(hi, n)
} }
return &bs[n] return bs[n]
} }
func (b *bucket32) addSlow(hi, lo uint16) bool { func (b *bucket32) addSlow(hi, lo uint16) bool {
@ -635,8 +632,8 @@ func (b *bucket32) addSlow(hi, lo uint16) bool {
func (b *bucket32) addBucket16(hi uint16) *bucket16 { func (b *bucket32) addBucket16(hi uint16) *bucket16 {
b.b16his = append(b.b16his, hi) b.b16his = append(b.b16his, hi)
b.buckets = append(b.buckets, bucket16{}) b.buckets = append(b.buckets, &bucket16{})
return &b.buckets[len(b.buckets)-1] return b.buckets[len(b.buckets)-1]
} }
func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 { func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 {
@ -650,8 +647,8 @@ func (b *bucket32) addBucketAtPos(hi uint16, pos int) *bucket16 {
b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...) b.b16his = append(b.b16his[:pos+1], b.b16his[pos:]...)
b.b16his[pos] = hi b.b16his[pos] = hi
b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...) b.buckets = append(b.buckets[:pos+1], b.buckets[pos:]...)
b16 := &b.buckets[pos] b16 := &bucket16{}
*b16 = bucket16{} b.buckets[pos] = b16
return b16 return b16
} }