package uint64set

import (
	"math/bits"
	"sort"
)

// Set is a fast set for uint64.
//
// It should work faster than map[uint64]struct{} for semi-sparse uint64 values
// such as MetricIDs generated by lib/storage.
//
// Every value is split into three parts: the high 32 bits select a bucket32,
// the next 16 bits select a bucket16 inside it, and the low 16 bits select
// a single bit in the bucket16 bitmap.
//
// It is unsafe calling Set methods from concurrent goroutines.
type Set struct {
	// itemsCount is the number of distinct values stored in the set.
	itemsCount int

	// buckets holds one bucket32 per distinct high 32-bit prefix.
	// It is sorted lazily by AppendTo.
	buckets bucket32Sorter
}

// bucket32Sorter implements sort.Interface, ordering buckets by their hi prefix.
type bucket32Sorter []*bucket32

func (s *bucket32Sorter) Len() int { return len(*s) }

func (s *bucket32Sorter) Less(i, j int) bool {
	a := *s
	return a[i].hi < a[j].hi
}

func (s *bucket32Sorter) Swap(i, j int) {
	a := *s
	a[i], a[j] = a[j], a[i]
}

// Clone returns an independent copy of s.
//
// A nil s yields a new empty set.
func (s *Set) Clone() *Set {
	if s == nil {
		// Return an empty set, so data could be added into it later.
		return &Set{}
	}
	var dst Set
	dst.itemsCount = s.itemsCount
	dst.buckets = make([]*bucket32, len(s.buckets))
	for i, b32 := range s.buckets {
		dst.buckets[i] = b32.clone()
	}
	return &dst
}

// Len returns the number of distinct uint64 values in s.
//
// A nil s is treated as an empty set.
func (s *Set) Len() int {
	if s == nil {
		return 0
	}
	return s.itemsCount
}

// Add adds x to s.
//
// s must be non-nil.
func (s *Set) Add(x uint64) {
	hi := uint32(x >> 32)
	lo := uint32(x)
	for _, b32 := range s.buckets {
		if b32.hi == hi {
			if b32.add(lo) {
				s.itemsCount++
			}
			return
		}
	}
	s.addAlloc(hi, lo)
}

// addAlloc creates a new bucket32 for the given hi prefix, stores lo in it
// and appends the bucket to s.buckets.
func (s *Set) addAlloc(hi, lo uint32) {
	var b32 bucket32
	b32.hi = hi
	// The bucket is brand new, so the value is always newly added.
	_ = b32.add(lo)
	s.itemsCount++
	s.buckets = append(s.buckets, &b32)
}

// Has verifies whether x exists in s.
//
// A nil s is treated as an empty set.
func (s *Set) Has(x uint64) bool {
	if s == nil {
		return false
	}
	hi := uint32(x >> 32)
	lo := uint32(x)
	for _, b32 := range s.buckets {
		if b32.hi == hi {
			return b32.has(lo)
		}
	}
	return false
}

// Del deletes x from s.
//
// It is a no-op if x is missing in s or if s is nil.
func (s *Set) Del(x uint64) {
	if s == nil {
		// Treat nil as an empty set, like Len, Has and AppendTo do.
		return
	}
	hi := uint32(x >> 32)
	lo := uint32(x)
	for _, b32 := range s.buckets {
		if b32.hi == hi {
			if b32.del(lo) {
				s.itemsCount--
			}
			return
		}
	}
}

// AppendTo appends all the items from the set to dst and returns the result.
//
// The returned items are sorted.
//
// AppendTo may reorder the internal buckets of s in order to emit sorted output.
func (s *Set) AppendTo(dst []uint64) []uint64 {
	if s == nil {
		return dst
	}

	// pre-allocate memory for dst: n is the number of free slots
	// missing in dst for holding all the items from s.
	dstLen := len(dst)
	if n := s.Len() - cap(dst) + dstLen; n > 0 {
		dst = append(dst[:cap(dst)], make([]uint64, n)...)
		dst = dst[:dstLen]
	}

	// sort s.buckets if it isn't sorted yet
	if !sort.IsSorted(&s.buckets) {
		sort.Sort(&s.buckets)
	}
	for _, b32 := range s.buckets {
		dst = b32.appendTo(dst)
	}
	return dst
}

// Union adds all the items from a to s.
//
// A nil or empty a is a no-op. Otherwise s must be non-nil.
func (s *Set) Union(a *Set) {
	if a.Len() == 0 {
		// Nothing to add; skip building the temporary slice.
		return
	}
	// TODO: optimize it
	for _, x := range a.AppendTo(nil) {
		s.Add(x)
	}
}

// bucket32 holds all the set values sharing the same high 32 bits.
type bucket32 struct {
	// hi is the high 32 bits shared by every value in the bucket.
	hi uint32

	// b16his[i] is the 16-bit prefix (bits 16..31 of the low 32-bit half)
	// of the values stored in buckets[i].
	b16his []uint16

	// buckets is parallel to b16his.
	buckets []*bucket16
}

// clone returns a deep copy of b.
func (b *bucket32) clone() *bucket32 {
	var dst bucket32
	dst.hi = b.hi
	dst.b16his = append(dst.b16his[:0], b.b16his...)
	dst.buckets = make([]*bucket16, len(b.buckets))
	for i, b16 := range b.buckets {
		dst.buckets[i] = b16.clone()
	}
	return &dst
}

// This is for sort.Interface
func (b *bucket32) Len() int { return len(b.b16his) }

func (b *bucket32) Less(i, j int) bool { return b.b16his[i] < b.b16his[j] }

// Swap swaps entries i and j in both b16his and buckets, keeping them parallel.
func (b *bucket32) Swap(i, j int) {
	his := b.b16his
	buckets := b.buckets
	his[i], his[j] = his[j], his[i]
	buckets[i], buckets[j] = buckets[j], buckets[i]
}

// maxUnsortedBuckets is the maximum number of bucket16 entries a bucket32
// scans linearly in insertion order. Once a bucket32 holds more entries,
// b16his is sorted and looked up via binary search.
const maxUnsortedBuckets = 32

// add adds x to b and reports whether x was missing in b before the call.
func (b *bucket32) add(x uint32) bool {
	hi := uint16(x >> 16)
	lo := uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		return b.addSlow(hi, lo)
	}
	for i, hi16 := range b.b16his {
		if hi16 == hi {
			// The length check is defensive: buckets is parallel to b16his.
			return i < len(b.buckets) && b.buckets[i].add(lo)
		}
	}
	b.addAllocSmall(hi, lo)
	return true
}

// addAllocSmall appends a new bucket16 for hi containing lo.
//
// When the number of buckets exceeds maxUnsortedBuckets, the buckets are
// sorted, so subsequent lookups may use binary search.
func (b *bucket32) addAllocSmall(hi, lo uint16) {
	var b16 bucket16
	_ = b16.add(lo)
	b.b16his = append(b.b16his, hi)
	b.buckets = append(b.buckets, &b16)
	if len(b.buckets) > maxUnsortedBuckets {
		sort.Sort(b)
	}
}

// addSlow adds (hi, lo) to b using binary search over the sorted b16his.
//
// It reports whether the value was missing in b before the call.
func (b *bucket32) addSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	if n < 0 || n >= len(b.b16his) || b.b16his[n] != hi {
		b.addAllocBig(hi, lo, n)
		return true
	}
	return n < len(b.buckets) && b.buckets[n].add(lo)
}

// addAllocBig inserts a new bucket16 for hi containing lo at position n,
// keeping b16his sorted.
func (b *bucket32) addAllocBig(hi, lo uint16, n int) {
	if n < 0 {
		// Defensive: binarySearch16 never returns a negative index.
		return
	}
	var b16 bucket16
	_ = b16.add(lo)
	if n >= len(b.b16his) {
		b.b16his = append(b.b16his, hi)
		b.buckets = append(b.buckets, &b16)
		return
	}
	// Shift the tail [n:] one position to the right and put the new entry at n.
	b.b16his = append(b.b16his[:n+1], b.b16his[n:]...)
	b.b16his[n] = hi
	b.buckets = append(b.buckets[:n+1], b.buckets[n:]...)
	b.buckets[n] = &b16
}

// has reports whether x exists in b.
func (b *bucket32) has(x uint32) bool {
	hi := uint16(x >> 16)
	lo := uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		return b.hasSlow(hi, lo)
	}
	for i, hi16 := range b.b16his {
		if hi16 == hi {
			return i < len(b.buckets) && b.buckets[i].has(lo)
		}
	}
	return false
}

// hasSlow is the binary-search counterpart of has for sorted b16his.
func (b *bucket32) hasSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	if n < 0 || n >= len(b.b16his) || b.b16his[n] != hi {
		return false
	}
	return n < len(b.buckets) && b.buckets[n].has(lo)
}

// del deletes x from b and reports whether x existed in b before the call.
func (b *bucket32) del(x uint32) bool {
	hi := uint16(x >> 16)
	lo := uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		return b.delSlow(hi, lo)
	}
	for i, hi16 := range b.b16his {
		if hi16 == hi {
			return i < len(b.buckets) && b.buckets[i].del(lo)
		}
	}
	return false
}

// delSlow is the binary-search counterpart of del for sorted b16his.
func (b *bucket32) delSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	if n < 0 || n >= len(b.b16his) || b.b16his[n] != hi {
		return false
	}
	return n < len(b.buckets) && b.buckets[n].del(lo)
}

// appendTo appends all the values from b to dst in ascending order
// and returns the result.
func (b *bucket32) appendTo(dst []uint64) []uint64 {
	// Small buckets are kept in insertion order, so sort them if needed.
	if len(b.buckets) <= maxUnsortedBuckets && !sort.IsSorted(b) {
		sort.Sort(b)
	}
	for i, b16 := range b.buckets {
		hi16 := b.b16his[i]
		dst = b16.appendTo(dst, b.hi, hi16)
	}
	return dst
}

const (
	// bitsPerBucket is the number of distinct low 16-bit values
	// a single bucket16 can hold.
	bitsPerBucket = 1 << 16

	// wordsPerBucket is the number of uint64 words in the bucket16 bitmap.
	wordsPerBucket = bitsPerBucket / 64
)

// bucket16 is a bitmap with a bit per each possible low 16-bit value.
type bucket16 struct {
	bits [wordsPerBucket]uint64
}

// clone returns a copy of b.
func (b *bucket16) clone() *bucket16 {
	var dst bucket16
	copy(dst.bits[:], b.bits[:])
	return &dst
}

// add sets the bit for x and reports whether it was unset before the call.
func (b *bucket16) add(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	word := &b.bits[wordNum]
	ok := *word&bitMask == 0
	*word |= bitMask
	return ok
}

// has reports whether the bit for x is set.
func (b *bucket16) has(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	return b.bits[wordNum]&bitMask != 0
}

// del clears the bit for x and reports whether it was set before the call.
func (b *bucket16) del(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	word := &b.bits[wordNum]
	ok := *word&bitMask != 0
	*word &^= bitMask
	return ok
}

// appendTo appends all the values stored in b to dst in ascending order
// and returns the result.
//
// hi and hi16 are the high 32 bits and the following 16 bits,
// which are combined with every set bit position to restore the uint64 value.
func (b *bucket16) appendTo(dst []uint64, hi uint32, hi16 uint16) []uint64 {
	hi64 := uint64(hi)<<32 | uint64(hi16)<<16
	// Iterate by index, so the range statement doesn't need to copy the bitmap array.
	for wordNum := range b.bits {
		word := b.bits[wordNum]
		base := hi64 | uint64(wordNum)*64
		// Extract the set bits from the lowest to the highest one.
		for word != 0 {
			tzn := uint64(bits.TrailingZeros64(word))
			word &^= uint64(1) << tzn
			dst = append(dst, base|tzn)
		}
	}
	return dst
}

// getWordNumBitMask returns the index of the bitmap word holding the bit for x
// and the mask selecting this bit inside the word.
func getWordNumBitMask(x uint16) (uint16, uint64) {
	wordNum := x / 64
	bitMask := uint64(1) << (x & 63)
	return wordNum, bitMask
}

// binarySearch16 returns the smallest index i in the sorted u16
// such that u16[i] >= x, or len(u16) if there is no such index.
func binarySearch16(u16 []uint16, x uint16) int {
	// The code has been adapted from sort.Search.
	n := len(u16)
	i, j := 0, n
	for i < j {
		h := int(uint(i+j) >> 1)
		if h >= 0 && h < len(u16) && u16[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}