VictoriaMetrics/lib/uint64set/uint64set.go

package uint64set

import (
	"math/bits"
	"sort"
)

// Set is a fast set for uint64.
//
// It should work faster than map[uint64]struct{} for semi-sparse uint64 values
// such as MetricIDs generated by lib/storage.
//
// It is unsafe calling Set methods from concurrent goroutines.
type Set struct {
	// itemsCount is the number of distinct values stored in the set.
	// It is incremented by Add, decremented by Del and reported by Len.
	itemsCount int
	// buckets holds one bucket32 per distinct high 32 bits of the stored values.
	// New buckets are appended in insertion order; AppendTo sorts the slice
	// by bucket32.hi when it isn't sorted yet.
	buckets    bucket32Sorter
}

// bucket32Sorter is a list of bucket32 pointers, which can be ordered
// by ascending bucket32.hi via the sort package.
//
// sort.Interface is implemented on the pointer receiver.
type bucket32Sorter []*bucket32

// Len returns the number of buckets in the list.
func (s *bucket32Sorter) Len() int {
	return len(*s)
}

// Less reports whether the bucket at index i has smaller hi
// than the bucket at index j.
func (s *bucket32Sorter) Less(i, j int) bool {
	return (*s)[i].hi < (*s)[j].hi
}

// Swap exchanges the buckets at indexes i and j.
func (s *bucket32Sorter) Swap(i, j int) {
	items := *s
	items[j], items[i] = items[i], items[j]
}

// Clone returns an independent copy of s.
//
// A nil s yields a new empty set rather than nil, so the caller
// can safely add items to the returned set.
func (s *Set) Clone() *Set {
	if s == nil {
		return &Set{}
	}
	cloned := &Set{
		itemsCount: s.itemsCount,
		buckets:    make([]*bucket32, len(s.buckets)),
	}
	// Deep-copy every bucket, so changes to the clone never touch s.
	for i := range s.buckets {
		cloned.buckets[i] = s.buckets[i].clone()
	}
	return cloned
}

// Len returns the number of distinct uint64 values in s.
//
// A nil set is treated as empty.
func (s *Set) Len() int {
	if s != nil {
		return s.itemsCount
	}
	return 0
}

// Add adds x to s.
//
// s must be non-nil.
func (s *Set) Add(x uint64) {
	hi, lo := uint32(x>>32), uint32(x)
	// Look for the bucket, which already owns the high 32 bits of x.
	for i := range s.buckets {
		b32 := s.buckets[i]
		if b32.hi != hi {
			continue
		}
		// Count the item only if it wasn't in the bucket before.
		if b32.add(lo) {
			s.itemsCount++
		}
		return
	}
	// There is no bucket for these high bits yet - create one.
	s.addAlloc(hi, lo)
}

// addAlloc creates a new bucket32 for the given hi, stores lo in it
// and appends the bucket to s.buckets.
//
// The caller must ensure that s has no bucket for hi yet.
func (s *Set) addAlloc(hi, lo uint32) {
	b32 := &bucket32{hi: hi}
	// The bucket has just been created, so lo is always a new item there.
	_ = b32.add(lo)
	s.buckets = append(s.buckets, b32)
	s.itemsCount++
}

// Has verifies whether x exists in s.
//
// A nil set contains no items.
func (s *Set) Has(x uint64) bool {
	if s == nil {
		return false
	}
	hi, lo := uint32(x>>32), uint32(x)
	for i := range s.buckets {
		if b32 := s.buckets[i]; b32.hi == hi {
			return b32.has(lo)
		}
	}
	return false
}

// Del deletes x from s.
//
// It is a no-op if s is nil or if x isn't in s.
func (s *Set) Del(x uint64) {
	if s == nil {
		// A nil set is treated as empty by Has, Len and AppendTo.
		// Do the same here instead of panicking on the s.buckets access.
		return
	}
	hi := uint32(x >> 32)
	lo := uint32(x)
	for _, b32 := range s.buckets {
		if b32.hi == hi {
			// Decrement the counter only if the item was actually removed.
			if b32.del(lo) {
				s.itemsCount--
			}
			return
		}
	}
}

// AppendTo appends all the items from the set to dst and returns the result.
//
// The returned items are sorted.
//
// A nil set appends nothing. The call may reorder the internal buckets of s.
func (s *Set) AppendTo(dst []uint64) []uint64 {
	if s == nil {
		return dst
	}
	// Make sure dst has enough capacity for all the items up front,
	// so the appends below do not need to re-allocate it.
	origLen := len(dst)
	missing := origLen + s.Len() - cap(dst)
	if missing > 0 {
		dst = append(dst[:cap(dst)], make([]uint64, missing)...)[:origLen]
	}
	// Buckets are appended in insertion order by Add, so they must be ordered
	// by hi before emitting the items.
	sorter := &s.buckets
	if !sort.IsSorted(sorter) {
		sort.Sort(sorter)
	}
	for i := range s.buckets {
		dst = s.buckets[i].appendTo(dst)
	}
	return dst
}

// Union adds all the items from a to s.
//
// A nil a contributes no items. The items of a are obtained via a.AppendTo,
// which may reorder the internal buckets of a.
func (s *Set) Union(a *Set) {
	// TODO: optimize it
	items := a.AppendTo(nil)
	for i := range items {
		s.Add(items[i])
	}
}

// bucket32 holds the low 32-bit parts of the Set values sharing the same
// high 32 bits.
type bucket32 struct {
	// hi is the high 32 bits shared by all the values in the bucket.
	hi      uint32
	// b16his contains the high 16 bits of the stored 32-bit parts.
	// b16his[i] corresponds to buckets[i]; both slices are grown
	// and reordered together.
	b16his  []uint16
	// buckets contains bitmaps with the low 16 bits of the stored 32-bit parts.
	buckets []*bucket16
}

// clone returns a deep copy of b, which shares no memory with b.
func (b *bucket32) clone() *bucket32 {
	cloned := &bucket32{
		hi:      b.hi,
		b16his:  append([]uint16(nil), b.b16his...),
		buckets: make([]*bucket16, len(b.buckets)),
	}
	for i := range b.buckets {
		cloned.buckets[i] = b.buckets[i].clone()
	}
	return cloned
}

// The methods below implement sort.Interface.
// They order the bucket by ascending b16his values.

// Len returns the number of b16his entries.
func (b *bucket32) Len() int {
	return len(b.b16his)
}

// Less reports whether b16his[i] is smaller than b16his[j].
func (b *bucket32) Less(i, j int) bool {
	his := b.b16his
	return his[i] < his[j]
}

// Swap exchanges entries i and j in both b16his and buckets,
// so the two slices stay aligned with each other.
func (b *bucket32) Swap(i, j int) {
	b.b16his[i], b.b16his[j] = b.b16his[j], b.b16his[i]
	b.buckets[i], b.buckets[j] = b.buckets[j], b.buckets[i]
}

// maxUnsortedBuckets is the maximum number of bucket16 entries a bucket32
// may hold while being scanned linearly.
//
// When a bucket32 grows beyond this number of entries, they are sorted
// by b16his and add, has and del switch to binary search.
const maxUnsortedBuckets = 32

// add adds x to b.
//
// It returns true if x wasn't in b before the call.
func (b *bucket32) add(x uint32) bool {
	hi, lo := uint16(x>>16), uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		// Big buckets are kept sorted, so binary search is used for them.
		return b.addSlow(hi, lo)
	}
	// Small buckets are scanned linearly.
	for i := range b.b16his {
		if b.b16his[i] != hi {
			continue
		}
		if i >= len(b.buckets) {
			return false
		}
		return b.buckets[i].add(lo)
	}
	b.addAllocSmall(hi, lo)
	return true
}

// addAllocSmall appends a new bucket16 for hi containing lo to b.
//
// The entries of b are sorted as soon as their number exceeds
// maxUnsortedBuckets, since binary search is used from that point on.
func (b *bucket32) addAllocSmall(hi, lo uint16) {
	b16 := new(bucket16)
	// The bucket has just been created, so lo is always a new item there.
	_ = b16.add(lo)
	b.buckets = append(b.buckets, b16)
	b.b16his = append(b.b16his, hi)
	if len(b.buckets) <= maxUnsortedBuckets {
		return
	}
	sort.Sort(b)
}

// addSlow adds (hi, lo) to b using binary search over the sorted b16his.
//
// It returns true if the item wasn't in b before the call.
func (b *bucket32) addSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	found := n >= 0 && n < len(b.b16his) && b.b16his[n] == hi
	if !found {
		// There is no bucket16 for hi yet - insert it at position n
		// in order to keep b16his sorted.
		b.addAllocBig(hi, lo, n)
		return true
	}
	if n >= len(b.buckets) {
		return false
	}
	return b.buckets[n].add(lo)
}

// addAllocBig inserts a new bucket16 for hi containing lo at position n
// of b.b16his and b.buckets.
//
// Negative n is ignored. If n points past the last entry,
// the new bucket is appended to the end.
func (b *bucket32) addAllocBig(hi, lo uint16, n int) {
	if n < 0 {
		return
	}
	b16 := new(bucket16)
	// The bucket has just been created, so lo is always a new item there.
	_ = b16.add(lo)
	if n >= len(b.b16his) {
		b.b16his = append(b.b16his, hi)
		b.buckets = append(b.buckets, b16)
		return
	}
	// Grow both slices by one entry, shift the tail starting at n
	// one position to the right and put the new entry into the freed slot.
	b.b16his = append(b.b16his, 0)
	copy(b.b16his[n+1:], b.b16his[n:])
	b.b16his[n] = hi

	b.buckets = append(b.buckets, nil)
	copy(b.buckets[n+1:], b.buckets[n:])
	b.buckets[n] = b16
}

// has reports whether x exists in b.
func (b *bucket32) has(x uint32) bool {
	hi, lo := uint16(x>>16), uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		// Big buckets are kept sorted, so binary search is used for them.
		return b.hasSlow(hi, lo)
	}
	// Small buckets are scanned linearly.
	for i := range b.b16his {
		if b.b16his[i] != hi {
			continue
		}
		if i >= len(b.buckets) {
			return false
		}
		return b.buckets[i].has(lo)
	}
	return false
}

// hasSlow reports whether (hi, lo) exists in b using binary search
// over the sorted b16his.
func (b *bucket32) hasSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	switch {
	case n < 0, n >= len(b.b16his):
		return false
	case b.b16his[n] != hi:
		// There is no bucket16 for the given hi.
		return false
	case n >= len(b.buckets):
		return false
	}
	return b.buckets[n].has(lo)
}

// del deletes x from b.
//
// It returns true if x was in b before the call.
func (b *bucket32) del(x uint32) bool {
	hi, lo := uint16(x>>16), uint16(x)
	if len(b.buckets) > maxUnsortedBuckets {
		// Big buckets are kept sorted, so binary search is used for them.
		return b.delSlow(hi, lo)
	}
	// Small buckets are scanned linearly.
	for i := range b.b16his {
		if b.b16his[i] != hi {
			continue
		}
		if i >= len(b.buckets) {
			return false
		}
		return b.buckets[i].del(lo)
	}
	return false
}

// delSlow deletes (hi, lo) from b using binary search over the sorted b16his.
//
// It returns true if the item was in b before the call.
func (b *bucket32) delSlow(hi, lo uint16) bool {
	n := binarySearch16(b.b16his, hi)
	if n < 0 || n >= len(b.b16his) {
		return false
	}
	if b.b16his[n] != hi {
		// There is no bucket16 for the given hi.
		return false
	}
	if n >= len(b.buckets) {
		return false
	}
	return b.buckets[n].del(lo)
}

// appendTo appends all the items from b to dst in ascending order
// and returns the result.
//
// Small buckets are sorted in place if needed; big buckets are already
// kept sorted by add.
func (b *bucket32) appendTo(dst []uint64) []uint64 {
	needsSort := len(b.buckets) <= maxUnsortedBuckets && !sort.IsSorted(b)
	if needsSort {
		sort.Sort(b)
	}
	for i := range b.buckets {
		dst = b.buckets[i].appendTo(dst, b.hi, b.b16his[i])
	}
	return dst
}

const (
	// bitsPerBucket is the number of bits in a bucket16 bitmap -
	// one bit per each possible uint16 value.
	bitsPerBucket  = 1 << 16
	// wordsPerBucket is the number of uint64 words needed for storing
	// bitsPerBucket bits.
	wordsPerBucket = bitsPerBucket / 64
)

// bucket16 is a bitmap-based set of uint16 values.
type bucket16 struct {
	// bits contains one bit per uint16 value.
	// The value x is stored in the bit x&63 of the word x/64.
	bits [wordsPerBucket]uint64
}

// clone returns a copy of b with its own bitmap.
func (b *bucket16) clone() *bucket16 {
	cloned := new(bucket16)
	cloned.bits = b.bits
	return cloned
}

// add sets the bit for x in b.
//
// It returns true if the bit wasn't set before the call.
func (b *bucket16) add(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	if b.bits[wordNum]&bitMask != 0 {
		// x is already in the bucket.
		return false
	}
	b.bits[wordNum] |= bitMask
	return true
}

// has reports whether the bit for x is set in b.
func (b *bucket16) has(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	word := b.bits[wordNum]
	return word&bitMask != 0
}

// del clears the bit for x in b.
//
// It returns true if the bit was set before the call.
func (b *bucket16) del(x uint16) bool {
	wordNum, bitMask := getWordNumBitMask(x)
	if b.bits[wordNum]&bitMask == 0 {
		// x isn't in the bucket.
		return false
	}
	b.bits[wordNum] &^= bitMask
	return true
}

// appendTo appends all the items from b to dst in ascending order
// and returns the result.
//
// Every appended uint64 is composed of hi in the bits 32..63,
// hi16 in the bits 16..31 and the position of a set bit in the bits 0..15.
func (b *bucket16) appendTo(dst []uint64, hi uint32, hi16 uint16) []uint64 {
	prefix := uint64(hi)<<32 | uint64(hi16)<<16
	for i := range b.bits {
		word := b.bits[i]
		base := prefix | uint64(i)*64
		// Emit the set bits from the lowest to the highest one.
		for word != 0 {
			bit := uint64(bits.TrailingZeros64(word))
			dst = append(dst, base|bit)
			// Clear the lowest set bit.
			word &= word - 1
		}
	}
	return dst
}

// getWordNumBitMask returns the index of the 64-bit word holding the bit
// for x together with the mask selecting this bit inside the word.
func getWordNumBitMask(x uint16) (uint16, uint64) {
	return x >> 6, uint64(1) << (x % 64)
}

// binarySearch16 returns the smallest index i in the ascending sorted u16,
// at which u16[i] >= x. It returns len(u16) if there is no such index.
func binarySearch16(u16 []uint16, x uint16) int {
	// The code has been adapted from sort.Search.
	lo, hi := 0, len(u16)
	for lo < hi {
		// lo <= mid < hi, so mid is always a valid index.
		// The uint conversion avoids overflow on lo+hi.
		mid := int(uint(lo+hi) >> 1)
		if u16[mid] < x {
			lo = mid + 1
			continue
		}
		hi = mid
	}
	return lo
}
lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`package uint64set`

			`import (`
lib/uint64set: optimize Set.AppendTo 2019-09-24 21:34:09 +00:00			`"math/bits"`
lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`"sort"`
			`)`

			`// Set is a fast set for uint64.`
			`//`
			`// It should work faster than map[uint64]struct{} for semi-sparse uint64 values`
			`// such as MetricIDs generated by lib/storage.`
			`//`
			`// It is unsafe calling Set methods from concurrent goroutines.`
			`type Set struct {`
			`itemsCount int`
			`buckets bucket32Sorter`
			`}`

			`type bucket32Sorter []*bucket32`

			`func (s bucket32Sorter) Len() int { return len(s) }`
			`func (s *bucket32Sorter) Less(i, j int) bool {`
			`a := *s`
			`return a[i].hi < a[j].hi`
			`}`
			`func (s *bucket32Sorter) Swap(i, j int) {`
			`a := *s`
			`a[i], a[j] = a[j], a[i]`
			`}`

			`// Clone returns an independent copy of s.`
			`func (s Set) Clone() Set {`
			`if s == nil {`
lib/uint64set: return an empty set instead of nil set from `Set.Clone`, since the caller may add data to the cloned set This fixes the following panic in v1.28.1: panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x10 pc=0x783a7e] goroutine 1155 [running]: github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set.(Set).Add(0x0, 0x15b3bfb41e8b71ec) github.com/VictoriaMetrics/VictoriaMetrics@/lib/uint64set/uint64set.go:57 +0x2e github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(indexSearch).getMetricIDsForRecentHours(0xc5bdc0dd40, 0x16e273f6b50, 0x16e2745d3f0, 0x5b8d95, 0x10, 0x4a2f51, 0xaa01000000000000) github.com/VictoriaMetrics/VictoriaMetrics@/lib/storage/index_db.go:1951 +0x260 github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*indexSearch).getMetricIDsForTimeRange(0xc5bdc0dd40, 0x16e273f6b50, 0x16e2745d3f0, 0x5b8d95, 0x10, 0xb296c0, 0xc00009cd80, 0x9bc640) 2019-11-01 14:11:15 +00:00			`// Return an empty set, so data could be added into it later.`
			`return &Set{}`
lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`}`
			`var dst Set`
			`dst.itemsCount = s.itemsCount`
			`dst.buckets = make([]*bucket32, len(s.buckets))`
			`for i, b32 := range s.buckets {`
			`dst.buckets[i] = b32.clone()`
			`}`
			`return &dst`
			`}`

			`// Len returns the number of distinct uint64 values in s.`
			`func (s *Set) Len() int {`
			`if s == nil {`
			`return 0`
			`}`
			`return s.itemsCount`
			`}`

			`// Add adds x to s.`
			`func (s *Set) Add(x uint64) {`
			`hi := uint32(x >> 32)`
			`lo := uint32(x)`
			`for _, b32 := range s.buckets {`
			`if b32.hi == hi {`
			`if b32.add(lo) {`
			`s.itemsCount++`
			`}`
			`return`
			`}`
			`}`
			`s.addAlloc(hi, lo)`
			`}`

			`func (s *Set) addAlloc(hi, lo uint32) {`
			`var b32 bucket32`
			`b32.hi = hi`
			`_ = b32.add(lo)`
			`s.itemsCount++`
			`s.buckets = append(s.buckets, &b32)`
			`}`

			`// Has verifies whether x exists in s.`
			`func (s *Set) Has(x uint64) bool {`
			`hi := uint32(x >> 32)`
			`lo := uint32(x)`
			`if s == nil {`
			`return false`
			`}`
			`for _, b32 := range s.buckets {`
			`if b32.hi == hi {`
			`return b32.has(lo)`
			`}`
			`}`
			`return false`
			`}`

			`// Del deletes x from s.`
			`func (s *Set) Del(x uint64) {`
			`hi := uint32(x >> 32)`
			`lo := uint32(x)`
			`for _, b32 := range s.buckets {`
			`if b32.hi == hi {`
			`if b32.del(lo) {`
			`s.itemsCount--`
			`}`
			`return`
			`}`
			`}`
			`}`

			`// AppendTo appends all the items from the set to dst and returns the result.`
			`//`
			`// The returned items are sorted.`
			`func (s *Set) AppendTo(dst []uint64) []uint64 {`
			`if s == nil {`
			`return dst`
			`}`
			`// pre-allocate memory for dst`
			`dstLen := len(dst)`
			`if n := s.Len() - cap(dst) + dstLen; n > 0 {`
			`dst = append(dst[:cap(dst)], make([]uint64, n)...)`
			`dst = dst[:dstLen]`
			`}`
			`// sort s.buckets if it isn't sorted yet`
			`if !sort.IsSorted(&s.buckets) {`
			`sort.Sort(&s.buckets)`
			`}`
			`for _, b32 := range s.buckets {`
			`dst = b32.appendTo(dst)`
			`}`
			`return dst`
			`}`

lib/{storage,uint64set}: add Set.Union() function and use it 2019-11-03 22:34:24 +00:00			`// Union adds all the items from a to s.`
			`func (s Set) Union(a Set) {`
			`// TODO: optimize it`
			`for _, x := range a.AppendTo(nil) {`
			`s.Add(x)`
			`}`
			`}`

lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`type bucket32 struct {`
			`hi uint32`
			`b16his []uint16`
			`buckets []*bucket16`
			`}`

			`func (b bucket32) clone() bucket32 {`
			`var dst bucket32`
			`dst.hi = b.hi`
			`dst.b16his = append(dst.b16his[:0], b.b16his...)`
			`dst.buckets = make([]*bucket16, len(b.buckets))`
			`for i, b16 := range b.buckets {`
			`dst.buckets[i] = b16.clone()`
			`}`
			`return &dst`
			`}`

			`// This is for sort.Interface`
			`func (b *bucket32) Len() int { return len(b.b16his) }`
			`func (b *bucket32) Less(i, j int) bool { return b.b16his[i] < b.b16his[j] }`
			`func (b *bucket32) Swap(i, j int) {`
			`his := b.b16his`
			`buckets := b.buckets`
			`his[i], his[j] = his[j], his[i]`
			`buckets[i], buckets[j] = buckets[j], buckets[i]`
			`}`

			`const maxUnsortedBuckets = 32`

			`func (b *bucket32) add(x uint32) bool {`
			`hi := uint16(x >> 16)`
			`lo := uint16(x)`
			`if len(b.buckets) > maxUnsortedBuckets {`
			`return b.addSlow(hi, lo)`
			`}`
			`for i, hi16 := range b.b16his {`
			`if hi16 == hi {`
			`return i < len(b.buckets) && b.buckets[i].add(lo)`
			`}`
			`}`
			`b.addAllocSmall(hi, lo)`
			`return true`
			`}`

			`func (b *bucket32) addAllocSmall(hi, lo uint16) {`
			`var b16 bucket16`
			`_ = b16.add(lo)`
			`b.b16his = append(b.b16his, hi)`
			`b.buckets = append(b.buckets, &b16)`
			`if len(b.buckets) > maxUnsortedBuckets {`
			`sort.Sort(b)`
			`}`
			`}`

			`func (b *bucket32) addSlow(hi, lo uint16) bool {`
			`n := binarySearch16(b.b16his, hi)`
			`if n < 0 \|\| n >= len(b.b16his) \|\| b.b16his[n] != hi {`
			`b.addAllocBig(hi, lo, n)`
			`return true`
			`}`
			`return n < len(b.buckets) && b.buckets[n].add(lo)`
			`}`

			`func (b *bucket32) addAllocBig(hi, lo uint16, n int) {`
			`if n < 0 {`
			`return`
			`}`
			`var b16 bucket16`
			`_ = b16.add(lo)`
			`if n >= len(b.b16his) {`
			`b.b16his = append(b.b16his, hi)`
			`b.buckets = append(b.buckets, &b16)`
			`return`
			`}`
			`b.b16his = append(b.b16his[:n+1], b.b16his[n:]...)`
			`b.b16his[n] = hi`
			`b.buckets = append(b.buckets[:n+1], b.buckets[n:]...)`
			`b.buckets[n] = &b16`
			`}`

			`func (b *bucket32) has(x uint32) bool {`
			`hi := uint16(x >> 16)`
			`lo := uint16(x)`
			`if len(b.buckets) > maxUnsortedBuckets {`
			`return b.hasSlow(hi, lo)`
			`}`
			`for i, hi16 := range b.b16his {`
			`if hi16 == hi {`
			`return i < len(b.buckets) && b.buckets[i].has(lo)`
			`}`
			`}`
			`return false`
			`}`

			`func (b *bucket32) hasSlow(hi, lo uint16) bool {`
			`n := binarySearch16(b.b16his, hi)`
			`if n < 0 \|\| n >= len(b.b16his) \|\| b.b16his[n] != hi {`
			`return false`
			`}`
			`return n < len(b.buckets) && b.buckets[n].has(lo)`
			`}`

			`func (b *bucket32) del(x uint32) bool {`
			`hi := uint16(x >> 16)`
			`lo := uint16(x)`
			`if len(b.buckets) > maxUnsortedBuckets {`
			`return b.delSlow(hi, lo)`
			`}`
			`for i, hi16 := range b.b16his {`
			`if hi16 == hi {`
			`return i < len(b.buckets) && b.buckets[i].del(lo)`
			`}`
			`}`
			`return false`
			`}`

			`func (b *bucket32) delSlow(hi, lo uint16) bool {`
			`n := binarySearch16(b.b16his, hi)`
			`if n < 0 \|\| n >= len(b.b16his) \|\| b.b16his[n] != hi {`
			`return false`
			`}`
			`return n < len(b.buckets) && b.buckets[n].del(lo)`
			`}`

			`func (b *bucket32) appendTo(dst []uint64) []uint64 {`
			`if len(b.buckets) <= maxUnsortedBuckets && !sort.IsSorted(b) {`
			`sort.Sort(b)`
			`}`
			`for i, b16 := range b.buckets {`
			`hi16 := b.b16his[i]`
			`dst = b16.appendTo(dst, b.hi, hi16)`
			`}`
			`return dst`
			`}`

			`const (`
			`bitsPerBucket = 1 << 16`
			`wordsPerBucket = bitsPerBucket / 64`
			`)`

			`type bucket16 struct {`
			`bits [wordsPerBucket]uint64`
			`}`

			`func (b bucket16) clone() bucket16 {`
			`var dst bucket16`
			`copy(dst.bits[:], b.bits[:])`
			`return &dst`
			`}`

			`func (b *bucket16) add(x uint16) bool {`
			`wordNum, bitMask := getWordNumBitMask(x)`
			`word := &b.bits[wordNum]`
			`ok := *word&bitMask == 0`
			`*word \|= bitMask`
			`return ok`
			`}`

			`func (b *bucket16) has(x uint16) bool {`
			`wordNum, bitMask := getWordNumBitMask(x)`
			`return b.bits[wordNum]&bitMask != 0`
			`}`

			`func (b *bucket16) del(x uint16) bool {`
			`wordNum, bitMask := getWordNumBitMask(x)`
			`word := &b.bits[wordNum]`
			`ok := *word&bitMask != 0`
			`*word &^= bitMask`
			`return ok`
			`}`

			`func (b *bucket16) appendTo(dst []uint64, hi uint32, hi16 uint16) []uint64 {`
			`hi64 := uint64(hi)<<32 \| uint64(hi16)<<16`
			`var wordNum uint64`
			`for _, word := range b.bits {`
lib/uint64set: optimize Set.AppendTo 2019-09-24 21:34:09 +00:00			`if word == 0 {`
			`wordNum++`
			`continue`
			`}`
			`x64 := hi64 \| (wordNum * 64)`
			`for {`
			`tzn := uint64(bits.TrailingZeros64(word))`
			`if tzn >= 64 {`
			`break`
lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`}`
lib/uint64set: optimize Set.AppendTo 2019-09-24 21:34:09 +00:00			`word &^= uint64(1) << tzn`
			`x := x64 \| tzn`
			`dst = append(dst, x)`
lib/storage: create and use `lib/uint64set` instead of `map[uint64]struct{}` This should improve inverted index search performance for filters matching big number of time series, since `lib/uint64set.Set` is faster than `map[uint64]struct{}` for both `Add` and `Has` calls. See the corresponding benchmarks in `lib/uint64set`. 2019-09-24 18:10:22 +00:00			`}`
			`wordNum++`
			`}`
			`return dst`
			`}`

			`func getWordNumBitMask(x uint16) (uint16, uint64) {`
			`wordNum := x / 64`
			`bitMask := uint64(1) << (x & 63)`
			`return wordNum, bitMask`
			`}`

			`func binarySearch16(u16 []uint16, x uint16) int {`
			`// The code has been adapted from sort.Search.`
			`n := len(u16)`
			`i, j := 0, n`
			`for i < j {`
			`h := int(uint(i+j) >> 1)`
			`if h >= 0 && h < len(u16) && u16[h] < x {`
			`i = h + 1`
			`} else {`
			`j = h`
			`}`
			`}`
			`return i`
			`}`