app/vmselect/promql: optimize buckets_limit(k, buckets) for a big number of buckets

This commit is contained in:
Aliaksandr Valialkin 2020-07-25 13:23:55 +03:00
parent cc735da814
commit 978c1e930e

View file

@ -304,7 +304,7 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
// Group timeseries by all MetricGroup+tags excluding `le` tag. // Group timeseries by all MetricGroup+tags excluding `le` tag.
type x struct { type x struct {
le float64 le float64
delta float64 hits float64
ts *timeseries ts *timeseries
} }
m := make(map[string][]x) m := make(map[string][]x)
@ -333,28 +333,40 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
// Remove buckets with the smallest counters. // Remove buckets with the smallest counters.
rvs := make([]*timeseries, 0, len(tss)) rvs := make([]*timeseries, 0, len(tss))
for _, leGroup := range m { for _, leGroup := range m {
for len(leGroup) > limit { if len(leGroup) <= limit {
// Remove a single bucket with the smallest sum. // Fast path - the number of buckets doesn't exceed the given limit.
// TODO: optimize this dumb implementation a bit, since it may be slow on big number of buckets. // Keep all the buckets as is.
for _, xx := range leGroup {
rvs = append(rvs, xx.ts)
}
continue
}
// Slow path - remove buckets with the smallest number of hits until their count reaches the limit.
// Calculate per-bucket hits.
sort.Slice(leGroup, func(i, j int) bool { sort.Slice(leGroup, func(i, j int) bool {
return leGroup[i].le < leGroup[j].le return leGroup[i].le < leGroup[j].le
}) })
for i := range leGroup {
leGroup[i].delta = 0
}
for n := range limits { for n := range limits {
prevValue := float64(0) prevValue := float64(0)
for i := range leGroup { for i := range leGroup {
xx := &leGroup[i] xx := &leGroup[i]
value := xx.ts.Values[n] value := xx.ts.Values[n]
xx.delta += value - prevValue xx.hits += value - prevValue
prevValue = value prevValue = value
} }
} }
sort.Slice(leGroup, func(i, j int) bool { for len(leGroup) > limit {
return leGroup[i].delta < leGroup[j].delta xxMinIdx := 0
}) for i, xx := range leGroup {
leGroup = leGroup[1:] if xx.hits < leGroup[xxMinIdx].hits {
xxMinIdx = i
}
}
if xxMinIdx+1 < len(leGroup) {
leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
}
leGroup = append(leGroup[:xxMinIdx], leGroup[xxMinIdx+1:]...)
} }
for _, xx := range leGroup { for _, xx := range leGroup {
rvs = append(rvs, xx.ts) rvs = append(rvs, xx.ts)