app/vmselect/promql: merge adjacent buckets with the smallest total number of hits in the buckets_limit() function

This should improve accuracy for the returned buckets.
This commit is contained in:
Aliaksandr Valialkin 2021-03-17 14:30:52 +02:00
parent a3b3d434a3
commit 69201806f8

View file

@ -347,8 +347,7 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
return nil, nil return nil, nil
} }
if limit < 3 { if limit < 3 {
// Preserve the first and the last bucket for better accuracy, // Preserve the first and the last bucket for better accuracy for min and max values.
// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]`
limit = 3 limit = 3
} }
tss := vmrangeBucketsToLE(args[1]) tss := vmrangeBucketsToLE(args[1])
@ -412,21 +411,17 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
} }
} }
for len(leGroup) > limit { for len(leGroup) > limit {
// Preserve the first and the last bucket for better accuracy, // Preserve the first and the last bucket for better accuracy for min and max values
// since these buckets are usually `[0...leMin]` and `(leMax ... +Inf]` xxMinIdx := 1
xxMinIdx := 0 minMergeHits := leGroup[1].hits + leGroup[2].hits
for i, xx := range leGroup[1 : len(leGroup)-1] { for i := range leGroup[1 : len(leGroup)-2] {
if xx.hits < leGroup[xxMinIdx].hits { mergeHits := leGroup[i+1].hits + leGroup[i+2].hits
xxMinIdx = i if mergeHits < minMergeHits {
xxMinIdx = i + 1
minMergeHits = mergeHits
} }
} }
xxMinIdx++ leGroup[xxMinIdx].hits += leGroup[xxMinIdx+1].hits
// Merge the leGroup[xxMinIdx] bucket with the smallest adjacent bucket in order to preserve
// the maximum accuracy.
if xxMinIdx > 1 && leGroup[xxMinIdx-1].hits < leGroup[xxMinIdx+1].hits {
xxMinIdx--
}
leGroup[xxMinIdx+1].hits += leGroup[xxMinIdx].hits
leGroup = append(leGroup[:xxMinIdx], leGroup[xxMinIdx+1:]...) leGroup = append(leGroup[:xxMinIdx], leGroup[xxMinIdx+1:]...)
} }
for _, xx := range leGroup { for _, xx := range leGroup {