mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmselect/promql: properly calculate histogram_quantile over unexpected le
buckets
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819
This commit is contained in:
parent
82581ab728
commit
8516670582
4 changed files with 62 additions and 11 deletions
|
@ -3849,14 +3849,14 @@ func TestExecSuccess(t *testing.T) {
|
|||
})
|
||||
t.Run(`histogram_quantile(nan-bucket-count-some)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `histogram_quantile(0.6,
|
||||
q := `round(histogram_quantile(0.6,
|
||||
label_set(90, "foo", "bar", "le", "10")
|
||||
or label_set(NaN, "foo", "bar", "le", "30")
|
||||
or label_set(300, "foo", "bar", "le", "+Inf")
|
||||
)`
|
||||
),0.01)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{30, 30, 30, 30, 30, 30},
|
||||
Values: []float64{18.57, 18.57, 18.57, 18.57, 18.57, 18.57},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r.MetricName.Tags = []storage.Tag{{
|
||||
|
|
|
@ -994,16 +994,32 @@ func groupLeTimeseries(tss []*timeseries) map[string][]leTimeseries {
|
|||
}
|
||||
|
||||
func fixBrokenBuckets(i int, xss []leTimeseries) {
|
||||
// Fix broken buckets.
|
||||
// They are already sorted by le, so their values must be in ascending order,
|
||||
// Buckets are already sorted by le, so their values must be in ascending order,
|
||||
// since the next bucket includes all the previous buckets.
|
||||
vPrev := float64(0)
|
||||
for _, xs := range xss {
|
||||
v := xs.ts.Values[i]
|
||||
if v < vPrev || math.IsNaN(v) {
|
||||
xs.ts.Values[i] = vPrev
|
||||
// If the next bucket has lower value than the current bucket,
|
||||
// then the current bucket must be substituted with the next bucket value.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819
|
||||
if len(xss) < 2 {
|
||||
return
|
||||
}
|
||||
for j := len(xss)-1; j>=0; j-- {
|
||||
v := xss[j].ts.Values[i]
|
||||
if !math.IsNaN(v) {
|
||||
j++
|
||||
for j < len(xss) {
|
||||
xss[j].ts.Values[i] = v
|
||||
j++
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
vNext := xss[len(xss)-1].ts.Values[i]
|
||||
for j := len(xss)-2; j >= 0; j-- {
|
||||
v := xss[j].ts.Values[i]
|
||||
if math.IsNaN(v) || v > vNext {
|
||||
xss[j].ts.Values[i] = vNext
|
||||
} else {
|
||||
vPrev = v
|
||||
vNext = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
34
app/vmselect/promql/transform_test.go
Normal file
34
app/vmselect/promql/transform_test.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
package promql
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFixBrokenBuckets(t *testing.T) {
|
||||
f := func(values, expectedResult []float64) {
|
||||
t.Helper()
|
||||
xss := make([]leTimeseries, len(values))
|
||||
for i, v := range values {
|
||||
xss[i].ts = ×eries{
|
||||
Values: []float64{v},
|
||||
}
|
||||
}
|
||||
fixBrokenBuckets(0, xss)
|
||||
result := make([]float64, len(values))
|
||||
for i, xs := range xss {
|
||||
result[i] = xs.ts.Values[0]
|
||||
}
|
||||
if !reflect.DeepEqual(result, expectedResult) {
|
||||
t.Fatalf("unexpected result for values=%v\ngot\n%v\nwant\n%v", values, result, expectedResult)
|
||||
}
|
||||
}
|
||||
f(nil, []float64{})
|
||||
f([]float64{1}, []float64{1})
|
||||
f([]float64{1,2}, []float64{1,2})
|
||||
f([]float64{2,1}, []float64{1,1})
|
||||
f([]float64{1,2,3,nan,nan},[]float64{1,2,3,3,3})
|
||||
f([]float64{5,1,2,3,nan}, []float64{1,1,2,3,3})
|
||||
f([]float64{1,5,2,nan,6,3}, []float64{1,2,2,3,3,3})
|
||||
f([]float64{5,10,4,3}, []float64{3,3,3,3})
|
||||
}
|
|
@ -53,6 +53,7 @@ scrape_configs:
|
|||
|
||||
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a a few megabytes.
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate [histogram_quantile](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantile) over Prometheus buckets with unexpected values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix.
|
||||
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).
|
||||
|
|
Loading…
Reference in a new issue