app/vmselect/promql: properly calculate histogram_quantile over unexpected le buckets

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819
This commit is contained in:
Aliaksandr Valialkin 2022-07-05 13:18:55 +03:00
parent 82581ab728
commit 8516670582
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
4 changed files with 62 additions and 11 deletions

View file

@ -3849,14 +3849,14 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`histogram_quantile(nan-bucket-count-some)`, func(t *testing.T) {
t.Parallel()
q := `histogram_quantile(0.6,
q := `round(histogram_quantile(0.6,
label_set(90, "foo", "bar", "le", "10")
or label_set(NaN, "foo", "bar", "le", "30")
or label_set(300, "foo", "bar", "le", "+Inf")
)`
),0.01)`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{30, 30, 30, 30, 30, 30},
Values: []float64{18.57, 18.57, 18.57, 18.57, 18.57, 18.57},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{

View file

@ -994,16 +994,32 @@ func groupLeTimeseries(tss []*timeseries) map[string][]leTimeseries {
}
func fixBrokenBuckets(i int, xss []leTimeseries) {
// Fix broken buckets.
// They are already sorted by le, so their values must be in ascending order,
// Buckets are already sorted by le, so their values must be in ascending order,
// since the next bucket includes all the previous buckets.
vPrev := float64(0)
for _, xs := range xss {
v := xs.ts.Values[i]
if v < vPrev || math.IsNaN(v) {
xs.ts.Values[i] = vPrev
// If the next bucket has lower value than the current bucket,
// then the current bucket must be substituted with the next bucket value.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819
if len(xss) < 2 {
return
}
for j := len(xss)-1; j>=0; j-- {
v := xss[j].ts.Values[i]
if !math.IsNaN(v) {
j++
for j < len(xss) {
xss[j].ts.Values[i] = v
j++
}
break
}
}
vNext := xss[len(xss)-1].ts.Values[i]
for j := len(xss)-2; j >= 0; j-- {
v := xss[j].ts.Values[i]
if math.IsNaN(v) || v > vNext {
xss[j].ts.Values[i] = vNext
} else {
vPrev = v
vNext = v
}
}
}

View file

@ -0,0 +1,34 @@
package promql
import (
"reflect"
"testing"
)
func TestFixBrokenBuckets(t *testing.T) {
f := func(values, expectedResult []float64) {
t.Helper()
xss := make([]leTimeseries, len(values))
for i, v := range values {
xss[i].ts = &timeseries{
Values: []float64{v},
}
}
fixBrokenBuckets(0, xss)
result := make([]float64, len(values))
for i, xs := range xss {
result[i] = xs.ts.Values[0]
}
if !reflect.DeepEqual(result, expectedResult) {
t.Fatalf("unexpected result for values=%v\ngot\n%v\nwant\n%v", values, result, expectedResult)
}
}
f(nil, []float64{})
f([]float64{1}, []float64{1})
f([]float64{1,2}, []float64{1,2})
f([]float64{2,1}, []float64{1,1})
f([]float64{1,2,3,nan,nan},[]float64{1,2,3,3,3})
f([]float64{5,1,2,3,nan}, []float64{1,1,2,3,3})
f([]float64{1,5,2,nan,6,3}, []float64{1,2,2,3,3,3})
f([]float64{5,10,4,3}, []float64{3,3,3,3})
}

View file

@ -53,6 +53,7 @@ scrape_configs:
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a a few megabytes.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate [histogram_quantile](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantile) over Prometheus buckets with unexpected values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix.
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).