diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 742c530e9..1f956b4b8 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -3849,14 +3849,14 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`histogram_quantile(nan-bucket-count-some)`, func(t *testing.T) { t.Parallel() - q := `histogram_quantile(0.6, + q := `round(histogram_quantile(0.6, label_set(90, "foo", "bar", "le", "10") or label_set(NaN, "foo", "bar", "le", "30") or label_set(300, "foo", "bar", "le", "+Inf") - )` + ),0.01)` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{30, 30, 30, 30, 30, 30}, + Values: []float64{18.57, 18.57, 18.57, 18.57, 18.57, 18.57}, Timestamps: timestampsExpected, } r.MetricName.Tags = []storage.Tag{{ diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index 8a1527bb0..f72604c77 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -994,16 +994,32 @@ func groupLeTimeseries(tss []*timeseries) map[string][]leTimeseries { } func fixBrokenBuckets(i int, xss []leTimeseries) { - // Fix broken buckets. - // They are already sorted by le, so their values must be in ascending order, + // Buckets are already sorted by le, so their values must be in ascending order, // since the next bucket includes all the previous buckets. - vPrev := float64(0) - for _, xs := range xss { - v := xs.ts.Values[i] - if v < vPrev || math.IsNaN(v) { - xs.ts.Values[i] = vPrev + // If the next bucket has lower value than the current bucket, + // then the current bucket must be substituted with the next bucket value. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819 + if len(xss) < 2 { + return + } + for j := len(xss)-1; j>=0; j-- { + v := xss[j].ts.Values[i] + if !math.IsNaN(v) { + j++ + for j < len(xss) { + xss[j].ts.Values[i] = v + j++ + } + break + } + } + vNext := xss[len(xss)-1].ts.Values[i] + for j := len(xss)-2; j >= 0; j-- { + v := xss[j].ts.Values[i] + if math.IsNaN(v) || v > vNext { + xss[j].ts.Values[i] = vNext } else { - vPrev = v + vNext = v } } } diff --git a/app/vmselect/promql/transform_test.go b/app/vmselect/promql/transform_test.go new file mode 100644 index 000000000..114147e25 --- /dev/null +++ b/app/vmselect/promql/transform_test.go @@ -0,0 +1,34 @@ +package promql + +import ( + "reflect" + "testing" +) + +func TestFixBrokenBuckets(t *testing.T) { + f := func(values, expectedResult []float64) { + t.Helper() + xss := make([]leTimeseries, len(values)) + for i, v := range values { + xss[i].ts = ×eries{ + Values: []float64{v}, + } + } + fixBrokenBuckets(0, xss) + result := make([]float64, len(values)) + for i, xs := range xss { + result[i] = xs.ts.Values[0] + } + if !reflect.DeepEqual(result, expectedResult) { + t.Fatalf("unexpected result for values=%v\ngot\n%v\nwant\n%v", values, result, expectedResult) + } + } + f(nil, []float64{}) + f([]float64{1}, []float64{1}) + f([]float64{1,2}, []float64{1,2}) + f([]float64{2,1}, []float64{1,1}) + f([]float64{1,2,3,nan,nan},[]float64{1,2,3,3,3}) + f([]float64{5,1,2,3,nan}, []float64{1,1,2,3,3}) + f([]float64{1,5,2,nan,6,3}, []float64{1,2,2,3,3,3}) + f([]float64{5,10,4,3}, []float64{3,3,3,3}) +} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index d55a60f4f..695dcc178 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -53,6 +53,7 @@ scrape_configs: * BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple of gigabytes). Now the max cache size for parsed regexps is limited to a a few megabytes. * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787). +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate [histogram_quantile](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantile) over Prometheus buckets with unexpected values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2819). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when unsuccessful scrape occurs. This should prevent from possible time series overlap on scrape target restart in dynmaic envirnoments such as Kubernetes. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).