app/vmselect/promql: follow-up for ce4f26db02

- Document the bugfix at docs/CHANGELOG.md
- Filter out NaN values before sorting as suggested at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5509#discussion_r1447369218
- Revert unrelated changes in lib/filestream and lib/fs
- Use simpler test at app/vmselect/promql/exec_test.go

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5509
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5506
This commit is contained in:
Aliaksandr Valialkin 2024-01-16 02:55:06 +02:00
parent 91b6b32c3a
commit 742b1b5443
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
7 changed files with 45 additions and 304 deletions

View file

@ -648,18 +648,26 @@ func newAggrFuncTopK(isReverse bool) aggrFunc {
if err != nil {
return nil, err
}
lt := lessWithNaNs
if isReverse {
lt = lessWithNaNsReversed
}
afe := func(tss []*timeseries, modififer *metricsql.ModifierExpr) []*timeseries {
var tssNoNaNs []*timeseries
for n := range tss[0].Values {
sort.Slice(tss, func(i, j int) bool {
a := tss[i].Values[n]
b := tss[j].Values[n]
return lt(a, b)
// Drop series with NaNs at Values[n] before sorting.
// This is needed for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5506
tssNoNaNs = tssNoNaNs[:0]
for _, ts := range tss {
if !math.IsNaN(ts.Values[n]) {
tssNoNaNs = append(tssNoNaNs, ts)
}
}
sort.Slice(tssNoNaNs, func(i, j int) bool {
a := tssNoNaNs[i].Values[n]
b := tssNoNaNs[j].Values[n]
if isReverse {
a, b = b, a
}
return a < b
})
fillNaNsAtIdx(n, ks[n], tss)
fillNaNsAtIdx(n, ks[n], tssNoNaNs)
}
tss = removeEmptySeries(tss)
reverseSeries(tss)
@ -710,16 +718,31 @@ func getRangeTopKTimeseries(tss []*timeseries, modifier *metricsql.ModifierExpr,
value: value,
}
}
lt := lessWithNaNs
if isReverse {
lt = lessWithNaNsReversed
// Drop maxs with NaNs before sorting.
// This is needed for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5506
maxsNoNaNs := make([]tsWithValue, 0, len(maxs))
for _, tsv := range maxs {
if !math.IsNaN(tsv.value) {
maxsNoNaNs = append(maxsNoNaNs, tsv)
}
}
sort.Slice(maxs, func(i, j int) bool {
return lt(maxs[i].value, maxs[j].value)
sort.Slice(maxsNoNaNs, func(i, j int) bool {
a := maxsNoNaNs[i].value
b := maxsNoNaNs[j].value
if isReverse {
a, b = b, a
}
return a < b
})
for i := range maxs {
tss[i] = maxs[i].ts
for _, tsv := range maxs {
if math.IsNaN(tsv.value) {
maxsNoNaNs = append(maxsNoNaNs, tsv)
}
}
for i := range maxsNoNaNs {
tss[i] = maxsNoNaNs[i].ts
}
remainingSumTS := getRemainingSumTimeseries(tss, modifier, ks, remainingSumTagName)
for i, k := range ks {
fillNaNsAtIdx(i, k, tss)
@ -1199,20 +1222,6 @@ func newAggrQuantileFunc(phis []float64) func(tss []*timeseries, modifier *metri
}
}
func lessWithNaNs(a, b float64) bool {
if math.IsNaN(a) {
return !math.IsNaN(b)
}
return a < b
}
func lessWithNaNsReversed(a, b float64) bool {
if math.IsNaN(a) {
return true
}
return a > b
}
func floatToIntBounded(f float64) int {
if f > math.MaxInt {
return math.MaxInt

View file

@ -1,12 +1,8 @@
package promql
import (
"log"
"math"
"reflect"
"testing"
"github.com/VictoriaMetrics/metricsql"
)
func TestModeNoNaNs(t *testing.T) {
@ -38,268 +34,3 @@ func TestModeNoNaNs(t *testing.T) {
f(1, []float64{2, 3, 3, 4, 4}, 3)
f(1, []float64{4, 3, 2, 3, 4}, 3)
}
func TestLessWithNaNs(t *testing.T) {
f := func(a, b float64, expectedResult bool) {
t.Helper()
result := lessWithNaNs(a, b)
if result != expectedResult {
t.Fatalf("unexpected result; got %v; want %v", result, expectedResult)
}
}
f(nan, nan, false)
f(nan, 1, true)
f(1, nan, false)
f(1, 2, true)
f(2, 1, false)
f(1, 1, false)
}
func TestLessWithNaNsReversed(t *testing.T) {
f := func(a, b float64, expectedResult bool) {
t.Helper()
result := lessWithNaNsReversed(a, b)
if result != expectedResult {
t.Fatalf("unexpected result; got %v; want %v", result, expectedResult)
}
}
f(nan, nan, true)
f(nan, 1, true)
f(1, nan, false)
f(1, 2, false)
f(2, 1, true)
f(1, 1, false)
}
func TestTopK(t *testing.T) {
f := func(all [][]*timeseries, expected []*timeseries, k int, reversed bool) {
t.Helper()
topKFunc := newAggrFuncTopK(reversed)
actual, err := topKFunc(&aggrFuncArg{
args: all,
ae: &metricsql.AggrFuncExpr{
Limit: 1,
Modifier: metricsql.ModifierExpr{},
},
ec: nil,
})
if err != nil {
log.Fatalf("failed to call topK, err=%v", err)
}
for i := range actual {
if !eq(expected[i], actual[i]) {
t.Fatalf("unexpected result: i:%v got:\n%v; want:\t%v", i, actual[i], expected[i])
}
}
}
f(newTestSeries(), []*timeseries{
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{nan, nan, 3, 2, 1},
},
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{1, 2, 3, 4, 5},
},
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{2, 3, nan, nan, nan},
},
}, 2, true)
f(newTestSeries(), []*timeseries{
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{3, 4, 5, 6, 7},
},
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{nan, nan, 4, 5, 6},
},
{
Timestamps: []int64{1, 2, 3, 4, 5},
Values: []float64{5, 4, nan, nan, nan},
},
}, 2, false)
f(newTestSeriesWithNaNsWithoutOverlap(), []*timeseries{
{
Values: []float64{nan, nan, nan, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, 4, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{1, 2, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
}, 2, true)
f(newTestSeriesWithNaNsWithoutOverlap(), []*timeseries{
{
Values: []float64{nan, nan, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 6, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{1, 2, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
}, 2, false)
f(newTestSeriesWithNaNsWithOverlap(), []*timeseries{
{
Values: []float64{nan, nan, nan, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, nan, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{1, 2, 3, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, 4, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
}, 2, true)
f(newTestSeriesWithNaNsWithOverlap(), []*timeseries{
{
Values: []float64{nan, nan, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 6, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{1, 2, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
}, 2, false)
}
func newTestSeries() [][]*timeseries {
return [][]*timeseries{
{
{
Values: []float64{2, 2, 2, 2, 2},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
{
{
Values: []float64{1, 2, 3, 4, 5},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, 4, 5, 6},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{5, 4, 3, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{3, 4, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
}
}
func newTestSeriesWithNaNsWithoutOverlap() [][]*timeseries {
return [][]*timeseries{
{
{
Values: []float64{2, 2, 2, 2, 2},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
{
{
Values: []float64{1, 2, nan, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, 4, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 6, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
}
}
func newTestSeriesWithNaNsWithOverlap() [][]*timeseries {
return [][]*timeseries{
{
{
Values: []float64{2, 2, 2, 2, 2},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
{
{
Values: []float64{1, 2, 3, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{2, 3, 4, nan, nan},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 6, 2, 1},
Timestamps: []int64{1, 2, 3, 4, 5},
},
{
Values: []float64{nan, nan, 5, 6, 7},
Timestamps: []int64{1, 2, 3, 4, 5},
},
},
}
}
func eq(a, b *timeseries) bool {
if !reflect.DeepEqual(a.Timestamps, b.Timestamps) {
return false
}
for i := range a.Values {
if !eqWithNan(a.Values[i], b.Values[i]) {
return false
}
}
return true
}
func eqWithNan(a, b float64) bool {
if math.IsNaN(a) && math.IsNaN(b) {
return true
}
if math.IsNaN(a) || math.IsNaN(b) {
return false
}
return a == b
}

View file

@ -6373,7 +6373,7 @@ func TestExecSuccess(t *testing.T) {
})
t.Run(`bottomk(1)`, func(t *testing.T) {
t.Parallel()
q := `bottomk(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss"))`
q := `bottomk(1, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss") or label_set(time()<100, "a", "b"))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, nan, nan, 10, 10, 10},

View file

@ -14,6 +14,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components
* BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped.
* BUGFIX: `vmstorage`: added missing `-inmemoryDataFlushInterval` command-line flag, which was missing in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) after implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337) in [v1.85.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.85.0).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly return results from [bottomk](https://docs.victoriametrics.com/MetricsQL.html#bottomk) and `bottomk_*()` functions when some of these results contain NaN values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5506). Thanks to @xiaozongyang for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5509).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle queries, which wrap [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) with multiple arguments without explicitly specified lookbehind window in square brackets into [aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions). For example, `sum(quantile_over_time(0.5, process_resident_memory_bytes))` was resulting to `expecting at least 2 args to ...; got 1 args` error. Thanks to @atykhyy for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5414).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details.
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): check for Error field in response from influx client during migration. Before, only network errors were checked. Thanks to @wozz for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5446).

View file

@ -324,6 +324,6 @@ var bwPool sync.Pool
type streamTracker struct {
fd uintptr
offset uint64 // nolint
length uint64 // nolint
offset uint64
length uint64
}

View file

@ -1,6 +1,6 @@
package filestream
func (st *streamTracker) adviseDontNeed(n int, fdatasync bool) error { // nolint
func (st *streamTracker) adviseDontNeed(n int, fdatasync bool) error {
return nil
}

View file

@ -4,7 +4,7 @@ import (
"os"
)
func fadviseSequentialRead(f *os.File, prefetch bool) error { // nolint
func fadviseSequentialRead(f *os.File, prefetch bool) error {
// TODO: implement this properly
return nil
}