app/vmselect/promql: preserve the order of time series passed to limit_offset() function

Previously time series passed to `limit_offset()` were shuffled according to hash for their labels.
This was unexpected behaviour for most users.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1920 and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/951
This commit is contained in:
Aliaksandr Valialkin 2021-12-12 18:04:55 +02:00
parent 3d4349343d
commit d1f8915ed1
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
10 changed files with 55 additions and 57 deletions

View file

@ -29,7 +29,6 @@ var aggrFuncs = map[string]aggrFunc{
"geomean": newAggrFunc(aggrFuncGeomean), "geomean": newAggrFunc(aggrFuncGeomean),
"group": newAggrFunc(aggrFuncGroup), "group": newAggrFunc(aggrFuncGroup),
"histogram": newAggrFunc(aggrFuncHistogram), "histogram": newAggrFunc(aggrFuncHistogram),
"limit_offset": aggrFuncLimitOffset,
"limitk": aggrFuncLimitK, "limitk": aggrFuncLimitK,
"mad": newAggrFunc(aggrFuncMAD), "mad": newAggrFunc(aggrFuncMAD),
"max": newAggrFunc(aggrFuncMax), "max": newAggrFunc(aggrFuncMax),
@ -1005,37 +1004,12 @@ func aggrFuncLimitK(afa *aggrFuncArg) ([]*timeseries, error) {
if len(limits) > 0 { if len(limits) > 0 {
limit = int(limits[0]) limit = int(limits[0])
} }
afe := newLimitOffsetAggrFunc(limit, 0)
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
}
func aggrFuncLimitOffset(afa *aggrFuncArg) ([]*timeseries, error) {
args := afa.args
if err := expectTransformArgsNum(args, 3); err != nil {
return nil, err
}
limit, err := getIntNumber(args[0], 0)
if err != nil {
return nil, fmt.Errorf("cannot obtain limit arg: %w", err)
}
offset, err := getIntNumber(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot obtain offset arg: %w", err)
}
afe := newLimitOffsetAggrFunc(limit, offset)
return aggrFuncExt(afe, args[2], &afa.ae.Modifier, afa.ae.Limit, true)
}
func newLimitOffsetAggrFunc(limit, offset int) func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
if offset < 0 {
offset = 0
}
if limit < 0 { if limit < 0 {
limit = 0 limit = 0
} }
return func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries { afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
// Sort series by metricName hash in order to get consistent set of output series // Sort series by metricName hash in order to get consistent set of output series
// across multiple calls to limitk() and limit_offset() functions. // across multiple calls to limitk() function.
// Sort series by hash in order to guarantee uniform selection across series. // Sort series by hash in order to guarantee uniform selection across series.
type hashSeries struct { type hashSeries struct {
h uint64 h uint64
@ -1056,15 +1030,12 @@ func newLimitOffsetAggrFunc(limit, offset int) func(tss []*timeseries, modifier
for i, hs := range hss { for i, hs := range hss {
tss[i] = hs.ts tss[i] = hs.ts
} }
if offset > len(tss) {
return nil
}
tss = tss[offset:]
if limit < len(tss) { if limit < len(tss) {
tss = tss[:limit] tss = tss[:limit]
} }
return tss return tss
} }
return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, true)
} }
func getHash(d *xxhash.Digest, mn *storage.MetricName) uint64 { func getHash(d *xxhash.Digest, mn *storage.MetricName) uint64 {

View file

@ -2055,6 +2055,24 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3} resultExpected := []netstorage.Result{r1, r2, r3}
f(q, resultExpected) f(q, resultExpected)
}) })
t.Run(`limit_offset`, func(t *testing.T) {
t.Parallel()
q := `limit_offset(1, 1, sort_by_label((
label_set(time()*1, "foo", "y"),
label_set(time()*2, "foo", "a"),
label_set(time()*3, "foo", "x"),
), "foo"))`
r := netstorage.Result{
Values: []float64{3000, 3600, 4200, 4800, 5400, 6000},
Timestamps: timestampsExpected,
}
r.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("x"),
}}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`sum(label_graphite_group)`, func(t *testing.T) { t.Run(`sum(label_graphite_group)`, func(t *testing.T) {
t.Parallel() t.Parallel()
q := `sort(sum by (__name__) ( q := `sort(sum by (__name__) (
@ -5161,21 +5179,6 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1} resultExpected := []netstorage.Result{r1}
f(q, resultExpected) f(q, resultExpected)
}) })
t.Run(`limit_offset()`, func(t *testing.T) {
t.Parallel()
q := `limit_offset(1, 0, (label_set(10, "foo", "bar"), label_set(time()/150, "xbaz", "sss")))`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{10, 10, 10, 10, 10, 10},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{{
Key: []byte("foo"),
Value: []byte("bar"),
}}
resultExpected := []netstorage.Result{r1}
f(q, resultExpected)
})
t.Run(`limitk(10)`, func(t *testing.T) { t.Run(`limitk(10)`, func(t *testing.T) {
t.Parallel() t.Parallel()
q := `sort(limitk(10, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))` q := `sort(limitk(10, label_set(10, "foo", "bar") or label_set(time()/150, "baz", "sss")))`

View file

@ -70,6 +70,7 @@ var transformFuncs = map[string]transformFunc{
"label_transform": transformLabelTransform, "label_transform": transformLabelTransform,
"label_uppercase": transformLabelUppercase, "label_uppercase": transformLabelUppercase,
"label_value": transformLabelValue, "label_value": transformLabelValue,
"limit_offset": transformLimitOffset,
"ln": newTransformFuncOneArg(transformLn), "ln": newTransformFuncOneArg(transformLn),
"log2": newTransformFuncOneArg(transformLog2), "log2": newTransformFuncOneArg(transformLog2),
"log10": newTransformFuncOneArg(transformLog10), "log10": newTransformFuncOneArg(transformLog10),
@ -1771,6 +1772,29 @@ func transformLabelGraphiteGroup(tfa *transformFuncArg) ([]*timeseries, error) {
var dotSeparator = []byte(".") var dotSeparator = []byte(".")
func transformLimitOffset(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 3); err != nil {
return nil, err
}
limit, err := getIntNumber(args[0], 0)
if err != nil {
return nil, fmt.Errorf("cannot obtain limit arg: %w", err)
}
offset, err := getIntNumber(args[1], 1)
if err != nil {
return nil, fmt.Errorf("cannot obtain offset arg: %w", err)
}
rvs := args[2]
if len(rvs) >= offset {
rvs = rvs[offset:]
}
if len(rvs) > limit {
rvs = rvs[:limit]
}
return rvs, nil
}
func transformLn(v float64) float64 { func transformLn(v float64) float64 {
return math.Log(v) return math.Log(v)
} }

View file

@ -12,6 +12,7 @@ sort: 15
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow specifying `http` and `https` urls in the following command-line flags: `-promscrape.config`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow specifying `http` and `https` urls in the following command-line flags: `-promscrape.config`, `-remoteWrite.relabelConfig` and `-remoteWrite.urlRelabelConfig`.
* FEATURE: vminsert: allow specifying `http` and `https` urls in `-relabelConfig` command-line flag. * FEATURE: vminsert: allow specifying `http` and `https` urls in `-relabelConfig` command-line flag.
* FEATURE: vminsert: add `-maxLabelValueLen` command-line flag for the ability to configure the maximum length of label value. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1908). * FEATURE: vminsert: add `-maxLabelValueLen` command-line flag for the ability to configure the maximum length of label value. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1908).
* FEATURE: preserve the order of time series passed to [limit_offset](https://docs.victoriametrics.com/MetricsQL.html#limit_offset) function. This allows implementing series paging via `limit_offset(limit, offset, sort_by_label(...))`. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1920) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/951) issues.
* BUGFIX: fix `unaligned 64-bit atomic operation` panic on 32-bit architectures, which has been introduced in v1.70.0. * BUGFIX: fix `unaligned 64-bit atomic operation` panic on 32-bit architectures, which has been introduced in v1.70.0.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): restore the ability to use `{{ $labels.alertname }}` in labels templating. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1921). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): restore the ability to use `{{ $labels.alertname }}` in labels templating. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1921).

View file

@ -487,6 +487,10 @@ See also [implicit query conversions](#implicit-query-conversions).
`keep_next_value(q)` fills gaps with the value of the next non-empty point in every time series returned by `q`. See also [keep_last_value](#keep_last_value) and [interpolate](#interpolate). `keep_next_value(q)` fills gaps with the value of the next non-empty point in every time series returned by `q`. See also [keep_last_value](#keep_last_value) and [interpolate](#interpolate).
#### limit_offset
`limit_offset(limit, offset, q)` skips `offset` time series from series returned by `q` and then returns up to `limit` of the remaining time series per each group. This allows implementing simple paging for `q` time series. See also [limitk](#limitk).
#### ln #### ln
`ln(q)` calculates `ln(v)` for every point `v` of every time series returned by `q`. Metric names are stripped from the resulting series. This function is supported by PromQL. See also [exp](#exp) and [log2](#log2). `ln(q)` calculates `ln(v)` for every point `v` of every time series returned by `q`. Metric names are stripped from the resulting series. This function is supported by PromQL. See also [exp](#exp) and [log2](#log2).
@ -823,11 +827,6 @@ See also [implicit query conversions](#implicit-query-conversions).
`histogram(q)` calculates [VictoriaMetrics histogram](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) per each group of points with the same timestamp. Useful for visualizing big number of time series via a heatmap. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details. `histogram(q)` calculates [VictoriaMetrics histogram](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) per each group of points with the same timestamp. Useful for visualizing big number of time series via a heatmap. See [this article](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) for more details.
#### limit_offset
`limit_offset(limit, offset, q)` skips `offset` time series from series returned by `q` and then returns up to `limit` of the remaining time series. This allows implementing simple paging for `q` time series. See also [limitk](#limitk).
#### limitk #### limitk
`limitk(k, q) by (group_labels)` returns up to `k` time series per each `group_labels` out of time series returned by `q`. The returned set of time series remain the same across calls. See also [limit_offset](#limit_offset). `limitk(k, q) by (group_labels)` returns up to `k` time series per each `group_labels` out of time series returned by `q`. The returned set of time series remain the same across calls. See also [limit_offset](#limit_offset).

2
go.mod
View file

@ -9,7 +9,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.18.1 github.com/VictoriaMetrics/metrics v1.18.1
github.com/VictoriaMetrics/metricsql v0.31.0 github.com/VictoriaMetrics/metricsql v0.32.0
github.com/VividCortex/ewma v1.2.0 // indirect github.com/VividCortex/ewma v1.2.0 // indirect
github.com/aws/aws-sdk-go v1.42.19 github.com/aws/aws-sdk-go v1.42.19
github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect github.com/census-instrumentation/opencensus-proto v0.3.0 // indirect

4
go.sum
View file

@ -109,8 +109,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE= github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0= github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metricsql v0.31.0 h1:7cpjby64WVcRNBiMieEytuvAcU/jOOz+39RLigENz4E= github.com/VictoriaMetrics/metricsql v0.32.0 h1:yTZFB1FvbOsD5ahl6mxKYprHpZ248nVk3s8Kl7UBg5c=
github.com/VictoriaMetrics/metricsql v0.31.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8= github.com/VictoriaMetrics/metricsql v0.32.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=

View file

@ -19,7 +19,6 @@ var aggrFuncs = map[string]bool{
"geomean": true, "geomean": true,
"group": true, "group": true,
"histogram": true, "histogram": true,
"limit_offset": true,
"limitk": true, "limitk": true,
"mad": true, "mad": true,
"max": true, "max": true,

View file

@ -56,6 +56,7 @@ var transformFuncs = map[string]bool{
"label_transform": true, "label_transform": true,
"label_uppercase": true, "label_uppercase": true,
"label_value": true, "label_value": true,
"limit_offset": true,
"ln": true, "ln": true,
"log2": true, "log2": true,
"log10": true, "log10": true,

2
vendor/modules.txt vendored
View file

@ -22,7 +22,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.18.1 # github.com/VictoriaMetrics/metrics v1.18.1
## explicit ## explicit
github.com/VictoriaMetrics/metrics github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.31.0 # github.com/VictoriaMetrics/metricsql v0.32.0
## explicit ## explicit
github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop github.com/VictoriaMetrics/metricsql/binaryop