vmselect/promql: add alphanumeric sort by label (sort_by_label_numeric) (#2982)

* vmselect/promql: add alphanumeric sort by label (sort_by_label_numeric)

* vmselect/promql: fix tests, add documentation

* vmselect/promql: update test

* vmselect/promql: update for alphanumeric sorting, fix tests

* vmselect/promql: remove comments

* vmselect/promql: cleanup

* vmselect/promql: avoid memory allocations, update functions descriptions

* vmselect/promql: make linter happy (remove ineffectual assigment)

* vmselect/promql: add test case, fix behavior when strings are equal

* vendor: update github.com/VictoriaMetrics/metricsql from v0.44.1 to v0.45.0

this adds support for sort_by_label_numeric and sort_by_label_numeric_desc functions

* wip

* lib/promscrape: read response body into memory in stream parsing mode before parsing it

This reduces scrape duration for targets returning big responses.

The response body was already read into memory in stream parsing mode before this change,
so this commit shouldn't increase memory usage.

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Dmytro Kozlov 2022-09-14 17:41:09 +03:00 committed by GitHub
parent 5306f79fd1
commit b75f1854c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 625 additions and 202 deletions

View file

@ -99,7 +99,8 @@ func maySortResults(e metricsql.Expr, tss []*timeseries) bool {
case *metricsql.FuncExpr:
switch strings.ToLower(v.Name) {
case "sort", "sort_desc",
"sort_by_label", "sort_by_label_desc":
"sort_by_label", "sort_by_label_desc",
"sort_by_label_numeric", "sort_by_label_numeric_desc":
return false
}
case *metricsql.AggrFuncExpr:

View file

@ -7738,6 +7738,178 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2, r3, r4}
f(q, resultExpected)
})
t.Run(`sort_by_label_numeric(multiple_labels_only_string)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_numeric((
label_set(1, "x", "b", "y", "aa"),
label_set(2, "x", "a", "y", "aa"),
), "y", "x")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("a"),
},
{
Key: []byte("y"),
Value: []byte("aa"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("b"),
},
{
Key: []byte("y"),
Value: []byte("aa"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sort_by_label_numeric(multiple_labels_numbers_special_chars)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_numeric((
label_set(1, "x", "1:0:2", "y", "1:0:1"),
label_set(2, "x", "1:0:15", "y", "1:0:1"),
), "x", "y")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("1:0:2"),
},
{
Key: []byte("y"),
Value: []byte("1:0:1"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("1:0:15"),
},
{
Key: []byte("y"),
Value: []byte("1:0:1"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sort_by_label_numeric_desc(multiple_labels_numbers_special_chars)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_numeric_desc((
label_set(1, "x", "1:0:2", "y", "1:0:1"),
label_set(2, "x", "1:0:15", "y", "1:0:1"),
), "x", "y")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("1:0:15"),
},
{
Key: []byte("y"),
Value: []byte("1:0:1"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("x"),
Value: []byte("1:0:2"),
},
{
Key: []byte("y"),
Value: []byte("1:0:1"),
},
}
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`sort_by_label_numeric(alias_numbers_with_special_chars)`, func(t *testing.T) {
t.Parallel()
q := `sort_by_label_numeric((
label_set(4, "a", "DS50:1/0/15"),
label_set(1, "a", "DS50:1/0/0"),
label_set(2, "a", "DS50:1/0/1"),
label_set(3, "a", "DS50:1/0/2"),
), "a")`
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 1, 1, 1, 1, 1},
Timestamps: timestampsExpected,
}
r1.MetricName.Tags = []storage.Tag{
{
Key: []byte("a"),
Value: []byte("DS50:1/0/0"),
},
}
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{2, 2, 2, 2, 2, 2},
Timestamps: timestampsExpected,
}
r2.MetricName.Tags = []storage.Tag{
{
Key: []byte("a"),
Value: []byte("DS50:1/0/1"),
},
}
r3 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{3, 3, 3, 3, 3, 3},
Timestamps: timestampsExpected,
}
r3.MetricName.Tags = []storage.Tag{
{
Key: []byte("a"),
Value: []byte("DS50:1/0/2"),
},
}
r4 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{4, 4, 4, 4, 4, 4},
Timestamps: timestampsExpected,
}
r4.MetricName.Tags = []storage.Tag{
{
Key: []byte("a"),
Value: []byte("DS50:1/0/15"),
},
}
resultExpected := []netstorage.Result{r1, r2, r3, r4}
f(q, resultExpected)
})
}
func TestExecError(t *testing.T) {
@ -7811,6 +7983,8 @@ func TestExecError(t *testing.T) {
f(`sort_desc()`)
f(`sort_by_label()`)
f(`sort_by_label_desc()`)
f(`sort_by_label_numeric()`)
f(`sort_by_label_numeric_desc()`)
f(`timestamp()`)
f(`timestamp_with_name()`)
f(`vector()`)
@ -7933,6 +8107,7 @@ func TestExecError(t *testing.T) {
f(`round(1, 1 or label_set(2, "xx", "foo"))`)
f(`histogram_quantile(1 or label_set(2, "xx", "foo"), 1)`)
f(`histogram_quantiles("foo", 1 or label_set(2, "xxx", "foo"), 2)`)
f(`sort_by_label_numeric(1, 2)`)
f(`label_set(1, 2, 3)`)
f(`label_set(1, "foo", (label_set(1, "foo", bar") or label_set(2, "xxx", "yy")))`)
f(`label_set(1, "foo", 3)`)

View file

@ -14,6 +14,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metricsql"
)
@ -105,6 +106,8 @@ var transformFuncs = map[string]transformFunc{
"sort": newTransformFuncSort(false),
"sort_by_label": newTransformFuncSortByLabel(false),
"sort_by_label_desc": newTransformFuncSortByLabel(true),
"sort_by_label_numeric": newTransformFuncNumericSort(false),
"sort_by_label_numeric_desc": newTransformFuncNumericSort(true),
"sort_desc": newTransformFuncSort(true),
"sqrt": newTransformFuncOneArg(transformSqrt),
"start": newTransformFuncZeroArgs(transformStart),
@ -1992,6 +1995,130 @@ func newTransformFuncSortByLabel(isDesc bool) transformFunc {
}
}
func newTransformFuncNumericSort(isDesc bool) transformFunc {
return func(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if len(args) < 2 {
return nil, fmt.Errorf("expecting at least 2 args; got %d args", len(args))
}
var labels []string
for i, arg := range args[1:] {
label, err := getString(arg, i+1)
if err != nil {
return nil, fmt.Errorf("cannot parse label #%d for sorting: %w", i+1, err)
}
labels = append(labels, label)
}
rvs := args[0]
sort.SliceStable(rvs, func(i, j int) bool {
for _, label := range labels {
a := rvs[i].MetricName.GetTagValue(label)
b := rvs[j].MetricName.GetTagValue(label)
if string(a) == string(b) {
continue
}
aStr := bytesutil.ToUnsafeString(a)
bStr := bytesutil.ToUnsafeString(b)
if isDesc {
return numericLess(bStr, aStr)
}
return numericLess(aStr, bStr)
}
return false
})
return rvs, nil
}
}
func numericLess(a, b string) bool {
for {
if len(b) == 0 {
return false
}
if len(a) == 0 {
return true
}
aPrefix := getNumPrefix(a)
bPrefix := getNumPrefix(b)
a = a[len(aPrefix):]
b = b[len(bPrefix):]
if len(aPrefix) > 0 || len(bPrefix) > 0 {
if len(aPrefix) == 0 {
return false
}
if len(bPrefix) == 0 {
return true
}
aNum := mustParseNum(aPrefix)
bNum := mustParseNum(bPrefix)
if aNum != bNum {
return aNum < bNum
}
}
aPrefix = getNonNumPrefix(a)
bPrefix = getNonNumPrefix(b)
a = a[len(aPrefix):]
b = b[len(bPrefix):]
if aPrefix != bPrefix {
return aPrefix < bPrefix
}
}
}
func getNumPrefix(s string) string {
i := 0
if len(s) > 0 {
switch s[0] {
case '-', '+':
i++
}
}
hasNum := false
hasDot := false
for i < len(s) {
if !isDecimalChar(s[i]) {
if !hasDot && s[i] == '.' {
hasDot = true
i++
continue
}
if !hasNum {
return ""
}
return s[:i]
}
hasNum = true
i++
}
if !hasNum {
return ""
}
return s
}
func getNonNumPrefix(s string) string {
i := 0
for i < len(s) {
if isDecimalChar(s[i]) {
return s[:i]
}
i++
}
return s
}
func isDecimalChar(ch byte) bool {
return ch >= '0' && ch <= '9'
}
func mustParseNum(s string) float64 {
f, err := strconv.ParseFloat(s, 64)
if err != nil {
logger.Panicf("BUG: unexpected error when parsing the number %q: %s", s, err)
}
return f
}
func newTransformFuncSort(isDesc bool) transformFunc {
return func(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args

View file

@ -3,6 +3,7 @@ package promql
import (
"fmt"
"reflect"
"strconv"
"strings"
"testing"
@ -220,3 +221,110 @@ func timeseriesToPromMetrics(tss []*timeseries) string {
}
return strings.Join(a, "\n")
}
func TestGetNumPrefix(t *testing.T) {
f := func(s, prefixExpected string) {
t.Helper()
prefix := getNumPrefix(s)
if prefix != prefixExpected {
t.Fatalf("unexpected getNumPrefix(%q): got %q; want %q", s, prefix, prefixExpected)
}
if len(prefix) > 0 {
if _, err := strconv.ParseFloat(prefix, 64); err != nil {
t.Fatalf("cannot parse num %q: %s", prefix, err)
}
}
}
f("", "")
f("foo", "")
f("-", "")
f(".", "")
f("-.", "")
f("+..", "")
f("1", "1")
f("12", "12")
f("1foo", "1")
f("-123", "-123")
f("-123bar", "-123")
f("+123", "+123")
f("+123.", "+123.")
f("+123..", "+123.")
f("+123.-", "+123.")
f("12.34..", "12.34")
f("-12.34..", "-12.34")
f("-12.-34..", "-12.")
}
func TestNumericLess(t *testing.T) {
f := func(a, b string, want bool) {
t.Helper()
if got := numericLess(a, b); got != want {
t.Fatalf("unexpected numericLess(%q, %q): got %v; want %v", a, b, got, want)
}
}
// empty strings
f("", "", false)
f("", "321", true)
f("321", "", false)
f("", "abc", true)
f("abc", "", false)
f("foo", "123", false)
f("123", "foo", true)
// same length numbers
f("123", "321", true)
f("321", "123", false)
f("123", "123", false)
// same length strings
f("a", "b", true)
f("b", "a", false)
f("a", "a", false)
// identical string prefix
f("foo123", "foo", false)
f("foo", "foo123", true)
f("foo", "foo", false)
// identical num prefix
f("123foo", "123bar", false)
f("123bar", "123foo", true)
f("123bar", "123bar", false)
// numbers with special chars
f("1:0:0", "1:0:2", true)
// numbers with special chars and different number rank
f("1:0:15", "1:0:2", false)
// multiple zeroes"
f("0", "00", false)
// only chars
f("aa", "ab", true)
// strings with different lengths
f("ab", "abc", true)
// multiple zeroes after equal char
f("a0001", "a0000001", false)
// short first string with numbers and highest rank
f("a10", "abcdefgh2", true)
// less as second string
f("a1b", "a01b", false)
// equal strings by length with different number rank
f("a001b01", "a01b001", false)
// different numbers rank
f("a01b001", "a001b01", false)
// different numbers rank
f("a01b001", "a001b01", false)
// highest char and number
f("a1", "a1x", true)
// highest number reverse chars
f("1b", "1ax", false)
// numbers with leading zero
f("082", "83", true)
// numbers with leading zero and chars
f("083a", "9a", false)
f("083a", "94a", true)
// negative number
f("-123", "123", true)
f("-123", "+123", true)
f("-123", "-123", false)
f("123", "-123", false)
// fractional number
f("12.9", "12.56", false)
f("12.56", "12.9", true)
f("12.9", "12.9", false)
}

View file

@ -23,8 +23,9 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add `vm-native-step-interval` command line flag for `vm-native` mode. New option allows splitting the import process into chunks by time interval. This helps migrating data sets with high churn rate and provides better control over the process. See [feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2733).
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add `top queries` tab, which shows various stats for recently executed queries. See [these docs](https://docs.victoriametrics.com/#top-queries) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2707).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `debug` mode to the alerting rule settings for printing additional information into logs during evaluation. See `debug` param in [alerting rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): minimize the time needed for reading large responses from scrape targets in [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode). This should reduce scrape durations for such targets as [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) running in a big Kubernetes cluster.
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add experimental feature for displaying last 10 states of the rule (recording or alerting) evaluation. The state is available on the Rule page, which can be opened by clicking on `Details` link next to Rule's name on the `/groups` page.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): minimize the time needed for reading large responses from scrape targets in [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode). This should reduce scrape durations for such targets as [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) running in a big Kubernetes cluster.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [sort_by_label_numeric](https://docs.victoriametrics.com/MetricsQL.html#sort_by_label_numeric) and [sort_by_label_numeric_desc](https://docs.victoriametrics.com/MetricsQL.html#sort_by_label_numeric_desc) functions for [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html) of input time series by the specified labels. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2938).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate `rate_over_sum(m[d])` as `sum_over_time(m[d])/d`. Previously the `sum_over_time(m[d])` could be improperly divided by smaller than `d` time range. See [rate_over_sum() docs](https://docs.victoriametrics.com/MetricsQL.html#rate_over_sum) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3045).
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly calculate query results at `vmselect`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3067). The issue has been introduced in [v1.81.0](https://docs.victoriametrics.com/CHANGELOG.html#v1810).

View file

@ -673,6 +673,16 @@ The list of supported transform functions:
`sort_by_label_desc(q, label1, ... labelN)` sorts series in descending order by the given set of labels. For example, `sort_by_label(foo, "bar")` would sort `foo` series by values of the label `bar` in these series. See also [sort_by_label](#sort_by_label).
#### sort_by_label_numeric
`sort_by_label_numeric(q, label1, ... labelN)` sorts series in ascending order by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html). For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series in the following order of `bar` label values: `1`, `2`, `15` and `101`.
See also [sort_by_label_numeric_desc](#sort_by_label_numeric_desc) and [sort_by_label](#sort_by_label).
#### sort_by_label_numeric_desc
`sort_by_label_numeric_desc(q, label1, ... labelN)` sorts series in descending order by the given set of labels using [numeric sort](https://www.gnu.org/software/coreutils/manual/html_node/Version-sort-is-not-the-same-as-numeric-sort.html). For example, if `foo` series have `bar` label with values `1`, `101`, `15` and `2`, then `sort_by_label_numeric(foo, "bar")` would return series in the following order of `bar` label values: `101`, `15`, `2` and `1`.
See also [sort_by_label_numeric](#sort_by_label_numeric) and [sort_by_label_desc](#sort_by_label_desc).
#### sort_desc
`sort_desc(q)` sorts series in descending order by the last point in every time series returned by `q`. This function is supported by PromQL. See also [sort](#sort).

2
go.mod
View file

@ -10,7 +10,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.22.2
github.com/VictoriaMetrics/metricsql v0.44.1
github.com/VictoriaMetrics/metricsql v0.45.0
github.com/aws/aws-sdk-go v1.44.96
github.com/cespare/xxhash/v2 v2.1.2

4
go.sum
View file

@ -111,8 +111,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metrics v1.22.2 h1:A6LsNidYwkAHetxsvNFaUWjtzu5ltdgNEoS6i7Bn+6I=
github.com/VictoriaMetrics/metrics v1.22.2/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
github.com/VictoriaMetrics/metricsql v0.44.1 h1:qGoRt0g84uMUscVjS7P3uDZKmjJubWKaIx9v0iHKgck=
github.com/VictoriaMetrics/metricsql v0.44.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VictoriaMetrics/metricsql v0.45.0 h1:kVQHnkDJm4qyJ8f5msTclmwqAtlUdPbbEJ7zoa/FTNs=
github.com/VictoriaMetrics/metricsql v0.45.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=

View file

@ -11,5 +11,4 @@
// }
// // Now expr contains parsed MetricsQL as `*Expr` structs.
// // See Parse examples for more details.
//
package metricsql

View file

@ -91,6 +91,8 @@ var transformFuncs = map[string]bool{
"sort": true,
"sort_by_label": true,
"sort_by_label_desc": true,
"sort_by_label_numeric": true,
"sort_by_label_numeric_desc": true,
"sort_desc": true,
"sqrt": true,
"start": true,

2
vendor/modules.txt vendored
View file

@ -27,7 +27,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.22.2
## explicit; go 1.15
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.44.1
# github.com/VictoriaMetrics/metricsql v0.45.0
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop