lib/logstorage: support order alias for sort pipe

Now the following queries are equivalent:

    _time:5s | sort by (_time)

    _time:5s | order by (_time)

This is needed for convenience, since `order by` is commonly used in other query languages such as SQL.
This commit is contained in:
Aliaksandr Valialkin 2024-09-29 09:33:31 +02:00
parent 14a0396f53
commit 8772aea24b
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
6 changed files with 15 additions and 6 deletions

View file

@ -16,6 +16,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/): accept Unix timestamps in seconds in the ingested logs.
* FEATURE: [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe): allow using `order` alias instead of `sort`. For example, `_time:5s | order by (_time)` query works the same as `_time:5s | sort by (_time)`.
## [v0.31.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.31.0-victorialogs)

View file

@ -2110,6 +2110,12 @@ The `by` keyword can be skipped in `sort ...` pipe. For example, the following q
_time:5m | sort (foo, bar) desc
```
The `order` alias can be used instead of `sort`, so the following query is equivalent to the previous one:
```logsql
_time:5m | order by (foo, bar) desc
```
Sorting a big number of logs can consume a lot of CPU time and memory. Sometimes it is enough to return the first `N` entries with the biggest
or the smallest values. This can be done by adding `limit N` to the end of `sort ...` pipe.
Such a query consumes less memory when sorting a big number of logs, since it keeps only `N` log entries in memory.

View file

@ -1150,9 +1150,11 @@ func TestParseQuerySuccess(t *testing.T) {
// sort pipe
f(`* | sort`, `* | sort`)
f(`* | order`, `* | sort`)
f(`* | sort desc`, `* | sort desc`)
f(`* | sort by()`, `* | sort`)
f(`* | sort bY (foo)`, `* | sort by (foo)`)
f(`* | ORDer bY (foo)`, `* | sort by (foo)`)
f(`* | sORt bY (_time, _stream DEsc, host)`, `* | sort by (_time, _stream desc, host)`)
f(`* | sort bY (foo desc, bar,) desc`, `* | sort by (foo desc, bar) desc`)
f(`* | sort limit 10`, `* | sort limit 10`)

View file

@ -213,7 +213,7 @@ func parsePipe(lex *lexer) (pipe, error) {
return nil, fmt.Errorf("cannot parse 'replace_regexp' pipe: %w", err)
}
return pr, nil
case lex.isKeyword("sort"):
case lex.isKeyword("sort"), lex.isKeyword("order"):
ps, err := parsePipeSort(lex)
if err != nil {
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
@ -309,7 +309,7 @@ var pipeNames = func() map[string]struct{} {
"rename", "mv",
"replace",
"replace_regexp",
"sort",
"sort", "order",
"stats",
"stream_context",
"top",

View file

@ -748,8 +748,8 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
}
func parsePipeSort(lex *lexer) (*pipeSort, error) {
if !lex.isKeyword("sort") {
return nil, fmt.Errorf("expecting 'sort'; got %q", lex.token)
if !lex.isKeyword("sort") && !lex.isKeyword("order") {
return nil, fmt.Errorf("expecting 'sort' or 'order'; got %q", lex.token)
}
lex.nextToken()

View file

@ -308,7 +308,7 @@ func TestPipeSort(t *testing.T) {
})
// Sort by multiple fields with offset and limit
f("sort by (a, b) desc offset 2 limit 100", [][]Field{
f("order by (a, b) desc offset 2 limit 100", [][]Field{
{
{"_msg", `abc`},
{"a", `2`},
@ -360,5 +360,5 @@ func TestPipeSortUpdateNeededFields(t *testing.T) {
// needed fields intersect with src
f("sort by(s1,s2)", "s1,f1,f2", "", "s1,s2,f1,f2", "")
f("sort by(s1,s2) rank as x", "s1,f1,f2,x", "", "s1,s2,f1,f2", "")
f("order by(s1,s2) rank as x", "s1,f1,f2,x", "", "s1,s2,f1,f2", "")
}