lib/logstorage: add an ability to return rank from top pipe results

This commit is contained in:
Aliaksandr Valialkin 2024-10-29 15:37:07 +01:00
parent 7a62eefa34
commit 3c06d083ea
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 126 additions and 4 deletions

View file

@ -19,6 +19,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* FEATURE: improve performance for queries over large volume of logs with big number of [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) (aka `wide events`).
* FEATURE: improve performance for [`/select/logsql/field_values` HTTP endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values).
* FEATURE: improve performance for [`field_values` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) when it is applied directly to [log filter](https://docs.victoriametrics.com/victorialogs/logsql/#filters).
* FEATURE: add an ability to return the `rank` field from [`top` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe). For example, the following query returns ranks `1..5` for the five `ip` values with the biggest number of logs over the last 5 minutes: `_time:5m | top 5 by (ip) with rank`.
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix various glitches with updating query responses. The issue was introduced in [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7279).

View file

@ -2400,6 +2400,18 @@ For example, the following query is equivalent to the previous one:
_time:5m | fields ip | top
```
It is possible to add a `rank` field to every entry returned by the `top` pipe by appending `with rank`. For example, the following query adds the `rank` field to every returned `ip`:
```logsql
_time:5m | top 10 by (ip) with rank
```
The `rank` field can be given another name with `as`. For example, the following query uses the `position` field name instead of the `rank` field name in the output:
```logsql
_time:5m | top 10 by (ip) with rank as position
```
See also:
- [`uniq` pipe](#uniq-pipe)

View file

@ -5,6 +5,7 @@ import (
"fmt"
"slices"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
@ -34,8 +35,11 @@ type pipeTop struct {
// limitStr is string representation of the limit.
limitStr string
// if hitsFieldName isn't empty, then the number of hits per each unique value is returned in this field.
// the number of hits per each unique value is returned in this field.
hitsFieldName string
// if rankFieldName isn't empty, then the rank per each unique value is returned in this field.
rankFieldName string
}
func (pt *pipeTop) String() string {
@ -46,6 +50,12 @@ func (pt *pipeTop) String() string {
if len(pt.byFields) > 0 {
s += " by (" + fieldNamesString(pt.byFields) + ")"
}
if pt.rankFieldName != "" {
s += " with rank"
if pt.rankFieldName != "rank" {
s += " as " + pt.rankFieldName
}
}
return s
}
@ -273,8 +283,20 @@ func (ptp *pipeTopProcessor) flush() error {
return dst
}
// addRankField appends the rank field to dst and returns the resulting slice.
//
// The rank argument is the zero-based position of the entry, while the emitted
// value is one-based. dst is returned unchanged when no rank field name is configured.
addRankField := func(dst []Field, rank int) []Field {
	name := ptp.pt.rankFieldName
	if name != "" {
		dst = append(dst, Field{
			Name:  name,
			Value: strconv.Itoa(rank + 1),
		})
	}
	return dst
}
if len(byFields) == 0 {
for _, e := range entries {
for i, e := range entries {
if needStop(ptp.stopCh) {
return nil
}
@ -300,11 +322,12 @@ func (ptp *pipeTopProcessor) flush() error {
})
}
rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields)
}
} else if len(byFields) == 1 {
fieldName := byFields[0]
for _, e := range entries {
for i, e := range entries {
if needStop(ptp.stopCh) {
return nil
}
@ -314,10 +337,11 @@ func (ptp *pipeTopProcessor) flush() error {
Value: e.k,
})
rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields)
}
} else {
for _, e := range entries {
for i, e := range entries {
if needStop(ptp.stopCh) {
return nil
}
@ -339,6 +363,7 @@ func (ptp *pipeTopProcessor) flush() error {
fieldIdx++
}
rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields)
}
}
@ -660,5 +685,26 @@ func parsePipeTop(lex *lexer) (*pipeTop, error) {
hitsFieldName: hitsFieldName,
}
if !lex.isKeyword("with") {
return pt, nil
}
lex.nextToken()
if !lex.isKeyword("rank") {
return nil, fmt.Errorf("missing 'rank' word after 'with' in [%s]", pt)
}
lex.nextToken()
pt.rankFieldName = "rank"
if lex.isKeyword("as") {
lex.nextToken()
if lex.isKeyword("", "|", ")", "(") {
return nil, fmt.Errorf("missing rank name in [%s as]", pt)
}
}
if !lex.isKeyword("", "|", ")") {
pt.rankFieldName = lex.token
lex.nextToken()
}
return pt, nil
}

View file

@ -11,11 +11,15 @@ func TestParsePipeTopSuccess(t *testing.T) {
}
f(`top`)
f(`top with rank`)
f(`top 5`)
f(`top 5 with rank as foo`)
f(`top by (x)`)
f(`top 5 by (x)`)
f(`top by (x, y)`)
f(`top 5 by (x, y)`)
f(`top by (x) with rank`)
f(`top by (x) with rank as foo`)
}
func TestParsePipeTopFailure(t *testing.T) {
@ -30,6 +34,8 @@ func TestParsePipeTopFailure(t *testing.T) {
f(`top 5foo`)
f(`top foo`)
f(`top by`)
f(`top (x) with`)
f(`top (x) with rank as`)
}
func TestPipeTop(t *testing.T) {
@ -66,6 +72,36 @@ func TestPipeTop(t *testing.T) {
},
})
f("top with rank", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"b", "3"},
{"hits", "2"},
{"rank", "1"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
{"hits", "1"},
{"rank", "2"},
},
})
f("top 1", [][]Field{
{
{"a", `2`},
@ -134,6 +170,33 @@ func TestPipeTop(t *testing.T) {
},
})
f("top by (b) with rank as x", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"b", "3"},
{"hits", "2"},
{"x", "1"},
},
{
{"b", "54"},
{"hits", "1"},
{"x", "2"},
},
})
f("top by (hits)", [][]Field{
{
{"a", `2`},