lib/logstorage: add an ability to return rank from top pipe results

This commit is contained in:
Aliaksandr Valialkin 2024-10-29 15:37:07 +01:00
parent 7a62eefa34
commit 3c06d083ea
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 126 additions and 4 deletions

View file

@ -19,6 +19,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
* FEATURE: improve performance for queries over large volume of logs with big number of [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) (aka `wide events`). * FEATURE: improve performance for queries over large volume of logs with big number of [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) (aka `wide events`).
* FEATURE: improve performance for [`/select/logsql/field_values` HTTP endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values). * FEATURE: improve performance for [`/select/logsql/field_values` HTTP endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values).
* FEATURE: improve performance for [`field_values` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) when it is applied directly to [log filter](https://docs.victoriametrics.com/victorialogs/logsql/#filters). * FEATURE: improve performance for [`field_values` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#field_values-pipe) when it is applied directly to [log filter](https://docs.victoriametrics.com/victorialogs/logsql/#filters).
* FEATURE: add an ability to return `rank` field from [`top` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe). For example, the following query returns `1..5` rank per each returned `ip` with the biggest number of logs over the last 5 minutes: `_time:5m | top 5 by (ip) with rank`.
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix various glitches with updating query responses. The issue was introduced in [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7279). * BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix various glitches with updating query responses. The issue was introduced in [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7279).

View file

@ -2400,6 +2400,18 @@ For example, the following query is equivalent to the previous one:
_time:5m | fields ip | top _time:5m | fields ip | top
``` ```
It is possible to set the `rank` field per each returned entry for the `top` pipe by adding `with rank`. For example, the following query sets the `rank` field per each returned `ip`:
```logsql
_time:5m | top 10 by (ip) with rank
```
The `rank` field can have another name. For example, the following query uses the `position` field name instead of the `rank` field name in the output:
```logsql
_time:5m | top 10 by (ip) with rank as position
```
See also: See also:
- [`uniq` pipe](#uniq-pipe) - [`uniq` pipe](#uniq-pipe)

View file

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"slices" "slices"
"sort" "sort"
"strconv"
"strings" "strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
@ -34,8 +35,11 @@ type pipeTop struct {
// limitStr is string representation of the limit. // limitStr is string representation of the limit.
limitStr string limitStr string
// if hitsFieldName isn't empty, then the number of hits per each unique value is returned in this field. // the number of hits per each unique value is returned in this field.
hitsFieldName string hitsFieldName string
// if rankFieldName isn't empty, then the rank per each unique value is returned in this field.
rankFieldName string
} }
func (pt *pipeTop) String() string { func (pt *pipeTop) String() string {
@ -46,6 +50,12 @@ func (pt *pipeTop) String() string {
if len(pt.byFields) > 0 { if len(pt.byFields) > 0 {
s += " by (" + fieldNamesString(pt.byFields) + ")" s += " by (" + fieldNamesString(pt.byFields) + ")"
} }
if pt.rankFieldName != "" {
s += " with rank"
if pt.rankFieldName != "rank" {
s += " as " + pt.rankFieldName
}
}
return s return s
} }
@ -273,8 +283,20 @@ func (ptp *pipeTopProcessor) flush() error {
return dst return dst
} }
// addRankField appends the rank field to dst and returns the resulting slice.
//
// dst is returned as is when the pipe has no rank field name configured.
// The rank argument is zero-based (callers pass the index of the entry),
// so it is incremented by one before being written to the output.
addRankField := func(dst []Field, rank int) []Field {
	name := ptp.pt.rankFieldName
	if name == "" {
		return dst
	}
	return append(dst, Field{
		Name:  name,
		Value: strconv.Itoa(rank + 1),
	})
}
if len(byFields) == 0 { if len(byFields) == 0 {
for _, e := range entries { for i, e := range entries {
if needStop(ptp.stopCh) { if needStop(ptp.stopCh) {
return nil return nil
} }
@ -300,11 +322,12 @@ func (ptp *pipeTopProcessor) flush() error {
}) })
} }
rowFields = addHitsField(rowFields, e.hits) rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields) wctx.writeRow(rowFields)
} }
} else if len(byFields) == 1 { } else if len(byFields) == 1 {
fieldName := byFields[0] fieldName := byFields[0]
for _, e := range entries { for i, e := range entries {
if needStop(ptp.stopCh) { if needStop(ptp.stopCh) {
return nil return nil
} }
@ -314,10 +337,11 @@ func (ptp *pipeTopProcessor) flush() error {
Value: e.k, Value: e.k,
}) })
rowFields = addHitsField(rowFields, e.hits) rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields) wctx.writeRow(rowFields)
} }
} else { } else {
for _, e := range entries { for i, e := range entries {
if needStop(ptp.stopCh) { if needStop(ptp.stopCh) {
return nil return nil
} }
@ -339,6 +363,7 @@ func (ptp *pipeTopProcessor) flush() error {
fieldIdx++ fieldIdx++
} }
rowFields = addHitsField(rowFields, e.hits) rowFields = addHitsField(rowFields, e.hits)
rowFields = addRankField(rowFields, i)
wctx.writeRow(rowFields) wctx.writeRow(rowFields)
} }
} }
@ -660,5 +685,26 @@ func parsePipeTop(lex *lexer) (*pipeTop, error) {
hitsFieldName: hitsFieldName, hitsFieldName: hitsFieldName,
} }
if !lex.isKeyword("with") {
return pt, nil
}
lex.nextToken()
if !lex.isKeyword("rank") {
return nil, fmt.Errorf("missing 'rank' word after 'with' in [%s]", pt)
}
lex.nextToken()
pt.rankFieldName = "rank"
if lex.isKeyword("as") {
lex.nextToken()
if lex.isKeyword("", "|", ")", "(") {
return nil, fmt.Errorf("missing rank name in [%s as]", pt)
}
}
if !lex.isKeyword("", "|", ")") {
pt.rankFieldName = lex.token
lex.nextToken()
}
return pt, nil return pt, nil
} }

View file

@ -11,11 +11,15 @@ func TestParsePipeTopSuccess(t *testing.T) {
} }
f(`top`) f(`top`)
f(`top with rank`)
f(`top 5`) f(`top 5`)
f(`top 5 with rank as foo`)
f(`top by (x)`) f(`top by (x)`)
f(`top 5 by (x)`) f(`top 5 by (x)`)
f(`top by (x, y)`) f(`top by (x, y)`)
f(`top 5 by (x, y)`) f(`top 5 by (x, y)`)
f(`top by (x) with rank`)
f(`top by (x) with rank as foo`)
} }
func TestParsePipeTopFailure(t *testing.T) { func TestParsePipeTopFailure(t *testing.T) {
@ -30,6 +34,8 @@ func TestParsePipeTopFailure(t *testing.T) {
f(`top 5foo`) f(`top 5foo`)
f(`top foo`) f(`top foo`)
f(`top by`) f(`top by`)
f(`top (x) with`)
f(`top (x) with rank as`)
} }
func TestPipeTop(t *testing.T) { func TestPipeTop(t *testing.T) {
@ -66,6 +72,36 @@ func TestPipeTop(t *testing.T) {
}, },
}) })
f("top with rank", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"a", "2"},
{"b", "3"},
{"hits", "2"},
{"rank", "1"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
{"hits", "1"},
{"rank", "2"},
},
})
f("top 1", [][]Field{ f("top 1", [][]Field{
{ {
{"a", `2`}, {"a", `2`},
@ -134,6 +170,33 @@ func TestPipeTop(t *testing.T) {
}, },
}) })
f("top by (b) with rank as x", [][]Field{
{
{"a", `2`},
{"b", `3`},
},
{
{"a", "2"},
{"b", "3"},
},
{
{"a", `2`},
{"b", `54`},
{"c", "d"},
},
}, [][]Field{
{
{"b", "3"},
{"hits", "2"},
{"x", "1"},
},
{
{"b", "54"},
{"hits", "1"},
{"x", "2"},
},
})
f("top by (hits)", [][]Field{ f("top by (hits)", [][]Field{
{ {
{"a", `2`}, {"a", `2`},