This commit is contained in:
Aliaksandr Valialkin 2024-05-14 22:31:21 +02:00
parent f26d593c7b
commit 87183112f3
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 101 additions and 3 deletions

View file

@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip ## tip
* FEATURE: add [`quantile`](https://docs.victoriametrics.com/victorialogs/logsql/#quantile-stats) and [`median`](https://docs.victoriametrics.com/victorialogs/logsql/#median-stats) [stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe).
## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs) ## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs)
Released at 2024-05-14 Released at 2024-05-14

View file

@ -1371,6 +1371,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
- [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`median`](#median-stats) calcualtes the [median](https://en.wikipedia.org/wiki/Median) value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
- [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
@ -1393,9 +1394,10 @@ _time:5m | stats avg(duration) avg_duration
See also: See also:
- [`median`](#median-stats)
- [`quantile`](#quantile-stats)
- [`min`](#min-stats) - [`min`](#min-stats)
- [`max`](#max-stats) - [`max`](#max-stats)
- [`quantile`](#quantile-stats)
- [`sum`](#sum-stats) - [`sum`](#sum-stats)
- [`count`](#count-stats) - [`count`](#count-stats)
@ -1500,6 +1502,23 @@ See also:
- [`sum`](#sum-stats) - [`sum`](#sum-stats)
- [`count`](#count-stats) - [`count`](#count-stats)
### median stats
`median(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the [median](https://en.wikipedia.org/wiki/Median) value across
the give numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
For example, the following query return median for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
over logs for the last 5 minutes:
```logsql
_time:5m | stats median(duration) median_duration
```
See also:
- [`quantile`](#quantile-stats)
- [`avg`](#avg-stats)
### min stats ### min stats
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across `min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across
@ -1541,6 +1560,7 @@ See also:
- [`min`](#min-stats) - [`min`](#min-stats)
- [`max`](#max-stats) - [`max`](#max-stats)
- [`median`](#median-stats)
- [`avg`](#avg-stats) - [`avg`](#avg-stats)
### sum stats ### sum stats
@ -1674,8 +1694,6 @@ Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
and fields created by [transformations](#transformations): and fields created by [transformations](#transformations):
- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats. It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only. For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.

View file

@ -940,6 +940,13 @@ func TestParseQuerySuccess(t *testing.T) {
f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`) f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`)
f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`) f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`)
// stats pipe median
f(`* | stats Median(foo) bar`, `* | stats median(foo) as bar`)
f(`* | stats BY(x, y, ) MEDIAN(foo,bar,) bar`, `* | stats by (x, y) median(foo, bar) as bar`)
f(`* | stats median() x`, `* | stats median(*) as x`)
f(`* | stats median(*) x`, `* | stats median(*) as x`)
f(`* | stats median(foo,*,bar) x`, `* | stats median(*) as x`)
// stats pipe multiple funcs // stats pipe multiple funcs
f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`) f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`) f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)

View file

@ -546,6 +546,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err) return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
} }
sf = sqs sf = sqs
case lex.isKeyword("median"):
sms, err := parseStatsMedian(lex)
if err != nil {
return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err)
}
sf = sms
default: default:
return nil, "", fmt.Errorf("unknown stats func %q", lex.token) return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
} }

View file

@ -0,0 +1,65 @@
package logstorage
import (
"slices"
"unsafe"
)
type statsMedian struct {
fields []string
containsStar bool
}
func (sm *statsMedian) String() string {
return "median(" + fieldNamesString(sm.fields) + ")"
}
func (sm *statsMedian) neededFields() []string {
return sm.fields
}
func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) {
smp := &statsMedianProcessor{
sqp: &statsQuantileProcessor{
sq: &statsQuantile{
fields: sm.fields,
containsStar: sm.containsStar,
phi: 0.5,
},
},
}
return smp, int(unsafe.Sizeof(*smp)) + int(unsafe.Sizeof(*smp.sqp)) + int(unsafe.Sizeof(*smp.sqp.sq))
}
type statsMedianProcessor struct {
sqp *statsQuantileProcessor
}
func (smp *statsMedianProcessor) updateStatsForAllRows(br *blockResult) int {
return smp.sqp.updateStatsForAllRows(br)
}
func (smp *statsMedianProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
return smp.sqp.updateStatsForRow(br, rowIdx)
}
func (smp *statsMedianProcessor) mergeState(sfp statsProcessor) {
src := sfp.(*statsMedianProcessor)
smp.sqp.mergeState(src.sqp)
}
func (smp *statsMedianProcessor) finalizeStats() string {
return smp.sqp.finalizeStats()
}
func parseStatsMedian(lex *lexer) (*statsMedian, error) {
fields, err := parseFieldNamesForStatsFunc(lex, "median")
if err != nil {
return nil, err
}
sm := &statsMedian{
fields: fields,
containsStar: slices.Contains(fields, "*"),
}
return sm, nil
}