mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 15:16:42 +00:00
wip
This commit is contained in:
parent
f26d593c7b
commit
87183112f3
5 changed files with 101 additions and 3 deletions
|
@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
|
||||||
|
|
||||||
## tip
|
## tip
|
||||||
|
|
||||||
|
* FEATURE: add [`quantile`](https://docs.victoriametrics.com/victorialogs/logsql/#quantile-stats) and [`median`](https://docs.victoriametrics.com/victorialogs/logsql/#median-stats) [stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe).
|
||||||
|
|
||||||
## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs)
|
## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs)
|
||||||
|
|
||||||
Released at 2024-05-14
|
Released at 2024-05-14
|
||||||
|
|
|
@ -1371,6 +1371,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
|
||||||
- [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
- [`median`](#median-stats) calcualtes the [median](https://en.wikipedia.org/wiki/Median) value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
@ -1393,9 +1394,10 @@ _time:5m | stats avg(duration) avg_duration
|
||||||
|
|
||||||
See also:
|
See also:
|
||||||
|
|
||||||
|
- [`median`](#median-stats)
|
||||||
|
- [`quantile`](#quantile-stats)
|
||||||
- [`min`](#min-stats)
|
- [`min`](#min-stats)
|
||||||
- [`max`](#max-stats)
|
- [`max`](#max-stats)
|
||||||
- [`quantile`](#quantile-stats)
|
|
||||||
- [`sum`](#sum-stats)
|
- [`sum`](#sum-stats)
|
||||||
- [`count`](#count-stats)
|
- [`count`](#count-stats)
|
||||||
|
|
||||||
|
@ -1500,6 +1502,23 @@ See also:
|
||||||
- [`sum`](#sum-stats)
|
- [`sum`](#sum-stats)
|
||||||
- [`count`](#count-stats)
|
- [`count`](#count-stats)
|
||||||
|
|
||||||
|
### median stats
|
||||||
|
|
||||||
|
`median(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the [median](https://en.wikipedia.org/wiki/Median) value across
|
||||||
|
the give numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
|
||||||
|
For example, the following query return median for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||||
|
over logs for the last 5 minutes:
|
||||||
|
|
||||||
|
```logsql
|
||||||
|
_time:5m | stats median(duration) median_duration
|
||||||
|
```
|
||||||
|
|
||||||
|
See also:
|
||||||
|
|
||||||
|
- [`quantile`](#quantile-stats)
|
||||||
|
- [`avg`](#avg-stats)
|
||||||
|
|
||||||
### min stats
|
### min stats
|
||||||
|
|
||||||
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across
|
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across
|
||||||
|
@ -1541,6 +1560,7 @@ See also:
|
||||||
|
|
||||||
- [`min`](#min-stats)
|
- [`min`](#min-stats)
|
||||||
- [`max`](#max-stats)
|
- [`max`](#max-stats)
|
||||||
|
- [`median`](#median-stats)
|
||||||
- [`avg`](#avg-stats)
|
- [`avg`](#avg-stats)
|
||||||
|
|
||||||
### sum stats
|
### sum stats
|
||||||
|
@ -1674,8 +1694,6 @@ Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
|
||||||
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||||
and fields created by [transformations](#transformations):
|
and fields created by [transformations](#transformations):
|
||||||
|
|
||||||
- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
|
|
||||||
|
|
||||||
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
|
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
|
||||||
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.
|
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.
|
||||||
|
|
||||||
|
|
|
@ -940,6 +940,13 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||||
f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`)
|
f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`)
|
||||||
f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`)
|
f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`)
|
||||||
|
|
||||||
|
// stats pipe median
|
||||||
|
f(`* | stats Median(foo) bar`, `* | stats median(foo) as bar`)
|
||||||
|
f(`* | stats BY(x, y, ) MEDIAN(foo,bar,) bar`, `* | stats by (x, y) median(foo, bar) as bar`)
|
||||||
|
f(`* | stats median() x`, `* | stats median(*) as x`)
|
||||||
|
f(`* | stats median(*) x`, `* | stats median(*) as x`)
|
||||||
|
f(`* | stats median(foo,*,bar) x`, `* | stats median(*) as x`)
|
||||||
|
|
||||||
// stats pipe multiple funcs
|
// stats pipe multiple funcs
|
||||||
f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
|
f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
|
||||||
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
|
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
|
||||||
|
|
|
@ -546,6 +546,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
|
||||||
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
|
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
|
||||||
}
|
}
|
||||||
sf = sqs
|
sf = sqs
|
||||||
|
case lex.isKeyword("median"):
|
||||||
|
sms, err := parseStatsMedian(lex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err)
|
||||||
|
}
|
||||||
|
sf = sms
|
||||||
default:
|
default:
|
||||||
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
||||||
}
|
}
|
||||||
|
|
65
lib/logstorage/stats_median.go
Normal file
65
lib/logstorage/stats_median.go
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
package logstorage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"slices"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
type statsMedian struct {
|
||||||
|
fields []string
|
||||||
|
containsStar bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sm *statsMedian) String() string {
|
||||||
|
return "median(" + fieldNamesString(sm.fields) + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sm *statsMedian) neededFields() []string {
|
||||||
|
return sm.fields
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) {
|
||||||
|
smp := &statsMedianProcessor{
|
||||||
|
sqp: &statsQuantileProcessor{
|
||||||
|
sq: &statsQuantile{
|
||||||
|
fields: sm.fields,
|
||||||
|
containsStar: sm.containsStar,
|
||||||
|
phi: 0.5,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return smp, int(unsafe.Sizeof(*smp)) + int(unsafe.Sizeof(*smp.sqp)) + int(unsafe.Sizeof(*smp.sqp.sq))
|
||||||
|
}
|
||||||
|
|
||||||
|
type statsMedianProcessor struct {
|
||||||
|
sqp *statsQuantileProcessor
|
||||||
|
}
|
||||||
|
|
||||||
|
func (smp *statsMedianProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||||
|
return smp.sqp.updateStatsForAllRows(br)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (smp *statsMedianProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||||
|
return smp.sqp.updateStatsForRow(br, rowIdx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (smp *statsMedianProcessor) mergeState(sfp statsProcessor) {
|
||||||
|
src := sfp.(*statsMedianProcessor)
|
||||||
|
smp.sqp.mergeState(src.sqp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (smp *statsMedianProcessor) finalizeStats() string {
|
||||||
|
return smp.sqp.finalizeStats()
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseStatsMedian(lex *lexer) (*statsMedian, error) {
|
||||||
|
fields, err := parseFieldNamesForStatsFunc(lex, "median")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
sm := &statsMedian{
|
||||||
|
fields: fields,
|
||||||
|
containsStar: slices.Contains(fields, "*"),
|
||||||
|
}
|
||||||
|
return sm, nil
|
||||||
|
}
|
Loading…
Reference in a new issue