From 87183112f3284b5cff8cf95b3e5e7b8d081e2df0 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 14 May 2024 22:31:21 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 2 ++ docs/VictoriaLogs/LogsQL.md | 24 +++++++++++-- lib/logstorage/parser_test.go | 7 ++++ lib/logstorage/pipe_stats.go | 6 ++++ lib/logstorage/stats_median.go | 65 ++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 lib/logstorage/stats_median.go diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 7f5788449..f88962963 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta ## tip +* FEATURE: add [`quantile`](https://docs.victoriametrics.com/victorialogs/logsql/#quantile-stats) and [`median`](https://docs.victoriametrics.com/victorialogs/logsql/#median-stats) [stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe). + ## [v0.6.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.6.1-victorialogs) Released at 2024-05-14 diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 330183be6..b4de1dd9a 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -1371,6 +1371,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe): - [`count_empty`](#count_empty-stats) calculates the number logs with empty [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`max`](#max-stats) calcualtes the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). +- [`median`](#median-stats) calcualtes the [median](https://en.wikipedia.org/wiki/Median) value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`min`](#min-stats) calculates the minumum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). - [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). @@ -1393,9 +1394,10 @@ _time:5m | stats avg(duration) avg_duration See also: +- [`median`](#median-stats) +- [`quantile`](#quantile-stats) - [`min`](#min-stats) - [`max`](#max-stats) -- [`quantile`](#quantile-stats) - [`sum`](#sum-stats) - [`count`](#count-stats) @@ -1500,6 +1502,23 @@ See also: - [`sum`](#sum-stats) - [`count`](#count-stats) +### median stats + +`median(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the [median](https://en.wikipedia.org/wiki/Median) value across +the give numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). + +For example, the following query return median for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) +over logs for the last 5 minutes: + +```logsql +_time:5m | stats median(duration) median_duration +``` + +See also: + +- [`quantile`](#quantile-stats) +- [`avg`](#avg-stats) + ### min stats `min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across @@ -1541,6 +1560,7 @@ See also: - [`min`](#min-stats) - [`max`](#max-stats) +- [`median`](#median-stats) - [`avg`](#avg-stats) ### sum stats @@ -1674,8 +1694,6 @@ Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe). LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) and fields created by [transformations](#transformations): -- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field. - It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats. For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only. diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 78046ec20..0595c2de0 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -940,6 +940,13 @@ func TestParseQuerySuccess(t *testing.T) { f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`) f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`) + // stats pipe median + f(`* | stats Median(foo) bar`, `* | stats median(foo) as bar`) + f(`* | stats BY(x, y, ) MEDIAN(foo,bar,) bar`, `* | stats by (x, y) median(foo, bar) as bar`) + f(`* | stats median() x`, `* | stats median(*) as x`) + f(`* | stats median(*) x`, `* | stats median(*) as x`) + f(`* | stats median(foo,*,bar) x`, `* | stats median(*) as x`) + // stats pipe multiple funcs f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`) f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`) diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go index bcb2a4388..685a44358 100644 --- a/lib/logstorage/pipe_stats.go +++ b/lib/logstorage/pipe_stats.go @@ -546,6 +546,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) { return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err) } sf = sqs + case lex.isKeyword("median"): + sms, err := parseStatsMedian(lex) + if err != nil { + return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err) + } + sf = sms default: return nil, "", fmt.Errorf("unknown stats func %q", lex.token) } diff --git a/lib/logstorage/stats_median.go b/lib/logstorage/stats_median.go new file mode 100644 index 000000000..30cc3f1b1 --- /dev/null +++ b/lib/logstorage/stats_median.go @@ -0,0 +1,65 @@ +package logstorage + +import ( + "slices" + "unsafe" +) + +type statsMedian struct { + fields []string + containsStar bool +} + +func (sm *statsMedian) String() string { + return "median(" + fieldNamesString(sm.fields) + ")" +} + +func (sm *statsMedian) neededFields() []string { + return sm.fields +} + +func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) { + smp := &statsMedianProcessor{ + sqp: &statsQuantileProcessor{ + sq: &statsQuantile{ + fields: sm.fields, + containsStar: sm.containsStar, + phi: 0.5, + }, + }, + } + return smp, int(unsafe.Sizeof(*smp)) + int(unsafe.Sizeof(*smp.sqp)) + int(unsafe.Sizeof(*smp.sqp.sq)) +} + +type statsMedianProcessor struct { + sqp *statsQuantileProcessor +} + +func (smp *statsMedianProcessor) updateStatsForAllRows(br *blockResult) int { + return smp.sqp.updateStatsForAllRows(br) +} + +func (smp *statsMedianProcessor) updateStatsForRow(br *blockResult, rowIdx int) int { + return smp.sqp.updateStatsForRow(br, rowIdx) +} + +func (smp *statsMedianProcessor) mergeState(sfp statsProcessor) { + src := sfp.(*statsMedianProcessor) + smp.sqp.mergeState(src.sqp) +} + +func (smp *statsMedianProcessor) finalizeStats() string { + return smp.sqp.finalizeStats() +} + +func parseStatsMedian(lex *lexer) (*statsMedian, error) { + fields, err := parseFieldNamesForStatsFunc(lex, "median") + if err != nil { + return nil, err + } + sm := &statsMedian{ + fields: fields, + containsStar: slices.Contains(fields, "*"), + } + return sm, nil +}