mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
447a7f0bdf
commit
f26d593c7b
5 changed files with 317 additions and 0 deletions
|
@ -1372,6 +1372,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
|
||||||
- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`count_uniq`](#count_uniq-stats) calculates the number of unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`max`](#max-stats) calculates the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`max`](#max-stats) calculates the maximum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`min`](#min-stats) calculates the minimum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`min`](#min-stats) calculates the minimum value over the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
- [`quantile`](#quantile-stats) calculates the given quantile for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`sum`](#sum-stats) calculates the sum for the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`sum_len`](#sum_len-stats) calculates the sum of lengths for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`sum_len`](#sum_len-stats) calculates the sum of lengths for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
- [`uniq_values`](#uniq_values-stats) returns unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
- [`uniq_values`](#uniq_values-stats) returns unique non-empty values for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||||
|
@ -1394,6 +1395,7 @@ See also:
|
||||||
|
|
||||||
- [`min`](#min-stats)
|
- [`min`](#min-stats)
|
||||||
- [`max`](#max-stats)
|
- [`max`](#max-stats)
|
||||||
|
- [`quantile`](#quantile-stats)
|
||||||
- [`sum`](#sum-stats)
|
- [`sum`](#sum-stats)
|
||||||
- [`count`](#count-stats)
|
- [`count`](#count-stats)
|
||||||
|
|
||||||
|
@ -1493,6 +1495,7 @@ _time:5m | stats max(duration) max_duration
|
||||||
See also:
|
See also:
|
||||||
|
|
||||||
- [`min`](#min-stats)
|
- [`min`](#min-stats)
|
||||||
|
- [`quantile`](#quantile-stats)
|
||||||
- [`avg`](#avg-stats)
|
- [`avg`](#avg-stats)
|
||||||
- [`sum`](#sum-stats)
|
- [`sum`](#sum-stats)
|
||||||
- [`count`](#count-stats)
|
- [`count`](#count-stats)
|
||||||
|
@ -1513,10 +1516,33 @@ _time:5m | stats min(duration) min_duration
|
||||||
See also:
|
See also:
|
||||||
|
|
||||||
- [`max`](#max-stats)
|
- [`max`](#max-stats)
|
||||||
|
- [`quantile`](#quantile-stats)
|
||||||
- [`avg`](#avg-stats)
|
- [`avg`](#avg-stats)
|
||||||
- [`sum`](#sum-stats)
|
- [`sum`](#sum-stats)
|
||||||
- [`count`](#count-stats)
|
- [`count`](#count-stats)
|
||||||
|
|
||||||
|
### quantile stats
|
||||||
|
|
||||||
|
`quantile(phi, field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates `phi` [percentile](https://en.wikipedia.org/wiki/Percentile) over numeric values
|
||||||
|
for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). The `phi` must be in the range `0 ... 1`, where `0` means `0th` percentile,
|
||||||
|
while `1` means `100th` percentile.
|
||||||
|
|
||||||
|
For example, the following query calculates `50th`, `90th` and `99th` percentiles for the `request_duration_seconds` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||||
|
over logs for the last 5 minutes:
|
||||||
|
|
||||||
|
```logsql
|
||||||
|
_time:5m | stats
|
||||||
|
quantile(0.5, request_duration_seconds) p50,
|
||||||
|
quantile(0.9, request_duration_seconds) p90,
|
||||||
|
quantile(0.99, request_duration_seconds) p99
|
||||||
|
```
|
||||||
|
|
||||||
|
See also:
|
||||||
|
|
||||||
|
- [`min`](#min-stats)
|
||||||
|
- [`max`](#max-stats)
|
||||||
|
- [`avg`](#avg-stats)
|
||||||
|
|
||||||
### sum stats
|
### sum stats
|
||||||
|
|
||||||
`sum(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the sum of numeric values across
|
`sum(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the sum of numeric values across
|
||||||
|
|
|
@ -933,6 +933,13 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||||
f(`* | stats sum_len(*) x`, `* | stats sum_len(*) as x`)
|
f(`* | stats sum_len(*) x`, `* | stats sum_len(*) as x`)
|
||||||
f(`* | stats sum_len(foo,*,bar) x`, `* | stats sum_len(*) as x`)
|
f(`* | stats sum_len(foo,*,bar) x`, `* | stats sum_len(*) as x`)
|
||||||
|
|
||||||
|
// stats pipe quantile
|
||||||
|
f(`* | stats quantile(0, foo) bar`, `* | stats quantile(0, foo) as bar`)
|
||||||
|
f(`* | stats quantile(1, foo) bar`, `* | stats quantile(1, foo) as bar`)
|
||||||
|
f(`* | stats quantile(0.5, a, b, c) bar`, `* | stats quantile(0.5, a, b, c) as bar`)
|
||||||
|
f(`* | stats quantile(0.99, *) bar`, `* | stats quantile(0.99, *) as bar`)
|
||||||
|
f(`* | stats quantile(0.99, a, *, b) bar`, `* | stats quantile(0.99, *) as bar`)
|
||||||
|
|
||||||
// stats pipe multiple funcs
|
// stats pipe multiple funcs
|
||||||
f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
|
f(`* | stats count() "foo.bar:baz", count_uniq(a) bar`, `* | stats count(*) as "foo.bar:baz", count_uniq(a) as bar`)
|
||||||
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
|
f(`* | stats by (x, y) count(*) foo, count_uniq(a,b) bar`, `* | stats by (x, y) count(*) as foo, count_uniq(a, b) as bar`)
|
||||||
|
@ -1286,6 +1293,14 @@ func TestParseQueryFailure(t *testing.T) {
|
||||||
f(`foo | stats sum_len`)
|
f(`foo | stats sum_len`)
|
||||||
f(`foo | stats sum_len()`)
|
f(`foo | stats sum_len()`)
|
||||||
|
|
||||||
|
// invalid stats quantile
|
||||||
|
f(`foo | stats quantile`)
|
||||||
|
f(`foo | stats quantile() foo`)
|
||||||
|
f(`foo | stats quantile(bar, baz) foo`)
|
||||||
|
f(`foo | stats quantile(0.5) foo`)
|
||||||
|
f(`foo | stats quantile(-1, x) foo`)
|
||||||
|
f(`foo | stats quantile(10, x) foo`)
|
||||||
|
|
||||||
// invalid stats grouping fields
|
// invalid stats grouping fields
|
||||||
f(`foo | stats by(foo:bar) count() baz`)
|
f(`foo | stats by(foo:bar) count() baz`)
|
||||||
f(`foo | stats by(foo:/bar) count() baz`)
|
f(`foo | stats by(foo:/bar) count() baz`)
|
||||||
|
|
|
@ -540,6 +540,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
|
||||||
return nil, "", fmt.Errorf("cannot parse 'sum_len' func: %w", err)
|
return nil, "", fmt.Errorf("cannot parse 'sum_len' func: %w", err)
|
||||||
}
|
}
|
||||||
sf = sss
|
sf = sss
|
||||||
|
case lex.isKeyword("quantile"):
|
||||||
|
sqs, err := parseStatsQuantile(lex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
|
||||||
|
}
|
||||||
|
sf = sqs
|
||||||
default:
|
default:
|
||||||
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
||||||
}
|
}
|
||||||
|
|
215
lib/logstorage/stats_quantile.go
Normal file
215
lib/logstorage/stats_quantile.go
Normal file
|
@ -0,0 +1,215 @@
|
||||||
|
package logstorage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/valyala/fastrand"
|
||||||
|
)
|
||||||
|
|
||||||
|
// statsQuantile holds the parsed arguments of a `quantile(phi, field1, ..., fieldN)`
// stats function.
type statsQuantile struct {
	// phi is the requested quantile as parsed from the first argument.
	phi float64

	// fields lists the field names given after phi.
	// The parser collapses it to a single "*" entry when any of the names is "*".
	fields []string

	// containsStar reports whether fields contains "*".
	containsStar bool
}
|
||||||
|
|
||||||
|
func (sq *statsQuantile) String() string {
|
||||||
|
return fmt.Sprintf("quantile(%g, %s)", sq.phi, fieldNamesString(sq.fields))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sq *statsQuantile) neededFields() []string {
|
||||||
|
return sq.fields
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) {
|
||||||
|
sqp := &statsQuantileProcessor{
|
||||||
|
sq: sq,
|
||||||
|
}
|
||||||
|
return sqp, int(unsafe.Sizeof(*sqp))
|
||||||
|
}
|
||||||
|
|
||||||
|
type statsQuantileProcessor struct {
|
||||||
|
sq *statsQuantile
|
||||||
|
|
||||||
|
h histogram
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||||
|
h := &sqp.h
|
||||||
|
stateSizeIncrease := 0
|
||||||
|
|
||||||
|
if sqp.sq.containsStar {
|
||||||
|
for _, c := range br.getColumns() {
|
||||||
|
for _, v := range c.getValues(br) {
|
||||||
|
f, ok := tryParseFloat64(v)
|
||||||
|
if ok {
|
||||||
|
stateSizeIncrease += h.update(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for _, field := range sqp.sq.fields {
|
||||||
|
c := br.getColumnByName(field)
|
||||||
|
for _, v := range c.getValues(br) {
|
||||||
|
f, ok := tryParseFloat64(v)
|
||||||
|
if ok {
|
||||||
|
stateSizeIncrease += h.update(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stateSizeIncrease
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||||
|
h := &sqp.h
|
||||||
|
stateSizeIncrease := 0
|
||||||
|
|
||||||
|
if sqp.sq.containsStar {
|
||||||
|
for _, c := range br.getColumns() {
|
||||||
|
f := c.getFloatValueAtRow(rowIdx)
|
||||||
|
if !math.IsNaN(f) {
|
||||||
|
stateSizeIncrease += h.update(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for _, field := range sqp.sq.fields {
|
||||||
|
c := br.getColumnByName(field)
|
||||||
|
f := c.getFloatValueAtRow(rowIdx)
|
||||||
|
if !math.IsNaN(f) {
|
||||||
|
stateSizeIncrease += h.update(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stateSizeIncrease
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) {
|
||||||
|
src := sfp.(*statsQuantileProcessor)
|
||||||
|
sqp.h.mergeState(&src.h)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sqp *statsQuantileProcessor) finalizeStats() string {
|
||||||
|
q := sqp.h.quantile(sqp.sq.phi)
|
||||||
|
return strconv.FormatFloat(q, 'f', -1, 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseStatsQuantile(lex *lexer) (*statsQuantile, error) {
|
||||||
|
if !lex.isKeyword("quantile") {
|
||||||
|
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "quantile")
|
||||||
|
}
|
||||||
|
lex.nextToken()
|
||||||
|
|
||||||
|
fields, err := parseFieldNamesInParens(lex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot parse 'quantile' args: %w", err)
|
||||||
|
}
|
||||||
|
if len(fields) < 2 {
|
||||||
|
return nil, fmt.Errorf("'quantile' must have at least two args: phi and field name")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse phi
|
||||||
|
phi, ok := tryParseFloat64(fields[0])
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("phi arg in 'quantile' must be floating point number; got %q", fields[0])
|
||||||
|
}
|
||||||
|
if phi < 0 || phi > 1 {
|
||||||
|
return nil, fmt.Errorf("phi arg in 'quantile' must be in the range [0..1]; got %q", fields[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse fields
|
||||||
|
fields = fields[1:]
|
||||||
|
if slices.Contains(fields, "*") {
|
||||||
|
fields = []string{"*"}
|
||||||
|
}
|
||||||
|
|
||||||
|
sq := &statsQuantile{
|
||||||
|
fields: fields,
|
||||||
|
containsStar: slices.Contains(fields, "*"),
|
||||||
|
|
||||||
|
phi: phi,
|
||||||
|
}
|
||||||
|
return sq, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type histogram struct {
|
||||||
|
a []float64
|
||||||
|
min float64
|
||||||
|
max float64
|
||||||
|
count uint64
|
||||||
|
|
||||||
|
rng fastrand.RNG
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *histogram) update(f float64) int {
|
||||||
|
if h.count == 0 || f < h.min {
|
||||||
|
h.min = f
|
||||||
|
}
|
||||||
|
if h.count == 0 || f > h.max {
|
||||||
|
h.max = f
|
||||||
|
}
|
||||||
|
|
||||||
|
h.count++
|
||||||
|
if len(h.a) < maxHistogramSamples {
|
||||||
|
h.a = append(h.a, f)
|
||||||
|
return int(unsafe.Sizeof(f))
|
||||||
|
}
|
||||||
|
|
||||||
|
if n := h.rng.Uint32n(uint32(h.count)); n < uint32(len(h.a)) {
|
||||||
|
h.a[n] = f
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// maxHistogramSamples is the number of values histogram.update stores in the sample
// before it switches from appending to overwriting randomly selected entries.
const maxHistogramSamples = 100_000
|
||||||
|
|
||||||
|
func (h *histogram) mergeState(src *histogram) {
|
||||||
|
if src.count == 0 {
|
||||||
|
// Nothing to merge
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if h.count == 0 {
|
||||||
|
h.a = append(h.a, src.a...)
|
||||||
|
h.min = src.min
|
||||||
|
h.max = src.max
|
||||||
|
h.count = src.count
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
h.a = append(h.a, src.a...)
|
||||||
|
if src.min < h.min {
|
||||||
|
h.min = src.min
|
||||||
|
}
|
||||||
|
if src.max > h.max {
|
||||||
|
h.max = src.max
|
||||||
|
}
|
||||||
|
h.count += src.count
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *histogram) quantile(phi float64) float64 {
|
||||||
|
if len(h.a) == 0 {
|
||||||
|
return nan
|
||||||
|
}
|
||||||
|
if len(h.a) == 1 {
|
||||||
|
return h.a[0]
|
||||||
|
}
|
||||||
|
if phi <= 0 {
|
||||||
|
return h.min
|
||||||
|
}
|
||||||
|
if phi >= 1 {
|
||||||
|
return h.max
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.Sort(h.a)
|
||||||
|
idx := int(phi * float64(len(h.a)))
|
||||||
|
if idx == len(h.a) {
|
||||||
|
return h.max
|
||||||
|
}
|
||||||
|
return h.a[idx]
|
||||||
|
}
|
55
lib/logstorage/stats_quantile_test.go
Normal file
55
lib/logstorage/stats_quantile_test.go
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
package logstorage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHistogramQuantile(t *testing.T) {
|
||||||
|
f := func(a []float64, phi, qExpected float64) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
var h histogram
|
||||||
|
for _, f := range a {
|
||||||
|
h.update(f)
|
||||||
|
}
|
||||||
|
q := h.quantile(phi)
|
||||||
|
|
||||||
|
if math.IsNaN(qExpected) {
|
||||||
|
if !math.IsNaN(q) {
|
||||||
|
t.Fatalf("unexpected result for q=%v, phi=%v; got %v; want %v", a, phi, q, qExpected)
|
||||||
|
}
|
||||||
|
} else if q != qExpected {
|
||||||
|
t.Fatalf("unexpected result for q=%v, phi=%v; got %v; want %v", a, phi, q, qExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f(nil, -1, nan)
|
||||||
|
f(nil, 0, nan)
|
||||||
|
f(nil, 0.5, nan)
|
||||||
|
f(nil, 1, nan)
|
||||||
|
f(nil, 10, nan)
|
||||||
|
|
||||||
|
f([]float64{123}, -1, 123)
|
||||||
|
f([]float64{123}, 0, 123)
|
||||||
|
f([]float64{123}, 0.5, 123)
|
||||||
|
f([]float64{123}, 1, 123)
|
||||||
|
f([]float64{123}, 10, 123)
|
||||||
|
|
||||||
|
f([]float64{5, 1}, -1, 1)
|
||||||
|
f([]float64{5, 1}, 0, 1)
|
||||||
|
f([]float64{5, 1}, 0.5-1e-5, 1)
|
||||||
|
f([]float64{5, 1}, 0.5, 5)
|
||||||
|
f([]float64{5, 1}, 1, 5)
|
||||||
|
f([]float64{5, 1}, 10, 5)
|
||||||
|
|
||||||
|
f([]float64{5, 1, 3}, -1, 1)
|
||||||
|
f([]float64{5, 1, 3}, 0, 1)
|
||||||
|
f([]float64{5, 1, 3}, 1.0/3-1e-5, 1)
|
||||||
|
f([]float64{5, 1, 3}, 1.0/3, 3)
|
||||||
|
f([]float64{5, 1, 3}, 2.0/3-1e-5, 3)
|
||||||
|
f([]float64{5, 1, 3}, 2.0/3, 5)
|
||||||
|
f([]float64{5, 1, 3}, 1-1e-5, 5)
|
||||||
|
f([]float64{5, 1, 3}, 1, 5)
|
||||||
|
f([]float64{5, 1, 3}, 10, 5)
|
||||||
|
}
|
Loading…
Reference in a new issue