mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
wip
This commit is contained in:
parent
d62bac5609
commit
619dff6861
5 changed files with 235 additions and 3 deletions
|
@ -1092,14 +1092,21 @@ LogsQL supports calculating the following stats:
|
|||
- Sum for the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values. Non-numeric values are ignored. Examples:
|
||||
- `error | stats sum(duration) duration_total` - returns the sum of `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `error` [word](#word).
|
||||
- `GET | stats by (path) sum(response_size)` - returns the sum of `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values
|
||||
- `GET | stats by (path) sum(response_size) response_size_sum` - returns the sum of `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) values
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `GET` [word](#word), grouped
|
||||
by `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
|
||||
|
||||
- The maximum value across the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Non-numeric values are ignored. Examples:
|
||||
- `error | stats max(duration) duration_max` - returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `error` [word](#word).
|
||||
- `GET | stats by (path) max(response_size)` - returns the maximum value for the `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
- `GET | stats by (path) max(response_size) max_response_size` - returns the maximum value for the `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `GET` [word](#word), grouped
|
||||
by `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
|
||||
|
||||
- The minimum value across the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Non-numeric values are ignored. Examples:
|
||||
- `error | stats min(duration) duration_min` - returns the minimum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `error` [word](#word).
|
||||
- `GET | stats by (path) min(response_size) min_response_size` - returns the minimum value for the `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `GET` [word](#word), grouped
|
||||
by `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
|
||||
|
||||
|
@ -1117,7 +1124,7 @@ error | stats by (namespace)
|
|||
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
and fields created by [transformations](#transformations):
|
||||
|
||||
- The min, max, avg, and sum for the given field.
|
||||
- The avg for the given field.
|
||||
- The median and [percentile](https://en.wikipedia.org/wiki/Percentile) for the given field.
|
||||
|
||||
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
|
||||
|
|
|
@ -838,6 +838,109 @@ func (c *blockResultColumn) getMaxValue(br *blockResult) float64 {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *blockResultColumn) getMinValue(br *blockResult) float64 {
|
||||
if c.isConst {
|
||||
v := c.encodedValues[0]
|
||||
f, ok := tryParseFloat64(v)
|
||||
if !ok {
|
||||
return nan
|
||||
}
|
||||
return f
|
||||
}
|
||||
if c.isTime {
|
||||
return nan
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
min := math.Inf(1)
|
||||
f := float64(0)
|
||||
ok := false
|
||||
values := c.encodedValues
|
||||
for i := range values {
|
||||
if i == 0 || values[i-1] != values[i] {
|
||||
f, ok = tryParseFloat64(values[i])
|
||||
}
|
||||
if ok && f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
if math.IsInf(min, 1) {
|
||||
return nan
|
||||
}
|
||||
return min
|
||||
case valueTypeDict:
|
||||
a := encoding.GetFloat64s(len(c.dictValues))
|
||||
dictValuesFloat := a.A
|
||||
for i, v := range c.dictValues {
|
||||
f, ok := tryParseFloat64(v)
|
||||
if !ok {
|
||||
f = nan
|
||||
}
|
||||
dictValuesFloat[i] = f
|
||||
}
|
||||
min := math.Inf(1)
|
||||
for _, v := range c.encodedValues {
|
||||
dictIdx := v[0]
|
||||
f := dictValuesFloat[dictIdx]
|
||||
if f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
encoding.PutFloat64s(a)
|
||||
if math.IsInf(min, 1) {
|
||||
return nan
|
||||
}
|
||||
return min
|
||||
case valueTypeUint8:
|
||||
min := math.Inf(1)
|
||||
for _, v := range c.encodedValues {
|
||||
f := float64(v[0])
|
||||
if f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
return min
|
||||
case valueTypeUint16:
|
||||
min := math.Inf(1)
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
f := float64(encoding.UnmarshalUint16(b))
|
||||
if f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
return min
|
||||
case valueTypeUint32:
|
||||
min := math.Inf(1)
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
f := float64(encoding.UnmarshalUint32(b))
|
||||
if f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
return min
|
||||
case valueTypeUint64:
|
||||
min := math.Inf(1)
|
||||
for _, v := range c.encodedValues {
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
f := float64(encoding.UnmarshalUint64(b))
|
||||
if f < min {
|
||||
min = f
|
||||
}
|
||||
}
|
||||
return min
|
||||
case valueTypeIPv4:
|
||||
return nan
|
||||
case valueTypeTimestampISO8601:
|
||||
return nan
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
return nan
|
||||
}
|
||||
}
|
||||
|
||||
func (c *blockResultColumn) sumValues(br *blockResult) float64 {
|
||||
if c.isConst {
|
||||
v := c.encodedValues[0]
|
||||
|
|
|
@ -846,6 +846,10 @@ func TestParseQuerySuccess(t *testing.T) {
|
|||
f(`* | stats Max(foo) bar`, `* | stats max(foo) as bar`)
|
||||
f(`* | stats BY(x, y, ) MAX(foo,bar,) bar`, `* | stats by (x, y) max(foo, bar) as bar`)
|
||||
|
||||
// stats pipe min
|
||||
f(`* | stats Min(foo) bar`, `* | stats min(foo) as bar`)
|
||||
f(`* | stats BY(x, y, ) MIN(foo,bar,) bar`, `* | stats by (x, y) min(foo, bar) as bar`)
|
||||
|
||||
// stats pipe uniq
|
||||
f(`* | stats uniq(foo) bar`, `* | stats uniq(foo) as bar`)
|
||||
f(`* | stats by(x, y) uniq(foo,bar) as baz`, `* | stats by (x, y) uniq(foo, bar) as baz`)
|
||||
|
@ -1108,6 +1112,11 @@ func TestParseQueryFailure(t *testing.T) {
|
|||
f(`foo | stats max()`)
|
||||
f(`foo | stats max() as abc`)
|
||||
|
||||
// invalid stats min
|
||||
f(`foo | stats min`)
|
||||
f(`foo | stats min()`)
|
||||
f(`foo | stats min() as abc`)
|
||||
|
||||
// invalid stats uniq
|
||||
f(`foo | stats uniq`)
|
||||
f(`foo | stats uniq()`)
|
||||
|
|
|
@ -440,6 +440,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
|
|||
return nil, "", fmt.Errorf("cannot parse 'max' func: %w", err)
|
||||
}
|
||||
sf = sms
|
||||
case lex.isKeyword("min"):
|
||||
sms, err := parseStatsMin(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'min' func: %w", err)
|
||||
}
|
||||
sf = sms
|
||||
default:
|
||||
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
||||
}
|
||||
|
|
107
lib/logstorage/stats_min.go
Normal file
107
lib/logstorage/stats_min.go
Normal file
|
@ -0,0 +1,107 @@
|
|||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type statsMin struct {
|
||||
fields []string
|
||||
containsStar bool
|
||||
}
|
||||
|
||||
func (sm *statsMin) String() string {
|
||||
return "min(" + fieldNamesString(sm.fields) + ")"
|
||||
}
|
||||
|
||||
func (sm *statsMin) neededFields() []string {
|
||||
return sm.fields
|
||||
}
|
||||
|
||||
func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
|
||||
smp := &statsMinProcessor{
|
||||
sm: sm,
|
||||
}
|
||||
return smp, int(unsafe.Sizeof(*smp))
|
||||
}
|
||||
|
||||
type statsMinProcessor struct {
|
||||
sm *statsMin
|
||||
|
||||
min float64
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
if smp.sm.containsStar {
|
||||
// Find the minimum value across all the columns
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getMinValue(br)
|
||||
if f < smp.min {
|
||||
smp.min = f
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Find the minimum value across the requested columns
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getMinValue(br)
|
||||
if f < smp.min {
|
||||
smp.min = f
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||
if smp.sm.containsStar {
|
||||
// Find the minimum value across all the fields for the given row
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f < smp.min {
|
||||
smp.min = f
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Find the minimum value across the requested fields for the given row
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f < smp.min {
|
||||
smp.min = f
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) mergeState(sfp statsProcessor) {
|
||||
src := sfp.(*statsMinProcessor)
|
||||
if src.min < smp.min {
|
||||
smp.min = src.min
|
||||
}
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) finalizeStats() string {
|
||||
return strconv.FormatFloat(smp.min, 'g', -1, 64)
|
||||
}
|
||||
|
||||
func parseStatsMin(lex *lexer) (*statsMin, error) {
|
||||
lex.nextToken()
|
||||
fields, err := parseFieldNamesInParens(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'min' args: %w", err)
|
||||
}
|
||||
if len(fields) == 0 {
|
||||
return nil, fmt.Errorf("'min' must contain at least one arg")
|
||||
}
|
||||
sm := &statsMin{
|
||||
fields: fields,
|
||||
containsStar: slices.Contains(fields, "*"),
|
||||
}
|
||||
return sm, nil
|
||||
}
|
Loading…
Reference in a new issue