Aliaksandr Valialkin 2024-05-01 01:58:35 +02:00
parent dc6ec4bdbb
commit d62bac5609
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 257 additions and 13 deletions


@@ -1096,6 +1096,13 @@ LogsQL supports calculating the following stats:
   across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `GET` [word](#word), grouped
   by `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
+- The maximum value across the given numeric [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Non-numeric values are ignored. Examples:
+  - `error | stats max(duration) duration_max` - returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
+    across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `error` [word](#word).
+  - `GET | stats by (path) max(response_size)` - returns the maximum value for the `response_size` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
+    across [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with the `GET` [word](#word), grouped
+    by `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
 Stats calculations can be combined. For example, the following query calculates the number of log messages with the `error` [word](#word),
 the number of unique values for `ip` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) and the sum of `duration`
 [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model), grouped by `namespace` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model):
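The combined query introduced by the paragraph above lies outside this hunk. As an illustrative sketch only (the result names are invented here, and `count()` and `uniq()` are assumed to be the stats functions for message counts and unique values), such a query could look like `error | stats by (namespace) count() as logs_total, uniq(ip) as ips_unique, sum(duration) as duration_sum`.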


@@ -686,20 +686,29 @@ func (c *blockResultColumn) getValues(br *blockResult) []string {
 func (c *blockResultColumn) getFloatValueAtRow(rowIdx int) float64 {
 	if c.isConst {
 		v := c.encodedValues[0]
-		f, _ := tryParseFloat64(v)
+		f, ok := tryParseFloat64(v)
+		if !ok {
+			return nan
+		}
 		return f
 	}
 	if c.isTime {
-		return 0
+		return nan
 	}
 	switch c.valueType {
 	case valueTypeString:
-		f, _ := tryParseFloat64(c.encodedValues[rowIdx])
+		f, ok := tryParseFloat64(c.encodedValues[rowIdx])
+		if !ok {
+			return nan
+		}
 		return f
 	case valueTypeDict:
 		dictIdx := c.encodedValues[rowIdx][0]
-		f, _ := tryParseFloat64(c.dictValues[dictIdx])
+		f, ok := tryParseFloat64(c.dictValues[dictIdx])
+		if !ok {
+			return nan
+		}
 		return f
 	case valueTypeUint8:
 		return float64(c.encodedValues[rowIdx][0])
@@ -717,20 +726,123 @@ func (c *blockResultColumn) getFloatValueAtRow(rowIdx int) float64 {
 		n := encoding.UnmarshalUint64(b)
 		return math.Float64frombits(n)
 	case valueTypeIPv4:
-		return 0
+		return nan
 	case valueTypeTimestampISO8601:
-		return 0
+		return nan
 	default:
 		logger.Panicf("BUG: unknown valueType=%d", c.valueType)
-		return 0
+		return nan
+	}
+}
+
+func (c *blockResultColumn) getMaxValue(br *blockResult) float64 {
+	if c.isConst {
+		v := c.encodedValues[0]
+		f, ok := tryParseFloat64(v)
+		if !ok {
+			return nan
+		}
+		return f
+	}
+	if c.isTime {
+		return nan
+	}
+	switch c.valueType {
+	case valueTypeString:
+		max := math.Inf(-1)
+		f := float64(0)
+		ok := false
+		values := c.encodedValues
+		for i := range values {
+			if i == 0 || values[i-1] != values[i] {
+				f, ok = tryParseFloat64(values[i])
+			}
+			if ok && f > max {
+				max = f
+			}
+		}
+		if math.IsInf(max, -1) {
+			return nan
+		}
+		return max
+	case valueTypeDict:
+		a := encoding.GetFloat64s(len(c.dictValues))
+		dictValuesFloat := a.A
+		for i, v := range c.dictValues {
+			f, ok := tryParseFloat64(v)
+			if !ok {
+				f = nan
+			}
+			dictValuesFloat[i] = f
+		}
+		max := math.Inf(-1)
+		for _, v := range c.encodedValues {
+			dictIdx := v[0]
+			f := dictValuesFloat[dictIdx]
+			if f > max {
+				max = f
+			}
+		}
+		encoding.PutFloat64s(a)
+		if math.IsInf(max, -1) {
+			return nan
+		}
+		return max
+	case valueTypeUint8:
+		max := math.Inf(-1)
+		for _, v := range c.encodedValues {
+			f := float64(v[0])
+			if f > max {
+				max = f
+			}
+		}
+		return max
+	case valueTypeUint16:
+		max := math.Inf(-1)
+		for _, v := range c.encodedValues {
+			b := bytesutil.ToUnsafeBytes(v)
+			f := float64(encoding.UnmarshalUint16(b))
+			if f > max {
+				max = f
+			}
+		}
+		return max
+	case valueTypeUint32:
+		max := math.Inf(-1)
+		for _, v := range c.encodedValues {
+			b := bytesutil.ToUnsafeBytes(v)
+			f := float64(encoding.UnmarshalUint32(b))
+			if f > max {
+				max = f
+			}
+		}
+		return max
+	case valueTypeUint64:
+		max := math.Inf(-1)
+		for _, v := range c.encodedValues {
+			b := bytesutil.ToUnsafeBytes(v)
+			f := float64(encoding.UnmarshalUint64(b))
+			if f > max {
+				max = f
+			}
+		}
+		return max
+	case valueTypeIPv4:
+		return nan
+	case valueTypeTimestampISO8601:
+		return nan
+	default:
+		logger.Panicf("BUG: unknown valueType=%d", c.valueType)
+		return nan
 	}
 }
 
 func (c *blockResultColumn) sumValues(br *blockResult) float64 {
 	if c.isConst {
 		v := c.encodedValues[0]
-		f, _ := tryParseFloat64(v)
-		if f == 0 || math.IsNaN(f) {
+		f, ok := tryParseFloat64(v)
+		if !ok {
 			return 0
 		}
 		return f * float64(len(br.timestamps))
@@ -743,12 +855,13 @@ func (c *blockResultColumn) sumValues(br *blockResult) float64 {
 	case valueTypeString:
 		sum := float64(0)
 		f := float64(0)
+		ok := false
 		values := c.encodedValues
 		for i := range values {
 			if i == 0 || values[i-1] != values[i] {
-				f, _ = tryParseFloat64(values[i])
+				f, ok = tryParseFloat64(values[i])
 			}
-			if !math.IsNaN(f) {
+			if ok {
 				sum += f
 			}
 		}
@@ -757,8 +870,8 @@ func (c *blockResultColumn) sumValues(br *blockResult) float64 {
 		a := encoding.GetFloat64s(len(c.dictValues))
 		dictValuesFloat := a.A
 		for i, v := range c.dictValues {
-			f, _ := tryParseFloat64(v)
-			if math.IsNaN(f) {
+			f, ok := tryParseFloat64(v)
+			if !ok {
 				f = 0
 			}
 			dictValuesFloat[i] = f
@@ -817,3 +930,5 @@ func (c *blockResultColumn) sumValues(br *blockResult) float64 {
 		return 0
 	}
 }
+
+var nan = math.NaN()
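A note on why the changes above replace `return 0` with `return nan`: in Go every ordered comparison involving NaN evaluates to false, so code that tracks a running maximum (or conditionally adds to a sum) skips non-numeric values automatically instead of treating them as 0. A minimal standalone sketch of this behavior (not part of the commit):

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	nan := math.NaN()
	max := math.Inf(-1)
	for _, f := range []float64{nan, 3, nan, 7} {
		// f > max is false whenever f is NaN, so non-numeric values are skipped.
		if f > max {
			max = f
		}
	}
	fmt.Println(max) // prints 7
}
```

This is the same reason the `f > smp.max` checks in the new stats_max.go file below never pick up NaN results from getMaxValue or getFloatValueAtRow.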


@@ -842,6 +842,10 @@ func TestParseQuerySuccess(t *testing.T) {
 	f(`* | stats Sum(foo) bar`, `* | stats sum(foo) as bar`)
 	f(`* | stats BY(x, y, ) SUM(foo,bar,) bar`, `* | stats by (x, y) sum(foo, bar) as bar`)
 
+	// stats pipe max
+	f(`* | stats Max(foo) bar`, `* | stats max(foo) as bar`)
+	f(`* | stats BY(x, y, ) MAX(foo,bar,) bar`, `* | stats by (x, y) max(foo, bar) as bar`)
+
 	// stats pipe uniq
 	f(`* | stats uniq(foo) bar`, `* | stats uniq(foo) as bar`)
 	f(`* | stats by(x, y) uniq(foo,bar) as baz`, `* | stats by (x, y) uniq(foo, bar) as baz`)
@@ -1099,6 +1103,11 @@ func TestParseQueryFailure(t *testing.T) {
 	f(`foo | stats sum()`)
 	f(`foo | stats sum() as abc`)
 
+	// invalid stats max
+	f(`foo | stats max`)
+	f(`foo | stats max()`)
+	f(`foo | stats max() as abc`)
+
 	// invalid stats uniq
 	f(`foo | stats uniq`)
 	f(`foo | stats uniq()`)


@@ -434,6 +434,12 @@ func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
 			return nil, "", fmt.Errorf("cannot parse 'sum' func: %w", err)
 		}
 		sf = sfs
+	case lex.isKeyword("max"):
+		sms, err := parseStatsMax(lex)
+		if err != nil {
+			return nil, "", fmt.Errorf("cannot parse 'max' func: %w", err)
+		}
+		sf = sms
 	default:
 		return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
 	}

lib/logstorage/stats_max.go — new file, 107 lines

@@ -0,0 +1,107 @@
package logstorage

import (
	"fmt"
	"slices"
	"strconv"
	"unsafe"
)

type statsMax struct {
	fields       []string
	containsStar bool
}

func (sm *statsMax) String() string {
	return "max(" + fieldNamesString(sm.fields) + ")"
}

func (sm *statsMax) neededFields() []string {
	return sm.fields
}

func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
	smp := &statsMaxProcessor{
		sm: sm,
	}
	return smp, int(unsafe.Sizeof(*smp))
}

type statsMaxProcessor struct {
	sm *statsMax

	max float64
}

func (smp *statsMaxProcessor) updateStatsForAllRows(br *blockResult) int {
	if smp.sm.containsStar {
		// Find the maximum value across all the columns
		for _, c := range br.getColumns() {
			f := c.getMaxValue(br)
			if f > smp.max {
				smp.max = f
			}
		}
		return 0
	}

	// Find the maximum value across the requested columns
	for _, field := range smp.sm.fields {
		c := br.getColumnByName(field)
		f := c.getMaxValue(br)
		if f > smp.max {
			smp.max = f
		}
	}
	return 0
}

func (smp *statsMaxProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
	if smp.sm.containsStar {
		// Find the maximum value across all the fields for the given row
		for _, c := range br.getColumns() {
			f := c.getFloatValueAtRow(rowIdx)
			if f > smp.max {
				smp.max = f
			}
		}
		return 0
	}

	// Find the maximum value across the requested fields for the given row
	for _, field := range smp.sm.fields {
		c := br.getColumnByName(field)
		f := c.getFloatValueAtRow(rowIdx)
		if f > smp.max {
			smp.max = f
		}
	}
	return 0
}

func (smp *statsMaxProcessor) mergeState(sfp statsProcessor) {
	src := sfp.(*statsMaxProcessor)
	if src.max > smp.max {
		smp.max = src.max
	}
}

func (smp *statsMaxProcessor) finalizeStats() string {
	return strconv.FormatFloat(smp.max, 'g', -1, 64)
}

func parseStatsMax(lex *lexer) (*statsMax, error) {
	lex.nextToken()
	fields, err := parseFieldNamesInParens(lex)
	if err != nil {
		return nil, fmt.Errorf("cannot parse 'max' args: %w", err)
	}
	if len(fields) == 0 {
		return nil, fmt.Errorf("'max' must contain at least one arg")
	}
	sm := &statsMax{
		fields:       fields,
		containsStar: slices.Contains(fields, "*"),
	}
	return sm, nil
}
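As the methods above show, the processor follows the statsProcessor pattern used by the other stats functions: per-block (or per-row) updates of a running maximum, merging of partial states (presumably produced by concurrent workers), and a final string result via strconv.FormatFloat. A simplified standalone sketch of that shape (toy names, not part of the commit):

```go
package main

import (
	"fmt"
	"strconv"
)

// toyMax mirrors the shape of statsMaxProcessor in this commit:
// a running maximum that can be merged with another partial result.
type toyMax struct{ max float64 }

func (m *toyMax) update(f float64) {
	if f > m.max { // NaN values never pass this check, so they are ignored
		m.max = f
	}
}

func (m *toyMax) merge(src *toyMax) {
	if src.max > m.max {
		m.max = src.max
	}
}

func (m *toyMax) finalize() string {
	return strconv.FormatFloat(m.max, 'g', -1, 64)
}

func main() {
	// Two "workers" process separate chunks of values, then merge.
	a, b := &toyMax{}, &toyMax{}
	for _, f := range []float64{1.5, 42, 7} {
		a.update(f)
	}
	for _, f := range []float64{3, 99.5} {
		b.update(f)
	}
	a.merge(b)
	fmt.Println(a.finalize()) // 99.5
}
```

The merge step is what lets each worker keep only a single float64 of state, which is reflected in newStatsProcessor reporting unsafe.Sizeof(*smp) as the processor's memory footprint.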