wip

2024-12-31 15:06:26 +00:00 · 2024-06-04 18:07:45 +02:00 · 2024-06-04 18:07:45 +02:00 · 8a14e2daef
commit 8a14e2daef
parent 1c1e7564fa
7 changed files with 146 additions and 36 deletions
--- a/docs/VictoriaLogs/CHANGELOG.md
+++ b/docs/VictoriaLogs/CHANGELOG.md
@ -19,7 +19,9 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta

 ## tip

-* FEATURE: add ability to format numeric fields into string representation of time, duration and ipv4 with [`format` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe).
+* FEATURE: add support for bitwise `and`, `or` and `xor` operations at [`math` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#math-pipe).
+* FEATURE: add support for automatic conversion of [RFC3339 time](https://www.rfc-editor.org/rfc/rfc3339) and IPv4 addresses into numeric representation at [`math` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#math-pipe).
+* FEATURE: add ability to format numeric fields into string representation of time, duration and IPv4 with [`format` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe).
 * FEATURE: set `format` field to `rfc3164` or `rfc5424` depending on the [Syslog format](https://en.wikipedia.org/wiki/Syslog) parsed via [`unpack_syslog` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe).

 ## [v0.16.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.16.0-victorialogs)
--- a/docs/VictoriaLogs/LogsQL.md
+++ b/docs/VictoriaLogs/LogsQL.md
@ -1657,6 +1657,9 @@ The following mathematical operations are supported by `math` pipe:
 - `arg1 / arg2` - divides `arg1` by `arg2`
 - `arg1 % arg2` - returns the remainder of the division of `arg1` by `arg2`
 - `arg1 ^ arg2` - returns the power of `arg1` by `arg2`
+- `arg1 & arg2` - returns bitwise `and` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]`
+- `arg1 | arg2` - returns bitwise `or` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]`
+- `arg1 xor arg2` - returns bitwise `xor` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]`
 - `arg1 default arg2` - returns `arg2` if `arg1` is non-[numeric](#numeric-values) or equals to `NaN`
 - `abs(arg)` - returns an absolute value for the given `arg`
 - `exp(arg)` - powers [`e`](https://en.wikipedia.org/wiki/E_(mathematical_constant)) by `arg`.
@ -1669,9 +1672,11 @@ The following mathematical operations are supported by `math` pipe:
 Every `argX` argument in every mathematical operation can contain one of the following values:

 - The name of [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). For example, `errors_total / requests_total`.
-  If the log field contains value, which cannot be parsed into [supported numeric value](#numeric-values), then it is replaced with `NaN`.
- Any [supported numeric value](#numeric-values). For example, `response_size_bytes / 1MiB`.
- Another mathematical expression. Optionally, it may be put inside `(...)`. For example, `(a + b) * c`.
+  The log field is parsed into a numeric value if it contains a [supported numeric value](#numeric-values). The log field is parsed into a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time)
+  in nanoseconds if it contains an [RFC3339 time](https://www.rfc-editor.org/rfc/rfc3339). The log field is parsed into a `uint32` number if it contains an IPv4 address.
+  The log field is parsed into `NaN` in all other cases.
+- Any [supported numeric value](#numeric-values), [RFC3339 time](https://www.rfc-editor.org/rfc/rfc3339) or IPv4 address. For example, `1MiB`, `"2024-05-15T10:20:30.934324Z"` or `"12.34.56.78"`.
+- Another mathematical expression, which can be put inside `(...)`. For example, `(a + b) * c`.

 See also:

--- a/lib/logstorage/filter_range.go
+++ b/lib/logstorage/filter_range.go
@ -2,6 +2,7 @@ package logstorage

 import (
 	"math"
+	"strconv"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )
@ -312,6 +313,16 @@ func tryParseNumber(s string) (float64, bool) {
 	if ok {
 		return float64(bytes), true
 	}
+	if isNumberPrefix(s) {
+		f, err := strconv.ParseFloat(s, 64)
+		if err == nil {
+			return f, true
+		}
+		n, err := strconv.ParseInt(s, 0, 64)
+		if err == nil {
+			return float64(n), true
+		}
+	}
 	return 0, false
 }

--- a/lib/logstorage/parser.go
+++ b/lib/logstorage/parser.go
@ -1316,10 +1316,20 @@ func parseFloat64(lex *lexer) (float64, string, error) {

 	// Try parsing s as integer.
 	// This handles 0x..., 0b... and 0... prefixes, alongside '_' delimiters.
-	n, err := parseInt(s)
+	n, err := strconv.ParseInt(s, 0, 64)
 	if err == nil {
 		return float64(n), s, nil
 	}
+
+	nn, ok := tryParseBytes(s)
+	if ok {
+		return float64(nn), s, nil
+	}
+	nn, ok = tryParseDuration(s)
+	if ok {
+		return float64(nn), s, nil
+	}
+
 	return 0, "", fmt.Errorf("cannot parse %q as float64: %w", s, err)
 }

@ -1616,6 +1626,12 @@ func isNumberPrefix(s string) bool {
 			return false
 		}
 	}
+	if s[0] == '.' {
+		s = s[1:]
+		if len(s) == 0 {
+			return false
+		}
+	}
 	return s[0] >= '0' && s[0] <= '9'
 }

@ -1713,28 +1729,6 @@ func parseUint(s string) (uint64, error) {
 	return uint64(nn), nil
 }

-func parseInt(s string) (int64, error) {
-	switch {
-	case strings.EqualFold(s, "inf"), strings.EqualFold(s, "+inf"):
-		return math.MaxInt64, nil
-	case strings.EqualFold(s, "-inf"):
-		return math.MinInt64, nil
-	}
-
-	n, err := strconv.ParseInt(s, 0, 64)
-	if err == nil {
-		return n, nil
-	}
-	nn, ok := tryParseBytes(s)
-	if !ok {
-		nn, ok = tryParseDuration(s)
-		if !ok {
-			return 0, fmt.Errorf("cannot parse %q as integer: %w", s, err)
-		}
-	}
-	return nn, nil
-}
-
 func nextafter(f, xInf float64) float64 {
 	if math.IsInf(f, 0) {
 		return f
--- a/lib/logstorage/pipe_format_test.go
+++ b/lib/logstorage/pipe_format_test.go
@ -54,6 +54,11 @@ func TestPipeFormat(t *testing.T) {
 			{"bar", `210123456789`},
 			{"baz", "1234567890"},
 		},
+		{
+			{"foo", `abc`},
+			{"bar", `de`},
+			{"baz", "ghkl"},
+		},
 	}, [][]Field{
 		{
 			{"foo", `1717328141123456789`},
@ -61,6 +66,12 @@ func TestPipeFormat(t *testing.T) {
 			{"baz", "1234567890"},
 			{"x", "time=2024-06-02T11:35:41.123456789Z, duration=3m30.123456789s, ip=73.150.2.210"},
 		},
+		{
+			{"foo", `abc`},
+			{"bar", `de`},
+			{"baz", "ghkl"},
+			{"x", "time=abc, duration=de, ip=ghkl"},
+		},
 	})

 	// skip_empty_results
--- a/lib/logstorage/pipe_math.go
+++ b/lib/logstorage/pipe_math.go
@ -161,6 +161,18 @@ var mathBinaryOps = map[string]mathBinaryOp{
 		priority: 3,
 		f:        mathFuncMinus,
 	},
+	"&": {
+		priority: 4,
+		f:        mathFuncAnd,
+	},
+	"xor": {
+		priority: 5,
+		f:        mathFuncXor,
+	},
+	"|": {
+		priority: 6,
+		f:        mathFuncOr,
+	},
 	"default": {
 		priority: 10,
 		f:        mathFuncDefault,
@ -294,11 +306,7 @@ func (shard *pipeMathProcessorShard) executeExpr(me *mathExpr, br *blockResult)
 		var f float64
 		for i, v := range values {
 			if i == 0 || v != values[i-1] {
-				var ok bool
-				f, ok = tryParseFloat64(v)
-				if !ok {
-					f = nan
-				}
+				f = parseMathNumber(v)
 			}
 			r[i] = f
 		}
@ -495,7 +503,7 @@ func parseMathExprOperand(lex *lexer) (*mathExpr, error) {
 		// just skip unary plus
 		lex.nextToken()
 		return parseMathExprOperand(lex)
-	case lex.isNumber():
+	case isNumberPrefix(lex.token):
 		return parseMathExprConstNumber(lex)
 	default:
 		return parseMathExprFieldName(lex)
@ -637,15 +645,15 @@ func parseMathExprUnaryMinus(lex *lexer) (*mathExpr, error) {
 }

 func parseMathExprConstNumber(lex *lexer) (*mathExpr, error) {
-	if !lex.isNumber() {
+	if !isNumberPrefix(lex.token) {
 		return nil, fmt.Errorf("cannot parse number from %q", lex.token)
 	}
 	numStr, err := getCompoundMathToken(lex)
 	if err != nil {
 		return nil, fmt.Errorf("cannot parse number: %w", err)
 	}
-	f, ok := tryParseNumber(numStr)
-	if !ok {
+	f := parseMathNumber(numStr)
+	if math.IsNaN(f) {
 		return nil, fmt.Errorf("cannot parse number from %q", numStr)
 	}
 	me := &mathExpr{
@ -688,6 +696,42 @@ func getCompoundMathToken(lex *lexer) (string, error) {
 	return rawS + suffix, nil
 }

+func mathFuncAnd(result []float64, args [][]float64) {
+	a := args[0]
+	b := args[1]
+	for i := range result {
+		if math.IsNaN(a[i]) || math.IsNaN(b[i]) {
+			result[i] = nan
+		} else {
+			result[i] = float64(uint64(a[i]) & uint64(b[i]))
+		}
+	}
+}
+
+func mathFuncOr(result []float64, args [][]float64) {
+	a := args[0]
+	b := args[1]
+	for i := range result {
+		if math.IsNaN(a[i]) || math.IsNaN(b[i]) {
+			result[i] = nan
+		} else {
+			result[i] = float64(uint64(a[i]) | uint64(b[i]))
+		}
+	}
+}
+
+func mathFuncXor(result []float64, args [][]float64) {
+	a := args[0]
+	b := args[1]
+	for i := range result {
+		if math.IsNaN(a[i]) || math.IsNaN(b[i]) {
+			result[i] = nan
+		} else {
+			result[i] = float64(uint64(a[i]) ^ uint64(b[i]))
+		}
+	}
+}
+
 func mathFuncPlus(result []float64, args [][]float64) {
 	a := args[0]
 	b := args[1]
@ -829,3 +873,19 @@ func round(f, nearest float64) float64 {
 	f, _ = math.Modf(f * p10)
 	return f / p10
 }
+
+func parseMathNumber(s string) float64 {
+	f, ok := tryParseNumber(s)
+	if ok {
+		return f
+	}
+	nsecs, ok := tryParseTimestampRFC3339Nano(s)
+	if ok {
+		return float64(nsecs)
+	}
+	ipNum, ok := tryParseIPv4(s)
+	if ok {
+		return float64(ipNum)
+	}
+	return nan
+}
--- a/lib/logstorage/pipe_math_test.go
+++ b/lib/logstorage/pipe_math_test.go
@ -50,6 +50,33 @@ func TestPipeMath(t *testing.T) {
 		expectPipeResults(t, pipeStr, rows, rowsExpected)
 	}

+	f(`math
+		'2024-05-30T01:02:03Z' + 10e9 as time,
+		10m5s + 10e9 as duration,
+		'123.45.67.89' + 1000 as ip,
+		time - time % time_step as time_rounded,
+		duration - duration % duration_step as duration_rounded,
+		(ip & ip_mask | 0x1234) xor 5678 as subnet
+	`, [][]Field{
+		{
+			{"time_step", "30m"},
+			{"duration_step", "30s"},
+			{"ip_mask", "0xffffff00"},
+		},
+	}, [][]Field{
+		{
+			{"time_step", "30m"},
+			{"duration_step", "30s"},
+			{"ip_mask", "0xffffff00"},
+			{"time", "1717030933000000000"},
+			{"duration", "615000000000"},
+			{"ip", "2066564929"},
+			{"time_rounded", "1717030800000000000"},
+			{"duration_rounded", "600000000000"},
+			{"subnet", "2066563354"},
+		},
+	})
+
 	f("math b+1 as a, a*2 as b, b-10.5+c as c", [][]Field{
 		{
 			{"a", "v1"},