From 8a14e2daef9630aa8b7d92645370ee2b2bbd1eee Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 4 Jun 2024 18:07:45 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 4 +- docs/VictoriaLogs/LogsQL.md | 11 +++-- lib/logstorage/filter_range.go | 11 +++++ lib/logstorage/parser.go | 40 +++++++-------- lib/logstorage/pipe_format_test.go | 11 +++++ lib/logstorage/pipe_math.go | 78 ++++++++++++++++++++++++++---- lib/logstorage/pipe_math_test.go | 27 +++++++++++ 7 files changed, 146 insertions(+), 36 deletions(-) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 7448bef9d..8a99318ae 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,7 +19,9 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip -* FEATURE: add ability to format numeric fields into string representation of time, duration and ipv4 with [`format` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe). +* FEATURE: add support for bitwise `and`, `or` and `xor` operations at [`math` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#math-pipe). +* FEATURE: add support for automatic conversion of [RFC3339 time](https://www.rfc-editor.org/rfc/rfc3339) and IPv4 addresses into numeric representation at [`math` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#math-pipe). +* FEATURE: add ability to format numeric fields into string representation of time, duration and IPv4 with [`format` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe). * FEATURE: set `format` field to `rfc3164` or `rfc5424` depending on the [Syslog format](https://en.wikipedia.org/wiki/Syslog) parsed via [`unpack_syslog` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe). ## [v0.16.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.16.0-victorialogs) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 2140b5a6a..fb3371fb6 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -1657,6 +1657,9 @@ The following mathematical operations are supported by `math` pipe: - `arg1 / arg2` - divides `arg1` by `arg2` - `arg1 % arg2` - returns the remainder of the division of `arg1` by `arg2` - `arg1 ^ arg2` - returns the power of `arg1` by `arg2` +- `arg1 & arg2` - returns bitwise `and` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]` +- `arg1 | arg2` - returns bitwise `or` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]` +- `arg1 xor arg2` - returns bitwise `xor` for `arg1` and `arg2`. It is expected that `arg1` and `arg2` are in the range `[0 .. 2^53-1]` - `arg1 default arg2` - returns `arg2` if `arg1` is non-[numeric](#numeric-values) or equals to `NaN` - `abs(arg)` - returns an absolute value for the given `arg` - `exp(arg)` - powers [`e`](https://en.wikipedia.org/wiki/E_(mathematical_constant)) by `arg`. @@ -1669,9 +1672,11 @@ The following mathematical operations are supported by `math` pipe: Every `argX` argument in every mathematical operation can contain one of the following values: - The name of [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). For example, `errors_total / requests_total`. - If the log field contains value, which cannot be parsed into [supported numeric value](#numeric-values), then it is replaced with `NaN`. -- Any [supported numeric value](#numeric-values). For example, `response_size_bytes / 1MiB`. -- Another mathematical expression. Optionally, it may be put inside `(...)`. For example, `(a + b) * c`. + The log field is parsed into numeric value if it contains [supported numeric value](#numeric-values). The log field is parsed into [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) + in nanoseconds if it contains [rfc3339 time](https://www.rfc-editor.org/rfc/rfc3339). The log field is parsed into `uint32` number if it contains IPv4 address. + The log field is parsed into `NaN` in other cases. +- Any [supported numeric value](#numeric-values), [rfc3339 time](https://www.rfc-editor.org/rfc/rfc3339) or IPv4 address. For example, `1MiB`, `"2024-05-15T10:20:30.934324Z"` or `"12.34.56.78"`. +- Another mathematical expression, which can be put inside `(...)`. For example, `(a + b) * c`. See also: diff --git a/lib/logstorage/filter_range.go b/lib/logstorage/filter_range.go index 3ca6f0125..6186e5fb9 100644 --- a/lib/logstorage/filter_range.go +++ b/lib/logstorage/filter_range.go @@ -2,6 +2,7 @@ package logstorage import ( "math" + "strconv" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" ) @@ -312,6 +313,16 @@ func tryParseNumber(s string) (float64, bool) { if ok { return float64(bytes), true } + if isNumberPrefix(s) { + f, err := strconv.ParseFloat(s, 64) + if err == nil { + return f, true + } + n, err := strconv.ParseInt(s, 0, 64) + if err == nil { + return float64(n), true + } + } return 0, false } diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 2f58a9111..1c2707991 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -1316,10 +1316,20 @@ func parseFloat64(lex *lexer) (float64, string, error) { // Try parsing s as integer. // This handles 0x..., 0b... and 0... prefixes, alongside '_' delimiters. - n, err := parseInt(s) + n, err := strconv.ParseInt(s, 0, 64) if err == nil { return float64(n), s, nil } + + nn, ok := tryParseBytes(s) + if ok { + return float64(nn), s, nil + } + nn, ok = tryParseDuration(s) + if ok { + return float64(nn), s, nil + } + return 0, "", fmt.Errorf("cannot parse %q as float64: %w", s, err) } @@ -1616,6 +1626,12 @@ func isNumberPrefix(s string) bool { return false } } + if s[0] == '.' { + s = s[1:] + if len(s) == 0 { + return false + } + } return s[0] >= '0' && s[0] <= '9' } @@ -1713,28 +1729,6 @@ func parseUint(s string) (uint64, error) { return uint64(nn), nil } -func parseInt(s string) (int64, error) { - switch { - case strings.EqualFold(s, "inf"), strings.EqualFold(s, "+inf"): - return math.MaxInt64, nil - case strings.EqualFold(s, "-inf"): - return math.MinInt64, nil - } - - n, err := strconv.ParseInt(s, 0, 64) - if err == nil { - return n, nil - } - nn, ok := tryParseBytes(s) - if !ok { - nn, ok = tryParseDuration(s) - if !ok { - return 0, fmt.Errorf("cannot parse %q as integer: %w", s, err) - } - } - return nn, nil -} - func nextafter(f, xInf float64) float64 { if math.IsInf(f, 0) { return f diff --git a/lib/logstorage/pipe_format_test.go b/lib/logstorage/pipe_format_test.go index d1ff5cf07..449139080 100644 --- a/lib/logstorage/pipe_format_test.go +++ b/lib/logstorage/pipe_format_test.go @@ -54,6 +54,11 @@ func TestPipeFormat(t *testing.T) { {"bar", `210123456789`}, {"baz", "1234567890"}, }, + { + {"foo", `abc`}, + {"bar", `de`}, + {"baz", "ghkl"}, + }, }, [][]Field{ { {"foo", `1717328141123456789`}, @@ -61,6 +66,12 @@ func TestPipeFormat(t *testing.T) { {"baz", "1234567890"}, {"x", "time=2024-06-02T11:35:41.123456789Z, duration=3m30.123456789s, ip=73.150.2.210"}, }, + { + {"foo", `abc`}, + {"bar", `de`}, + {"baz", "ghkl"}, + {"x", "time=abc, duration=de, ip=ghkl"}, + }, }) // skip_empty_results diff --git a/lib/logstorage/pipe_math.go b/lib/logstorage/pipe_math.go index 11bd5d5cd..5bfbc56a4 100644 --- a/lib/logstorage/pipe_math.go +++ b/lib/logstorage/pipe_math.go @@ -161,6 +161,18 @@ var mathBinaryOps = map[string]mathBinaryOp{ priority: 3, f: mathFuncMinus, }, + "&": { + priority: 4, + f: mathFuncAnd, + }, + "xor": { + priority: 5, + f: mathFuncXor, + }, + "|": { + priority: 6, + f: mathFuncOr, + }, "default": { priority: 10, f: mathFuncDefault, @@ -294,11 +306,7 @@ func (shard *pipeMathProcessorShard) executeExpr(me *mathExpr, br *blockResult) var f float64 for i, v := range values { if i == 0 || v != values[i-1] { - var ok bool - f, ok = tryParseFloat64(v) - if !ok { - f = nan - } + f = parseMathNumber(v) } r[i] = f } @@ -495,7 +503,7 @@ func parseMathExprOperand(lex *lexer) (*mathExpr, error) { // just skip unary plus lex.nextToken() return parseMathExprOperand(lex) - case lex.isNumber(): + case isNumberPrefix(lex.token): return parseMathExprConstNumber(lex) default: return parseMathExprFieldName(lex) @@ -637,15 +645,15 @@ func parseMathExprUnaryMinus(lex *lexer) (*mathExpr, error) { } func parseMathExprConstNumber(lex *lexer) (*mathExpr, error) { - if !lex.isNumber() { + if !isNumberPrefix(lex.token) { return nil, fmt.Errorf("cannot parse number from %q", lex.token) } numStr, err := getCompoundMathToken(lex) if err != nil { return nil, fmt.Errorf("cannot parse number: %w", err) } - f, ok := tryParseNumber(numStr) - if !ok { + f := parseMathNumber(numStr) + if math.IsNaN(f) { return nil, fmt.Errorf("cannot parse number from %q", numStr) } me := &mathExpr{ @@ -688,6 +696,42 @@ func getCompoundMathToken(lex *lexer) (string, error) { return rawS + suffix, nil } +func mathFuncAnd(result []float64, args [][]float64) { + a := args[0] + b := args[1] + for i := range result { + if math.IsNaN(a[i]) || math.IsNaN(b[i]) { + result[i] = nan + } else { + result[i] = float64(uint64(a[i]) & uint64(b[i])) + } + } +} + +func mathFuncOr(result []float64, args [][]float64) { + a := args[0] + b := args[1] + for i := range result { + if math.IsNaN(a[i]) || math.IsNaN(b[i]) { + result[i] = nan + } else { + result[i] = float64(uint64(a[i]) | uint64(b[i])) + } + } +} + +func mathFuncXor(result []float64, args [][]float64) { + a := args[0] + b := args[1] + for i := range result { + if math.IsNaN(a[i]) || math.IsNaN(b[i]) { + result[i] = nan + } else { + result[i] = float64(uint64(a[i]) ^ uint64(b[i])) + } + } +} + func mathFuncPlus(result []float64, args [][]float64) { a := args[0] b := args[1] @@ -829,3 +873,19 @@ func round(f, nearest float64) float64 { f, _ = math.Modf(f * p10) return f / p10 } + +func parseMathNumber(s string) float64 { + f, ok := tryParseNumber(s) + if ok { + return f + } + nsecs, ok := tryParseTimestampRFC3339Nano(s) + if ok { + return float64(nsecs) + } + ipNum, ok := tryParseIPv4(s) + if ok { + return float64(ipNum) + } + return nan +} diff --git a/lib/logstorage/pipe_math_test.go b/lib/logstorage/pipe_math_test.go index 7795263ac..c6d73488b 100644 --- a/lib/logstorage/pipe_math_test.go +++ b/lib/logstorage/pipe_math_test.go @@ -50,6 +50,33 @@ func TestPipeMath(t *testing.T) { expectPipeResults(t, pipeStr, rows, rowsExpected) } + f(`math + '2024-05-30T01:02:03Z' + 10e9 as time, + 10m5s + 10e9 as duration, + '123.45.67.89' + 1000 as ip, + time - time % time_step as time_rounded, + duration - duration % duration_step as duration_rounded, + (ip & ip_mask | 0x1234) xor 5678 as subnet + `, [][]Field{ + { + {"time_step", "30m"}, + {"duration_step", "30s"}, + {"ip_mask", "0xffffff00"}, + }, + }, [][]Field{ + { + {"time_step", "30m"}, + {"duration_step", "30s"}, + {"ip_mask", "0xffffff00"}, + {"time", "1717030933000000000"}, + {"duration", "615000000000"}, + {"ip", "2066564929"}, + {"time_rounded", "1717030800000000000"}, + {"duration_rounded", "600000000000"}, + {"subnet", "2066563354"}, + }, + }) + f("math b+1 as a, a*2 as b, b-10.5+c as c", [][]Field{ { {"a", "v1"},