From f9d0b21bb95e180a14d8abe1ded219d0be5d73f0 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 27 Apr 2024 21:15:56 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 2 ++ docs/VictoriaLogs/LogsQL.md | 19 +++++++++++++++++++ lib/logstorage/filters.go | 4 +++- lib/logstorage/parser.go | 33 ++++++++++++++++++++++++++++++--- lib/logstorage/parser_test.go | 23 +++++++++++++++-------- lib/logstorage/pipes.go | 4 ++-- 6 files changed, 71 insertions(+), 14 deletions(-) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index fd50f0c71..974f4dd08 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -24,6 +24,8 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta * FEATURE: add support for calculating the number of matching logs and the number of logs with non-empty [fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). Grouping by arbitrary set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) is supported. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats) for details. * FEATURE: add support for limiting the number of returned results. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#limiters). * FEATURE: optimize performance for [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/), which contains multiple filters for [words](https://docs.victoriametrics.com/victorialogs/logsql/#word-filter) or [phrases](https://docs.victoriametrics.com/victorialogs/logsql/#phrase-filter) delimited with [`AND` operator](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter). For example, `foo AND bar` query must find [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) with `foo` and `bar` words at faster speed. +* FEATURE: allow using `_` inside numbers. 
For example, `score:range[1_000, 5_000_000]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter). +* FEATURE: allow numbers in hexadecimal and binary form. For example, `response_size:range[0xff, 0b10001101101]` for [`range` filter](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter). * BUGFIX: prevent from additional CPU usage for up to a few seconds after canceling the query. * BUGFIX: prevent from returning log entries with emtpy `_stream` field in the form `"_stream":""` in [search query results](https://docs.victoriametrics.com/victorialogs/querying/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6042). diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index be7cbeb18..265d74009 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -827,6 +827,12 @@ parentheses with square brackets. For example: - `range(1, 10]` includes `10` in the matching range - `range[1, 10]` includes `1` and `10` in the matching range +The range boundaries can be expressed in the following forms: + +- Hexadecimal form. For example, `range(0xff, 0xABCD)`. +- Binary form. For example, `range(0b100110, 0b11111101)`. +- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`. + Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text. @@ -938,6 +944,19 @@ This query doesn't match the following log messages: - `foo`, since it is too short - `foo bar baz abc`, sinc it is too long +It is possible to use `inf` as the upper bound. 
For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) +with the length bigger or equal to 5 chars: + +```logsql +len_range(5, inf) +``` + +The range boundaries can be expressed in the following forms: + +- Hexadecimal form. For example, `len_range(0xff, 0xABCD)`. +- Binary form. For example, `len_range(0b100110, 0b11111101)`. +- Integer form with `_` delimiters for better readability. For example, `len_range(1_000, 2_345_678)`. + By default the `len_range()` is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). Put the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `len_range()` in order to apply the filter to the needed field. For example, the following query matches log entries with the `foo` field length in the range `[10, 20]` chars: diff --git a/lib/logstorage/filters.go b/lib/logstorage/filters.go index acc638b3f..48b8c0fec 100644 --- a/lib/logstorage/filters.go +++ b/lib/logstorage/filters.go @@ -1083,10 +1083,12 @@ type lenRangeFilter struct { fieldName string minLen uint64 maxLen uint64 + + stringRepr string } func (rf *lenRangeFilter) String() string { - return quoteFieldNameIfNeeded(rf.fieldName) + fmt.Sprintf("len_range(%d,%d)", rf.minLen, rf.maxLen) + return quoteFieldNameIfNeeded(rf.fieldName) + "len_range" + rf.stringRepr } func (rf *lenRangeFilter) apply(bs *blockSearch, bm *filterBitmap) { diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 3c16e50fb..bac0288a3 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -522,18 +522,21 @@ func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) { if len(args) != 2 { return nil, fmt.Errorf("unexpected number of args for %s(); got %d; want 2", funcName, len(args)) } - minLen, err := strconv.ParseUint(args[0], 10, 64) + minLen, err := parseUint(args[0]) if err != 
nil { return nil, fmt.Errorf("cannot parse minLen at %s(): %w", funcName, err) } - maxLen, err := strconv.ParseUint(args[1], 10, 64) + maxLen, err := parseUint(args[1]) if err != nil { return nil, fmt.Errorf("cannot parse maxLen at %s(): %w", funcName, err) } + stringRepr := "(" + args[0] + ", " + args[1] + ")" rf := &lenRangeFilter{ fieldName: fieldName, minLen: minLen, maxLen: maxLen, + + stringRepr: stringRepr, } return rf, nil }) @@ -715,7 +718,7 @@ func parseRangeFilter(lex *lexer, fieldName string) (filter, error) { stringRepr += "(" minValue = math.Nextafter(minValue, math.Inf(1)) } - stringRepr += minValueStr + "," + maxValueStr + stringRepr += minValueStr + ", " + maxValueStr if includeMaxValue { stringRepr += "]" } else { @@ -737,6 +740,12 @@ func parseFloat64(lex *lexer) (float64, string, error) { s := getCompoundToken(lex) f, err := strconv.ParseFloat(s, 64) if err != nil { + // Try parsing s as integer. + // This handles 0x..., 0b... and 0... prefixes. + n, err := parseInt(s) + if err == nil { + return float64(n), s, nil + } return 0, "", fmt.Errorf("cannot parse %q as float64: %w", lex.token, err) } return f, s, nil @@ -1170,3 +1179,21 @@ var reservedKeywords = func() map[string]struct{} { } return m }() + +func parseUint(s string) (uint64, error) { + if strings.EqualFold(s, "inf") || strings.EqualFold(s, "+inf") { + return math.MaxUint64, nil + } + return strconv.ParseUint(s, 0, 64) +} + +func parseInt(s string) (int64, error) { + switch { + case strings.EqualFold(s, "inf"), strings.EqualFold(s, "+inf"): + return math.MaxInt64, nil + case strings.EqualFold(s, "-inf"): + return math.MinInt64, nil + default: + return strconv.ParseInt(s, 0, 64) + } +} diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 62605dd86..9fbb9ac5e 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -743,16 +743,23 @@ func TestParseQuerySuccess(t *testing.T) { f(`ipv4_range(1.2.3.4,)`, `ipv4_range(1.2.3.4, 1.2.3.4)`) // 
len_range filter - f(`len_range(10, 20)`, `len_range(10,20)`) - f(`foo:len_range("10", 20, )`, `foo:len_range(10,20)`) + f(`len_range(10, 20)`, `len_range(10, 20)`) + f(`foo:len_range("10", 20, )`, `foo:len_range(10, 20)`) + f(`len_RANGe(10, inf)`, `len_range(10, inf)`) + f(`len_range(10, +InF)`, `len_range(10, +InF)`) + f(`len_range(10, 1_000_000)`, `len_range(10, 1_000_000)`) + f(`len_range(0x10,0b100101)`, `len_range(0x10, 0b100101)`) // range filter - f(`range(1.234, 5656.43454)`, `range(1.234,5656.43454)`) - f(`foo:range(-2343.344, 2343.4343)`, `foo:range(-2343.344,2343.4343)`) - f(`range(-1.234e-5 , 2.34E+3)`, `range(-1.234e-5,2.34E+3)`) - f(`range[123, 456)`, `range[123,456)`) - f(`range(123, 445]`, `range(123,445]`) - f(`range("1.234e-4", -23)`, `range(1.234e-4,-23)`) + f(`range(1.234, 5656.43454)`, `range(1.234, 5656.43454)`) + f(`foo:range(-2343.344, 2343.4343)`, `foo:range(-2343.344, 2343.4343)`) + f(`range(-1.234e-5 , 2.34E+3)`, `range(-1.234e-5, 2.34E+3)`) + f(`range[123, 456)`, `range[123, 456)`) + f(`range(123, 445]`, `range(123, 445]`) + f(`range("1.234e-4", -23)`, `range(1.234e-4, -23)`) + f(`range(1_000, 0o7532)`, `range(1_000, 0o7532)`) + f(`range(0x1ff, inf)`, `range(0x1ff, inf)`) + f(`range(-INF,+inF)`, `range(-INF, +inF)`) // re filter f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`) diff --git a/lib/logstorage/pipes.go b/lib/logstorage/pipes.go index 1a4db2794..6afc417b0 100644 --- a/lib/logstorage/pipes.go +++ b/lib/logstorage/pipes.go @@ -829,7 +829,7 @@ func parseHeadPipe(lex *lexer) (*headPipe, error) { if !lex.mustNextToken() { return nil, fmt.Errorf("missing the number of head rows to return") } - n, err := strconv.ParseUint(lex.token, 10, 64) + n, err := parseUint(lex.token) if err != nil { return nil, fmt.Errorf("cannot parse the number of head rows to return %q: %w", lex.token, err) } @@ -896,7 +896,7 @@ func parseSkipPipe(lex *lexer) (*skipPipe, error) { if !lex.mustNextToken() { return nil, fmt.Errorf("missing the number of rows to 
skip") } - n, err := strconv.ParseUint(lex.token, 10, 64) + n, err := parseUint(lex.token) if err != nil { return nil, fmt.Errorf("cannot parse the number of rows to skip %q: %w", lex.token, err) }