From ff260230ea33419287e6c4886532fe81da130220 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 22 May 2024 19:32:17 +0200 Subject: [PATCH] wip --- docs/VictoriaLogs/CHANGELOG.md | 3 ++ docs/VictoriaLogs/LogsQL.md | 72 ++++++++++++++------------------- lib/logstorage/filter_regexp.go | 2 +- lib/logstorage/parser.go | 20 ++++++++- lib/logstorage/parser_test.go | 11 +++-- 5 files changed, 59 insertions(+), 49 deletions(-) diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 66971ff6a..238a0f1aa 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,9 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta ## tip +* FEATURE: allow using `~"some_regexp"` [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) instead of `re("some_regexp")`. +* FEATURE: allow using `="some phrase"` [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) instead of `exact("some phrase")`. +* FEATURE: allow using `="some prefix"*` [exact prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) instead of `exact("some prefix"*)`. * FEATURE: add ability to generate output fields according to the provided format string. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe). * FEATURE: add ability to extract fields with [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-extract). * FEATURE: add ability to unpack JSON fields with [`unpack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_json). diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 994170adf..81ca6807a 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -306,10 +306,10 @@ with `app` field equal to `nginx`: _stream:{app="nginx"} ``` -This query is equivalent to the following [exact()](#exact-filter) query, but the upper query usually works much faster: +This query is equivalent to the following [`exact` filter](#exact-filter) query, but the upper query usually works much faster: ```logsql -app:exact("nginx") +app:="nginx" ``` Performance tips: @@ -449,7 +449,7 @@ This query matches the following [log messages](https://docs.victoriametrics.com This query doesn't match the following log messages: - `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details. -- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `re("err")` for this case. See [these docs](#regexp-filter) for details. +- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details. Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix: @@ -537,44 +537,38 @@ See also: The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages -or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact(...)` filter. +or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact` filter. For example, the following query returns log messages wih the exact value `fatal error: cannot find /foo/bar`: ```logsql -exact("fatal error: cannot find /foo/bar") +="fatal error: cannot find /foo/bar" ``` The query doesn't match the following log messages: - `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain an additional text - other than the specified in the `exact()` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details. + other than the specified in the `exact` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details. -- `FATAL ERROR: cannot find /foo/bar`, since the `exact()` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case. +- `FATAL ERROR: cannot find /foo/bar`, since the `exact` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case. See [these docs](#case-insensitive-filter) for details. -By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). -Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it +By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field: ```logsql -log.level:exact("error") +log.level:="error" ``` Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: ```logsql -log.уровень:exact("ошибка") +log.уровень:="ошибка" ``` The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. For example, the following query matches the `error` value in the field `log:level`: -```logsql -"log:level":exact("error") -``` - -The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one: - ```logsql "log:level":="error" ``` @@ -591,11 +585,11 @@ See also: ### Exact prefix filter -Sometimes it is needed to find log messages starting with some prefix. This can be done with the `exact("prefix"*)` filter. +Sometimes it is needed to find log messages starting with some prefix. This can be done with the `="prefix"*` filter. For example, the following query matches log messages, which start from `Processing request` prefix: ```logsql -exact("Processing request"*) +="Processing request"* ``` This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): @@ -605,34 +599,28 @@ This filter matches the following [log messages](https://docs.victoriametrics.co It doesn't match the following log messages: -- `processing request foobar`, since the log message starts with lowercase `p`. Use `exact("processing request"*) OR exact("Processing request"*)` +- `processing request foobar`, since the log message starts with lowercase `p`. Use `="processing request"* OR ="Processing request"*` query in this case. See [these docs](#logical-filter) for details. - `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case. See [these docs](#phrase-filter) for details. -By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). -Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it +By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix: ```logsql -log.level:exact("err"*) +log.level:="err"* ``` Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: ```logsql -log.уровень:exact("ошиб"*) +log.уровень:="ошиб"* ``` The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. For example, the following query matches `log:level` values starting with `err` prefix: -```logsql -"log:level":exact("err"*) -``` - -The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one: - ```logsql "log:level":="err"* ``` @@ -653,7 +641,7 @@ combined into a single [logical filter](#logical-filter). For example, the follo containing either `error` or `fatal` exact values: ```logsql -log.level:(exact("error") OR exact("fatal")) +log.level:(="error" OR ="fatal") ``` While this solution works OK, LogsQL provides simpler and faster solution for this case - the `in()` filter. @@ -702,7 +690,7 @@ The query matches the following [log messages](https://docs.victoriametrics.com/ The query doesn't match the following log messages: -- `FooError`, since the `FooError` [word](#word) has superflouos prefix `Foo`. Use `re("(?i)error")` for this case. See [these docs](#regexp-filter) for details. +- `FooError`, since the `FooError` [word](#word) has superflouos prefix `Foo`. Use `~"(?i)error"` for this case. See [these docs](#regexp-filter) for details. - `too many Errors`, since the `Errors` [word](#word) has superflouos suffix `s`. Use `i(error*)` for this case. By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). @@ -776,11 +764,11 @@ See also: ### Regexp filter -LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `re(...)` expression. +LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `~"regex"` syntax. For example, the following query returns all the log messages containing `err` or `warn` susbstrings: ```logsql -re("err|warn") +~"err|warn" ``` The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain either `err` or `warn` substrings: @@ -791,33 +779,33 @@ The query matches the following [log messages](https://docs.victoriametrics.com/ The query doesn't match the following log messages: -- `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `re("(?i)(err|warn)")` query for case-insensitive regexp search. +- `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `~"(?i)(err|warn)"` query for case-insensitive regexp search. See [these docs](https://github.com/google/re2/wiki/Syntax) for details. See also [case-insenstive filter docs](#case-insensitive-filter). - `it is warmer than usual`, since it doesn't contain neither `err` nor `warn` substrings. -By default the `re()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). +By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings: ```logsql -event.original:re("err|warn") +event.original:~"err|warn" ``` If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. For example, the following query matches `event:original` field containing either `err` or `warn` substrings: ```logsql -"event:original":re("err|warn") +"event:original":~"err|warn" ``` Performance tips: - Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter. - For example, the `re("error|warning")` query can be substituted with `error OR warning` query, which usually works much faster. - Note that the `re("error|warning")` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches + For example, the `~"error|warning"` query can be substituted with `error OR warning` query, which usually works much faster. + Note that the `~"error|warning"` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches only the specified [words](#word). See also [multi-exact filter](#multi-exact-filter). - Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so lightweighter filters are executed first. -- Prefer using `exact("some prefix"*)` instead of `re("^some prefix")`, since the [exact()](#exact-prefix-filter) works much faster than the `re()` filter. +- Prefer using `="some prefix"*` instead of `~"^some prefix"`, since the [`exact` filter](#exact-prefix-filter) works much faster than the regexp filter. - See [other performance tips](#performance-tips). See also: @@ -1043,7 +1031,7 @@ Performance tips: while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter)) to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the `error` word, which match some `/foo/(bar|baz)` regexp, - it is better from performance PoV to use the query `error re("/foo/(bar|baz)")` instead of `re("/foo/(bar|baz)") error`. + it is better from performance PoV to use the query `error ~"/foo/(bar|baz)"` instead of `~"/foo/(bar|baz)" error`. The most specific filter means that it matches the lowest number of log entries comparing to other filters. diff --git a/lib/logstorage/filter_regexp.go b/lib/logstorage/filter_regexp.go index 31b383826..4d8ad0245 100644 --- a/lib/logstorage/filter_regexp.go +++ b/lib/logstorage/filter_regexp.go @@ -16,7 +16,7 @@ type filterRegexp struct { } func (fr *filterRegexp) String() string { - return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) + return fmt.Sprintf("%s~%q", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) } func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) { diff --git a/lib/logstorage/parser.go b/lib/logstorage/parser.go index 04e9927e2..536b0016e 100644 --- a/lib/logstorage/parser.go +++ b/lib/logstorage/parser.go @@ -597,6 +597,8 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) { return parseFilterLT(lex, fieldName) case lex.isKeyword("="): return parseFilterEQ(lex, fieldName) + case lex.isKeyword("~"): + return parseFilterTilda(lex, fieldName) case lex.isKeyword("not", "!"): return parseFilterNot(lex, fieldName) case lex.isKeyword("exact"): @@ -1017,6 +1019,20 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) { }) } +func parseFilterTilda(lex *lexer, fieldName string) (filter, error) { + lex.nextToken() + arg := getCompoundFuncArg(lex) + re, err := regexp.Compile(arg) + if err != nil { + return nil, fmt.Errorf("invalid regexp %q: %w", arg, err) + } + fr := &filterRegexp{ + fieldName: fieldName, + re: re, + } + return fr, nil +} + func parseFilterEQ(lex *lexer, fieldName string) (filter, error) { lex.nextToken() phrase := getCompoundFuncArg(lex) @@ -1024,13 +1040,13 @@ func parseFilterEQ(lex *lexer, fieldName string) (filter, error) { lex.nextToken() f := &filterExactPrefix{ fieldName: fieldName, - prefix: phrase, + prefix: phrase, } return f, nil } f := &filterExact{ fieldName: fieldName, - value: phrase, + value: phrase, } return f, nil } diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 75e7f5f19..6e8c458a6 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -571,7 +571,7 @@ func TestParseQuerySuccess(t *testing.T) { f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`) f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`) f("level:error and _msg:(a or b)", "level:error (a or b)") - f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:re("foo") !bar`) + f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:~"foo" !bar`) f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`) // prefix search @@ -761,9 +761,12 @@ func TestParseQuerySuccess(t *testing.T) { f(`>=10 <20`, `>=10 <20`) // re filter - f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`) - f("re(foo)", `re("foo")`) - f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`) + f("re('foo|ba(r.+)')", `~"foo|ba(r.+)"`) + f("re(foo)", `~"foo"`) + f(`foo:re(foo-bar/baz.)`, `foo:~"foo-bar/baz."`) + f(`~foo.bar.baz`, `~"foo.bar.baz"`) + f(`foo:~~foo~ba/ba>z`, `foo:~"~foo~ba/ba>z"`) + f(`foo:~'.*'`, `foo:~".*"`) // seq filter f(`seq()`, `seq()`)