This commit is contained in:
Aliaksandr Valialkin 2024-05-22 19:32:17 +02:00
parent b60cbd5c54
commit ff260230ea
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 59 additions and 49 deletions

View file

@ -19,6 +19,9 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip ## tip
* FEATURE: allow using `~"some_regexp"` [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) instead of `re("some_regexp")`.
* FEATURE: allow using `="some phrase"` [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) instead of `exact("some phrase")`.
* FEATURE: allow using `="some prefix"*` [exact prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) instead of `exact("some prefix"*)`.
* FEATURE: add ability to generate output fields according to the provided format string. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe). * FEATURE: add ability to generate output fields according to the provided format string. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe).
* FEATURE: add ability to extract fields with [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-extract). * FEATURE: add ability to extract fields with [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-extract).
* FEATURE: add ability to unpack JSON fields with [`unpack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_json). * FEATURE: add ability to unpack JSON fields with [`unpack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_json).

View file

@ -306,10 +306,10 @@ with `app` field equal to `nginx`:
_stream:{app="nginx"} _stream:{app="nginx"}
``` ```
This query is equivalent to the following [exact()](#exact-filter) query, but the upper query usually works much faster: This query is equivalent to the following [`exact` filter](#exact-filter) query, but the upper query usually works much faster:
```logsql ```logsql
app:exact("nginx") app:="nginx"
``` ```
Performance tips: Performance tips:
@ -449,7 +449,7 @@ This query matches the following [log messages](https://docs.victoriametrics.com
This query doesn't match the following log messages: This query doesn't match the following log messages:
- `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details. - `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details.
- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `re("err")` for this case. See [these docs](#regexp-filter) for details. - `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details.
Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches
[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix: [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix:
@ -537,44 +537,38 @@ See also:
The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages
or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact(...)` filter. or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact` filter.
For example, the following query returns log messages with the exact value `fatal error: cannot find /foo/bar`: For example, the following query returns log messages with the exact value `fatal error: cannot find /foo/bar`:
```logsql ```logsql
exact("fatal error: cannot find /foo/bar") ="fatal error: cannot find /foo/bar"
``` ```
The query doesn't match the following log messages: The query doesn't match the following log messages:
- `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain an additional text - `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain an additional text
other than the specified in the `exact()` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details. other than the specified in the `exact` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details.
- `FATAL ERROR: cannot find /foo/bar`, since the `exact()` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case. - `FATAL ERROR: cannot find /foo/bar`, since the `exact` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case.
See [these docs](#case-insensitive-filter) for details. See [these docs](#case-insensitive-filter) for details.
By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field: if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field:
```logsql ```logsql
log.level:exact("error") log.level:="error"
``` ```
Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
```logsql ```logsql
log.уровень:exact("ошибка") log.уровень:="ошибка"
``` ```
The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
For example, the following query matches the `error` value in the field `log:level`: For example, the following query matches the `error` value in the field `log:level`:
```logsql
"log:level":exact("error")
```
The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one:
```logsql ```logsql
"log:level":="error" "log:level":="error"
``` ```
@ -591,11 +585,11 @@ See also:
### Exact prefix filter ### Exact prefix filter
Sometimes it is needed to find log messages starting with some prefix. This can be done with the `exact("prefix"*)` filter. Sometimes it is needed to find log messages starting with some prefix. This can be done with the `="prefix"*` filter.
For example, the following query matches log messages, which start from `Processing request` prefix: For example, the following query matches log messages, which start from `Processing request` prefix:
```logsql ```logsql
exact("Processing request"*) ="Processing request"*
``` ```
This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field): This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
@ -605,34 +599,28 @@ This filter matches the following [log messages](https://docs.victoriametrics.co
It doesn't match the following log messages: It doesn't match the following log messages:
- `processing request foobar`, since the log message starts with lowercase `p`. Use `exact("processing request"*) OR exact("Processing request"*)` - `processing request foobar`, since the log message starts with lowercase `p`. Use `="processing request"* OR ="Processing request"*`
query in this case. See [these docs](#logical-filter) for details. query in this case. See [these docs](#logical-filter) for details.
- `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case. - `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case.
See [these docs](#phrase-filter) for details. See [these docs](#phrase-filter) for details.
By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix: if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix:
```logsql ```logsql
log.level:exact("err"*) log.level:="err"*
``` ```
Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example: Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
```logsql ```logsql
log.уровень:exact("ошиб"*) log.уровень:="ошиб"*
``` ```
The field name can be put inside quotes if it contains special chars, which may clash with the query syntax. The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
For example, the following query matches `log:level` values starting with `err` prefix: For example, the following query matches `log:level` values starting with `err` prefix:
```logsql
"log:level":exact("err"*)
```
The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one:
```logsql ```logsql
"log:level":="err"* "log:level":="err"*
``` ```
@ -653,7 +641,7 @@ combined into a single [logical filter](#logical-filter). For example, the follo
containing either `error` or `fatal` exact values: containing either `error` or `fatal` exact values:
```logsql ```logsql
log.level:(exact("error") OR exact("fatal")) log.level:(="error" OR ="fatal")
``` ```
While this solution works OK, LogsQL provides simpler and faster solution for this case - the `in()` filter. While this solution works OK, LogsQL provides simpler and faster solution for this case - the `in()` filter.
@ -702,7 +690,7 @@ The query matches the following [log messages](https://docs.victoriametrics.com/
The query doesn't match the following log messages: The query doesn't match the following log messages:
- `FooError`, since the `FooError` [word](#word) has superfluous prefix `Foo`. Use `re("(?i)error")` for this case. See [these docs](#regexp-filter) for details. - `FooError`, since the `FooError` [word](#word) has superfluous prefix `Foo`. Use `~"(?i)error"` for this case. See [these docs](#regexp-filter) for details.
- `too many Errors`, since the `Errors` [word](#word) has superfluous suffix `s`. Use `i(error*)` for this case. - `too many Errors`, since the `Errors` [word](#word) has superfluous suffix `s`. Use `i(error*)` for this case.
By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
@ -776,11 +764,11 @@ See also:
### Regexp filter ### Regexp filter
LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `re(...)` expression. LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `~"regex"` syntax.
For example, the following query returns all the log messages containing `err` or `warn` substrings: For example, the following query returns all the log messages containing `err` or `warn` substrings:
```logsql ```logsql
re("err|warn") ~"err|warn"
``` ```
The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain either `err` or `warn` substrings: The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain either `err` or `warn` substrings:
@ -791,33 +779,33 @@ The query matches the following [log messages](https://docs.victoriametrics.com/
The query doesn't match the following log messages: The query doesn't match the following log messages:
- `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `re("(?i)(err|warn)")` query for case-insensitive regexp search. - `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `~"(?i)(err|warn)"` query for case-insensitive regexp search.
See [these docs](https://github.com/google/re2/wiki/Syntax) for details. See also [case-insensitive filter docs](#case-insensitive-filter). See [these docs](https://github.com/google/re2/wiki/Syntax) for details. See also [case-insensitive filter docs](#case-insensitive-filter).
- `it is warmer than usual`, since it contains neither `err` nor `warn` substrings. - `it is warmer than usual`, since it contains neither `err` nor `warn` substrings.
By default the `re()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field). By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings: in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings:
```logsql ```logsql
event.original:re("err|warn") event.original:~"err|warn"
``` ```
If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query. If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
For example, the following query matches `event:original` field containing either `err` or `warn` substrings: For example, the following query matches `event:original` field containing either `err` or `warn` substrings:
```logsql ```logsql
"event:original":re("err|warn") "event:original":~"err|warn"
``` ```
Performance tips: Performance tips:
- Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter. - Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter.
For example, the `re("error|warning")` query can be substituted with `error OR warning` query, which usually works much faster. For example, the `~"error|warning"` query can be substituted with `error OR warning` query, which usually works much faster.
Note that the `re("error|warning")` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches Note that the `~"error|warning"` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches
only the specified [words](#word). See also [multi-exact filter](#multi-exact-filter). only the specified [words](#word). See also [multi-exact filter](#multi-exact-filter).
- Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so lightweighter filters are executed first. - Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so lightweighter filters are executed first.
- Prefer using `exact("some prefix"*)` instead of `re("^some prefix")`, since the [exact()](#exact-prefix-filter) works much faster than the `re()` filter. - Prefer using `="some prefix"*` instead of `~"^some prefix"`, since the [`exact` filter](#exact-prefix-filter) works much faster than the regexp filter.
- See [other performance tips](#performance-tips). - See [other performance tips](#performance-tips).
See also: See also:
@ -1043,7 +1031,7 @@ Performance tips:
while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter)) while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter))
to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
with the `error` word, which match some `/foo/(bar|baz)` regexp, with the `error` word, which match some `/foo/(bar|baz)` regexp,
it is better from performance PoV to use the query `error re("/foo/(bar|baz)")` instead of `re("/foo/(bar|baz)") error`. it is better from performance PoV to use the query `error ~"/foo/(bar|baz)"` instead of `~"/foo/(bar|baz)" error`.
The most specific filter means that it matches the lowest number of log entries comparing to other filters. The most specific filter means that it matches the lowest number of log entries comparing to other filters.

View file

@ -16,7 +16,7 @@ type filterRegexp struct {
} }
func (fr *filterRegexp) String() string { func (fr *filterRegexp) String() string {
return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String()) return fmt.Sprintf("%s~%q", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
} }
func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) { func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {

View file

@ -597,6 +597,8 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
return parseFilterLT(lex, fieldName) return parseFilterLT(lex, fieldName)
case lex.isKeyword("="): case lex.isKeyword("="):
return parseFilterEQ(lex, fieldName) return parseFilterEQ(lex, fieldName)
case lex.isKeyword("~"):
return parseFilterTilda(lex, fieldName)
case lex.isKeyword("not", "!"): case lex.isKeyword("not", "!"):
return parseFilterNot(lex, fieldName) return parseFilterNot(lex, fieldName)
case lex.isKeyword("exact"): case lex.isKeyword("exact"):
@ -1017,6 +1019,20 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
}) })
} }
// parseFilterTilda parses the `~regexp` form of the regexp filter and binds
// it to fieldName.
//
// It is dispatched from parseGenericFilter when the current token is `~`.
// It returns a *filterRegexp on success, or an error if the argument is not
// a valid regular expression.
func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
	// Advance past the current token; the dispatcher only calls this
	// function when that token is `~`.
	lex.nextToken()

	// Read the regexp argument following `~` via getCompoundFuncArg.
	pattern := getCompoundFuncArg(lex)

	compiled, compileErr := regexp.Compile(pattern)
	if compileErr != nil {
		return nil, fmt.Errorf("invalid regexp %q: %w", pattern, compileErr)
	}

	return &filterRegexp{
		fieldName: fieldName,
		re:        compiled,
	}, nil
}
func parseFilterEQ(lex *lexer, fieldName string) (filter, error) { func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
lex.nextToken() lex.nextToken()
phrase := getCompoundFuncArg(lex) phrase := getCompoundFuncArg(lex)
@ -1024,13 +1040,13 @@ func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
lex.nextToken() lex.nextToken()
f := &filterExactPrefix{ f := &filterExactPrefix{
fieldName: fieldName, fieldName: fieldName,
prefix: phrase, prefix: phrase,
} }
return f, nil return f, nil
} }
f := &filterExact{ f := &filterExact{
fieldName: fieldName, fieldName: fieldName,
value: phrase, value: phrase,
} }
return f, nil return f, nil
} }

View file

@ -571,7 +571,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`) f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`)
f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`) f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`)
f("level:error and _msg:(a or b)", "level:error (a or b)") f("level:error and _msg:(a or b)", "level:error (a or b)")
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:re("foo") !bar`) f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:~"foo" !bar`)
f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`) f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`)
// prefix search // prefix search
@ -761,9 +761,12 @@ func TestParseQuerySuccess(t *testing.T) {
f(`>=10 <20`, `>=10 <20`) f(`>=10 <20`, `>=10 <20`)
// re filter // re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`) f("re('foo|ba(r.+)')", `~"foo|ba(r.+)"`)
f("re(foo)", `re("foo")`) f("re(foo)", `~"foo"`)
f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`) f(`foo:re(foo-bar/baz.)`, `foo:~"foo-bar/baz."`)
f(`~foo.bar.baz`, `~"foo.bar.baz"`)
f(`foo:~~foo~ba/ba>z`, `foo:~"~foo~ba/ba>z"`)
f(`foo:~'.*'`, `foo:~".*"`)
// seq filter // seq filter
f(`seq()`, `seq()`) f(`seq()`, `seq()`)