This commit is contained in:
Aliaksandr Valialkin 2024-05-22 19:32:17 +02:00
parent b60cbd5c54
commit ff260230ea
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
5 changed files with 59 additions and 49 deletions

View file

@ -19,6 +19,9 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
## tip
* FEATURE: allow using `~"some_regexp"` [regexp filter](https://docs.victoriametrics.com/victorialogs/logsql/#regexp-filter) instead of `re("some_regexp")`.
* FEATURE: allow using `="some phrase"` [exact filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-filter) instead of `exact("some phrase")`.
* FEATURE: allow using `="some prefix"*` [exact prefix filter](https://docs.victoriametrics.com/victorialogs/logsql/#exact-prefix-filter) instead of `exact("some prefix"*)`.
* FEATURE: add ability to generate output fields according to the provided format string. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#format-pipe).
* FEATURE: add ability to extract fields with [`extract` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-extract).
* FEATURE: add ability to unpack JSON fields with [`unpack_json` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe) only if the given condition is met. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-unpack_json).

View file

@ -306,10 +306,10 @@ with `app` field equal to `nginx`:
_stream:{app="nginx"}
```
This query is equivalent to the following [exact()](#exact-filter) query, but the upper query usually works much faster:
This query is equivalent to the following [`exact` filter](#exact-filter) query, but the upper query usually works much faster:
```logsql
app:exact("nginx")
app:="nginx"
```
Performance tips:
@ -449,7 +449,7 @@ This query matches the following [log messages](https://docs.victoriametrics.com
This query doesn't match the following log messages:
- `Error: foobar`, since the `Error` [word](#word) starts with capital letter. Use `i(err*)` for this case. See [these docs](#case-insensitive-filter) for details.
- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `re("err")` for this case. See [these docs](#regexp-filter) for details.
- `fooerror`, since the `fooerror` [word](#word) doesn't start with `err`. Use `~"err"` for this case. See [these docs](#regexp-filter) for details.
Prefix filter can be applied to [phrases](#phrase-filter). For example, the following query matches
[log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) containing phrases with `unexpected fail` prefix:
@ -537,44 +537,38 @@ See also:
The [word filter](#word-filter) and [phrase filter](#phrase-filter) return [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
which contain the given word or phrase inside them. The message may contain additional text other than the requested word or phrase. If you need searching for log messages
or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact(...)` filter.
or [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field) with the exact value, then use the `exact` filter.
For example, the following query returns log messages with the exact value `fatal error: cannot find /foo/bar`:
```logsql
exact("fatal error: cannot find /foo/bar")
="fatal error: cannot find /foo/bar"
```
The query doesn't match the following log messages:
- `fatal error: cannot find /foo/bar/baz` or `some-text fatal error: cannot find /foo/bar`, since they contain an additional text
other than the specified in the `exact()` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details.
other than the specified in the `exact` filter. Use `"fatal error: cannot find /foo/bar"` query in this case. See [these docs](#phrase-filter) for details.
- `FATAL ERROR: cannot find /foo/bar`, since the `exact()` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case.
- `FATAL ERROR: cannot find /foo/bar`, since the `exact` filter is case-sensitive. Use `i("fatal error: cannot find /foo/bar")` in this case.
See [these docs](#case-insensitive-filter) for details.
By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with the exact `error` value at `log.level` field:
```logsql
log.level:exact("error")
log.level:="error"
```
Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
```logsql
log.уровень:exact("ошибка")
log.уровень:="ошибка"
```
The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
For example, the following query matches the `error` value in the field `log:level`:
```logsql
"log:level":exact("error")
```
The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one:
```logsql
"log:level":="error"
```
@ -591,11 +585,11 @@ See also:
### Exact prefix filter
Sometimes it is needed to find log messages starting with some prefix. This can be done with the `exact("prefix"*)` filter.
Sometimes it is needed to find log messages starting with some prefix. This can be done with the `="prefix"*` filter.
For example, the following query matches log messages, which start from `Processing request` prefix:
```logsql
exact("Processing request"*)
="Processing request"*
```
This filter matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field):
@ -605,34 +599,28 @@ This filter matches the following [log messages](https://docs.victoriametrics.co
It doesn't match the following log messages:
- `processing request foobar`, since the log message starts with lowercase `p`. Use `exact("processing request"*) OR exact("Processing request"*)`
- `processing request foobar`, since the log message starts with lowercase `p`. Use `="processing request"* OR ="Processing request"*`
query in this case. See [these docs](#logical-filter) for details.
- `start: Processing request`, since the log message doesn't start with `Processing request`. Use `"Processing request"` query in this case.
See [these docs](#phrase-filter) for details.
By default the `exact()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact()` filter and put a colon after it
By default the `exact` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the `exact` filter and put a colon after it
if it must be searched in the given field. For example, the following query returns log entries with `log.level` field, which starts with `err` prefix:
```logsql
log.level:exact("err"*)
log.level:="err"*
```
Both the field name and the phrase can contain arbitrary [utf-8](https://en.wikipedia.org/wiki/UTF-8)-encoded chars. For example:
```logsql
log.уровень:exact("ошиб"*)
log.уровень:="ошиб"*
```
The field name can be put inside quotes if it contains special chars, which may clash with the query syntax.
For example, the following query matches `log:level` values starting with `err` prefix:
```logsql
"log:level":exact("err"*)
```
The `exact(...)` filter can be replaced with `=...` for convenience. For example, the following query is equivalent to the previous one:
```logsql
"log:level":="err"*
```
@ -653,7 +641,7 @@ combined into a single [logical filter](#logical-filter). For example, the follo
containing either `error` or `fatal` exact values:
```logsql
log.level:(exact("error") OR exact("fatal"))
log.level:(="error" OR ="fatal")
```
While this solution works OK, LogsQL provides a simpler and faster solution for this case - the `in()` filter.
@ -702,7 +690,7 @@ The query matches the following [log messages](https://docs.victoriametrics.com/
The query doesn't match the following log messages:
- `FooError`, since the `FooError` [word](#word) has superfluous prefix `Foo`. Use `re("(?i)error")` for this case. See [these docs](#regexp-filter) for details.
- `FooError`, since the `FooError` [word](#word) has superfluous prefix `Foo`. Use `~"(?i)error"` for this case. See [these docs](#regexp-filter) for details.
- `too many Errors`, since the `Errors` [word](#word) has superfluous suffix `s`. Use `i(error*)` for this case.
By default the `i()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
@ -776,11 +764,11 @@ See also:
### Regexp filter
LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `re(...)` expression.
LogsQL supports regular expression filter with [re2 syntax](https://github.com/google/re2/wiki/Syntax) via `~"regex"` syntax.
For example, the following query returns all the log messages containing `err` or `warn` substrings:
```logsql
re("err|warn")
~"err|warn"
```
The query matches the following [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), which contain either `err` or `warn` substrings:
@ -791,33 +779,33 @@ The query matches the following [log messages](https://docs.victoriametrics.com/
The query doesn't match the following log messages:
- `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `re("(?i)(err|warn)")` query for case-insensitive regexp search.
- `ERROR: cannot open file`, since the `ERROR` word is in uppercase letters. Use `~"(?i)(err|warn)"` query for case-insensitive regexp search.
See [these docs](https://github.com/google/re2/wiki/Syntax) for details. See also [case-insensitive filter docs](#case-insensitive-filter).
- `it is warmer than usual`, since it contains neither `err` nor `warn` substrings.
By default the `re()` filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
By default the regexp filter is applied to the [`_msg` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field).
Specify the needed [field name](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) in front of the filter
in order to apply it to the given field. For example, the following query matches `event.original` field containing either `err` or `warn` substrings:
```logsql
event.original:re("err|warn")
event.original:~"err|warn"
```
If the field name contains special chars, which may clash with the query syntax, then it may be put into quotes in the query.
For example, the following query matches `event:original` field containing either `err` or `warn` substrings:
```logsql
"event:original":re("err|warn")
"event:original":~"err|warn"
```
Performance tips:
- Prefer combining simple [word filter](#word-filter) with [logical filter](#logical-filter) instead of using regexp filter.
For example, the `re("error|warning")` query can be substituted with `error OR warning` query, which usually works much faster.
Note that the `re("error|warning")` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches
For example, the `~"error|warning"` query can be substituted with `error OR warning` query, which usually works much faster.
Note that the `~"error|warning"` matches `errors` as well as `warnings` [words](#word), while `error OR warning` matches
only the specified [words](#word). See also [multi-exact filter](#multi-exact-filter).
- Prefer moving the regexp filter to the end of the [logical filter](#logical-filter), so more lightweight filters are executed first.
- Prefer using `exact("some prefix"*)` instead of `re("^some prefix")`, since the [exact()](#exact-prefix-filter) works much faster than the `re()` filter.
- Prefer using `="some prefix"*` instead of `~"^some prefix"`, since the [`exact` filter](#exact-prefix-filter) works much faster than the regexp filter.
- See [other performance tips](#performance-tips).
See also:
@ -1043,7 +1031,7 @@ Performance tips:
while moving less specific and the slowest filters (such as [regexp filter](#regexp-filter) and [case-insensitive filter](#case-insensitive-filter))
to the right. For example, if you need to find [log messages](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field)
with the `error` word, which match some `/foo/(bar|baz)` regexp,
it is better from performance PoV to use the query `error re("/foo/(bar|baz)")` instead of `re("/foo/(bar|baz)") error`.
it is better from performance PoV to use the query `error ~"/foo/(bar|baz)"` instead of `~"/foo/(bar|baz)" error`.
The most specific filter means that it matches the lowest number of log entries comparing to other filters.

View file

@ -16,7 +16,7 @@ type filterRegexp struct {
}
// String returns the textual LogsQL form of the regexp filter using the
// `~"regexp"` syntax.
//
// The result is the output of quoteFieldNameIfNeeded for fr.fieldName,
// followed by `~` and the regexp source quoted with %q.
func (fr *filterRegexp) String() string {
	return fmt.Sprintf("%s~%q", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
}
func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {

View file

@ -597,6 +597,8 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
return parseFilterLT(lex, fieldName)
case lex.isKeyword("="):
return parseFilterEQ(lex, fieldName)
case lex.isKeyword("~"):
return parseFilterTilda(lex, fieldName)
case lex.isKeyword("not", "!"):
return parseFilterNot(lex, fieldName)
case lex.isKeyword("exact"):
@ -1017,6 +1019,20 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
})
}
// parseFilterTilda parses the `~regexp` form of the regexp filter for the
// given fieldName.
//
// It returns a *filterRegexp holding the compiled expression, or an error
// if the argument is not a valid regular expression.
func parseFilterTilda(lex *lexer, fieldName string) (filter, error) {
	// The caller dispatches here when the current token is `~`; advance the
	// lexer before reading the regexp argument.
	lex.nextToken()

	pattern := getCompoundFuncArg(lex)
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return nil, fmt.Errorf("invalid regexp %q: %w", pattern, err)
	}

	return &filterRegexp{
		fieldName: fieldName,
		re:        compiled,
	}, nil
}
func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()
phrase := getCompoundFuncArg(lex)
@ -1024,13 +1040,13 @@ func parseFilterEQ(lex *lexer, fieldName string) (filter, error) {
lex.nextToken()
f := &filterExactPrefix{
fieldName: fieldName,
prefix: phrase,
prefix: phrase,
}
return f, nil
}
f := &filterExact{
fieldName: fieldName,
value: phrase,
value: phrase,
}
return f, nil
}

View file

@ -571,7 +571,7 @@ func TestParseQuerySuccess(t *testing.T) {
f(`foo:(bar baz or not :xxx)`, `foo:bar foo:baz or !foo:xxx`)
f(`(foo:bar and (foo:baz or aa:bb) and xx) and y`, `foo:bar (foo:baz or aa:bb) xx y`)
f("level:error and _msg:(a or b)", "level:error (a or b)")
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:re("foo") !bar`)
f("level: ( ((error or warn*) and re(foo))) (not (bar))", `(level:error or level:warn*) level:~"foo" !bar`)
f("!(foo bar or baz and not aa*)", `!(foo bar or baz !aa*)`)
// prefix search
@ -761,9 +761,12 @@ func TestParseQuerySuccess(t *testing.T) {
f(`>=10 <20`, `>=10 <20`)
// re filter
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)
f("re(foo)", `re("foo")`)
f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`)
f("re('foo|ba(r.+)')", `~"foo|ba(r.+)"`)
f("re(foo)", `~"foo"`)
f(`foo:re(foo-bar/baz.)`, `foo:~"foo-bar/baz."`)
f(`~foo.bar.baz`, `~"foo.bar.baz"`)
f(`foo:~~foo~ba/ba>z`, `foo:~"~foo~ba/ba>z"`)
f(`foo:~'.*'`, `foo:~".*"`)
// seq filter
f(`seq()`, `seq()`)