mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: support for short form of _time:(now-duration, now] filter: _time:duration
This commit is contained in:
parent
8fdfd13a29
commit
6751a08071
4 changed files with 107 additions and 45 deletions
|
@ -66,7 +66,7 @@ For example, the following query returns logs with the `error` [word](#word),
|
|||
which were ingested into VictoriaLogs during the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
error AND _time:[now-5m,now]
|
||||
error AND _time:5m
|
||||
```
|
||||
|
||||
This query consists of two [filters](#filters) joined with `AND` [operator](#logical-filter):
|
||||
|
@ -80,7 +80,7 @@ Typical LogsQL query constists of multiple [filters](#filters) joined with `AND`
|
|||
So LogsQL allows omitting `AND` words. For example, the following query is equivalent to the query above:
|
||||
|
||||
```logsql
|
||||
error _time:[now-5m,now]
|
||||
error _time:5m
|
||||
```
|
||||
|
||||
The query returns the following [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) by default:
|
||||
|
@ -95,34 +95,34 @@ then just refer them in the query with `field_name:*` [filter](#any-value-filter
|
|||
For example, the following query returns `host.hostname` field additionally to `_msg`, `_stream` and `_time` fields:
|
||||
|
||||
```logsql
|
||||
error _time:[now-5m,now] host.hostname:*
|
||||
error _time:5m host.hostname:*
|
||||
```
|
||||
|
||||
Suppose the query above selects too many rows because some buggy app pushes invalid error logs to VictoriaLogs. Suppose the app adds `buggy_app` [word](#word) to every log line.
|
||||
Then the following query removes all the logs from the buggy app, allowing us paying attention to the real errors:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] error NOT buggy_app
|
||||
_time:5m error NOT buggy_app
|
||||
```
|
||||
|
||||
This query uses `NOT` [operator](#logical-filter) for removing log lines from the buggy app. The `NOT` operator is used frequently, so it can be substituted with `!` char.
|
||||
So the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] error !buggy_app
|
||||
_time:5m error !buggy_app
|
||||
```
|
||||
|
||||
Suppose another buggy app starts pushing invalid error logs to VictoriaLogs - it adds `foobar` [word](#word) to every emitted log line.
|
||||
No problems - just add `!foobar` to the query in order to remove these buggy logs:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] error !buggy_app !foobar
|
||||
_time:5m error !buggy_app !foobar
|
||||
```
|
||||
|
||||
This query can be rewritten to more clear query with the `OR` [operator](#logical-filter) inside parentheses:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] error !(buggy_app OR foobar)
|
||||
_time:5m error !(buggy_app OR foobar)
|
||||
```
|
||||
|
||||
Note that the parentheses are required here, since otherwise the query won't return the expected results.
|
||||
|
@ -138,26 +138,26 @@ This word can be stored in other [field](https://docs.victoriametrics.com/Victor
|
|||
How to select error logs in this case? Just add the `log.level:` prefix in front of the `error` word:
|
||||
|
||||
```logsq
|
||||
_time:[now-5m,now] log.level:error !(buggy_app OR foobar)
|
||||
_time:5m log.level:error !(buggy_app OR foobar)
|
||||
```
|
||||
|
||||
The field name can be wrapped into quotes if it contains special chars or keywords, which may clash with LogsQL syntax.
|
||||
Any [word](#word) also can be wrapped into quotes. So the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
"_time":[now-5m,now] "log.level":"error" !("buggy_app" OR "foobar")
|
||||
"_time":"5m" "log.level":"error" !("buggy_app" OR "foobar")
|
||||
```
|
||||
|
||||
What if the application identifier - such as `buggy_app` and `foobar` - is stored in the `app` field? Correct - just add `app:` prefix in front of `buggy_app` and `foobar`:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] log.level:error !(app:buggy_app OR app:foobar)
|
||||
_time:5m log.level:error !(app:buggy_app OR app:foobar)
|
||||
```
|
||||
|
||||
The query can be simplified by moving the `app:` prefix outside the parentheses:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] log.level:error !app:(buggy_app OR foobar)
|
||||
_time:5m log.level:error !app:(buggy_app OR foobar)
|
||||
```
|
||||
|
||||
The `app` field uniquely identifies the application instance if a single instance runs per each unique `app`.
|
||||
|
@ -167,7 +167,7 @@ and query performance when querying the needed streams via [`_stream` filter](#s
|
|||
If the `app` field is associated with the log stream, then the query above can be rewritten to more performant one:
|
||||
|
||||
```logsql
|
||||
_time:[now-5m,now] log.level:error _stream:{app!~"buggy_app|foobar"}
|
||||
_time:5m log.level:error _stream:{app!~"buggy_app|foobar"}
|
||||
```
|
||||
|
||||
This query completely skips scanning for logs from `buggy_app` and `foobar` apps, thus significantly reducing disk read IO and CPU time
|
||||
|
@ -254,29 +254,27 @@ For example, the following query returns [log messages](https://docs.victoriamet
|
|||
ingested into VictoriaLogs during the last hour, which contain the `error` [word](#word):
|
||||
|
||||
```logsql
|
||||
_time:(now-1h, now) AND error
|
||||
_time:1h AND error
|
||||
```
|
||||
|
||||
The following formats are supported for `_time` filter:
|
||||
|
||||
- Fixed time:
|
||||
- `_time:YYYY-MM-DD` - matches all the log messages for the particular day. For example, `_time:2023-04-25` matches all the log messages for April 25, 2023 by UTC.
|
||||
- `_time:YYYY-MM` - matches all the log messages for the particular month. For example, `_time:2023-02` matches all the log messages for February, 2023 by UTC.
|
||||
- `_time:YYYY` - matches all the log messages for the particular year. For example, `_time:2023` matches all the log message for 2023 by UTC.
|
||||
- `_time:YYYY-MM-DDTHH` - matches all the log messages for the particular hour. For example, `_time:2023-04-25T22` matches all the log messages from `22:00` to `23:00`
|
||||
on April 25, 2023 by UTC.
|
||||
- `_time:YYYY-MM-DDTHH:MM` - matches all the log messages for the particular minute. For example, `_time:2023-04-25T22:45` matches all the log messages from `22:45` to `22:46`
|
||||
on April 25, 2023 by UTC.
|
||||
- `_time:YYYY-MM-DDTHH:MM:SS` - matches all the log messages for the particular second. For example, `_time:2023-04-25T22:45:59` matches all the log messages
|
||||
from `22:45:59` to `23:46:00` on April 25, 2023 by UTC.
|
||||
|
||||
- Time range:
|
||||
- `_time:[min_time, max_time]` - matches log messages on the time range `[min_time, max_time]`, including both `min_time` and `max_time`.
|
||||
- `_time:duration`, the equivalent to `_time:(now-duration, now]`. It matches logs with timestamps on the time range `(now-duration, now]`. Examples:
|
||||
- `_time:5m` - returns logs for the last 5 minutes
|
||||
- `_time:2.5d15m42.345s` - returns logs for the last 2.5 days, 15 minutes and 42.345 seconds
|
||||
- `_time:1y` - returns logs for the last year
|
||||
- `_time:YYYY-MM-DD` - matches all the logs for the particular day by UTC. For example, `_time:2023-04-25`.
|
||||
- `_time:YYYY-MM` - matches all the logs for the particular month by UTC. For example, `_time:2023-02`.
|
||||
- `_time:YYYY` - matches all the logs for the particular year by UTC. For example, `_time:2023`.
|
||||
- `_time:YYYY-MM-DDTHH` - matches all the logs for the particular hour by UTC. For example, `_time:2023-04-25T22`.
|
||||
- `_time:YYYY-MM-DDTHH:MM` - matches all the logs for the particular minute by UTC. For example, `_time:2023-04-25T22:45`.
|
||||
- `_time:YYYY-MM-DDTHH:MM:SS` - matches all the logs for the particular second by UTC. For example, `_time:2023-04-25T22:45:59`.
|
||||
- `_time:[min_time, max_time]` - matches logs on the time range `[min_time, max_time]`, including both `min_time` and `max_time`.
|
||||
The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
For example, `_time:[2023-04-01, 2023-04-30]` matches log messages for the whole April, 2023 by UTC, e.g. it is equivalent to `_time:2023-04`.
|
||||
- `_time:[min_time, max_time)` - matches log messages on the time range `[min_time, max_time)`, not including `max_time`.
|
||||
For example, `_time:[2023-04-01, 2023-04-30]` matches logs for the whole April, 2023 by UTC, e.g. it is equivalent to `_time:2023-04`.
|
||||
- `_time:[min_time, max_time)` - matches logs on the time range `[min_time, max_time)`, not including `max_time`.
|
||||
The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
For example, `_time:[2023-02-01, 2023-03-01)` matches log messages for the whole February, 2023 by UTC, e.g. it is equivalent to `_time:2023-02`.
|
||||
For example, `_time:[2023-02-01, 2023-03-01)` matches logs for the whole February, 2023 by UTC, e.g. it is equivalent to `_time:2023-02`.
|
||||
|
||||
It is possible to specify time zone offset for all the absolute time formats by appending `+hh:mm` or `-hh:mm` suffix.
|
||||
For example, `_time:2023-04-25+05:30` matches all the log messages on April 25, 2023 by India time zone,
|
||||
|
@ -285,7 +283,7 @@ while `_time:2023-02-07:00` matches all the log messages from February, 2023 by
|
|||
Performance tips:
|
||||
|
||||
- It is recommended specifying the smallest possible time range during the search, since it reduces the amounts of log entries, which need to be scanned during the query.
|
||||
For example, `_time:[now-1h, now]` is usually faster than `_time:[now-5h, now]`.
|
||||
For example, `_time:1h` is usually faster than `_time:5h`.
|
||||
|
||||
- While LogsQL supports arbitrary number of `_time:...` filters at any level of [logical filters](#logical-filter),
|
||||
it is recommended specifying a single `_time` filter at the top level of the query.
|
||||
|
@ -661,7 +659,7 @@ For example, the following query selects all the logs for the last hour for user
|
|||
during the last day:
|
||||
|
||||
```logsql
|
||||
_time:[now-1h,now] AND user_id:in(_time:[now-1d,now] AND path:admin | fields user_id)
|
||||
_time:1h AND user_id:in(_time:1d AND path:admin | fields user_id)
|
||||
```
|
||||
|
||||
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
|
||||
|
|
|
@ -168,7 +168,7 @@ received from [streams](https://docs.victoriametrics.com/VictoriaLogs/keyConcept
|
|||
during the last 5 minutes:
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:[now-5m,now] AND error' | wc -l
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_stream:{app="nginx"} AND _time:5m AND error' | wc -l
|
||||
```
|
||||
|
||||
See [these docs](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-filter) about `_stream` filter,
|
||||
|
@ -194,7 +194,7 @@ The following example calculates stats on the number of log messages received du
|
|||
grouped by `log.level` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model):
|
||||
|
||||
```bash
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_time:[now-5m,now] log.level:*' | jq -r '."log.level"' | sort | uniq -c
|
||||
curl http://localhost:9428/select/logsql/query -d 'query=_time:5m log.level:*' | jq -r '."log.level"' | sort | uniq -c
|
||||
```
|
||||
|
||||
The query selects all the log messages with non-empty `log.level` field via ["any value" filter](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#any-value-filter),
|
||||
|
|
|
@ -488,7 +488,7 @@ func parseAnyCaseFilter(lex *lexer, fieldName string) (filter, error) {
|
|||
})
|
||||
}
|
||||
|
||||
func parseFuncArgMaybePrefix(lex *lexer, funcName, fieldName string, f func(arg string, isPrefiFilter bool) (filter, error)) (filter, error) {
|
||||
func parseFuncArgMaybePrefix(lex *lexer, funcName, fieldName string, callback func(arg string, isPrefiFilter bool) (filter, error)) (filter, error) {
|
||||
phrase := lex.token
|
||||
lex.nextToken()
|
||||
if !lex.isKeyword("(") {
|
||||
|
@ -510,7 +510,7 @@ func parseFuncArgMaybePrefix(lex *lexer, funcName, fieldName string, f func(arg
|
|||
return nil, fmt.Errorf("unexpected token %q instead of ')' in %s()", lex.token, funcName)
|
||||
}
|
||||
lex.nextToken()
|
||||
return f(phrase, isPrefixFilter)
|
||||
return callback(phrase, isPrefixFilter)
|
||||
}
|
||||
|
||||
func parseLenRangeFilter(lex *lexer, fieldName string) (filter, error) {
|
||||
|
@ -781,6 +781,25 @@ func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (f
|
|||
return callback(args)
|
||||
}
|
||||
|
||||
// startsWithYear returns true if s starts from YYYY
|
||||
func startsWithYear(s string) bool {
|
||||
if len(s) < 4 {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < 4; i++ {
|
||||
c := s[i]
|
||||
if c < '0' || c > '9' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
s = s[4:]
|
||||
if len(s) == 0 {
|
||||
return true
|
||||
}
|
||||
c := s[0]
|
||||
return c == '-' || c == '+' || c == 'Z' || c == 'z'
|
||||
}
|
||||
|
||||
func parseTimeFilter(lex *lexer) (*timeFilter, error) {
|
||||
startTimeInclude := false
|
||||
switch {
|
||||
|
@ -789,17 +808,36 @@ func parseTimeFilter(lex *lexer) (*timeFilter, error) {
|
|||
case lex.isKeyword("("):
|
||||
startTimeInclude = false
|
||||
default:
|
||||
// Try parsing '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
|
||||
startTime, stringRepr, err := parseTime(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse _time filter: %w", err)
|
||||
}
|
||||
endTime := getMatchingEndTime(startTime, stringRepr)
|
||||
tf := &timeFilter{
|
||||
minTimestamp: startTime,
|
||||
maxTimestamp: endTime,
|
||||
s := getCompoundToken(lex)
|
||||
if strings.ToLower(s) == "now" || startsWithYear(s) {
|
||||
// Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
|
||||
t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse _time filter: %w", err)
|
||||
}
|
||||
startTime := int64(t * 1e9)
|
||||
endTime := getMatchingEndTime(startTime, s)
|
||||
tf := &timeFilter{
|
||||
minTimestamp: startTime,
|
||||
maxTimestamp: endTime,
|
||||
|
||||
stringRepr: stringRepr,
|
||||
stringRepr: s,
|
||||
}
|
||||
return tf, nil
|
||||
}
|
||||
// Parse _time:duration, which transforms to '_time:(now-duration, now]'
|
||||
d, err := promutils.ParseDuration(s)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse duration in _time filter: %w", err)
|
||||
}
|
||||
if d < 0 {
|
||||
d = -d
|
||||
}
|
||||
tf := &timeFilter{
|
||||
minTimestamp: lex.currentTimestamp - int64(d),
|
||||
maxTimestamp: lex.currentTimestamp,
|
||||
|
||||
stringRepr: s,
|
||||
}
|
||||
return tf, nil
|
||||
}
|
||||
|
|
|
@ -79,6 +79,30 @@ func TestNewStreamFilterFailure(t *testing.T) {
|
|||
f("{foo='bar' baz='x'}")
|
||||
}
|
||||
|
||||
func TestParseTimeDuration(t *testing.T) {
|
||||
f := func(s string, durationExpected time.Duration) {
|
||||
t.Helper()
|
||||
q, err := ParseQuery("_time:" + s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
tf, ok := q.f.(*timeFilter)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected filter; got %T; want *timeFilter; filter: %s", q.f, q.f)
|
||||
}
|
||||
if tf.stringRepr != s {
|
||||
t.Fatalf("unexpected string represenation for timeFilter; got %q; want %q", tf.stringRepr, s)
|
||||
}
|
||||
duration := time.Duration(tf.maxTimestamp - tf.minTimestamp)
|
||||
if duration != durationExpected {
|
||||
t.Fatalf("unexpected duration; got %s; want %s", duration, durationExpected)
|
||||
}
|
||||
}
|
||||
f("5m", 5*time.Minute)
|
||||
f("-5.5m", 5*time.Minute + 30*time.Second)
|
||||
f("3d2h12m34s45ms", 3*24*time.Hour + 2*time.Hour+12*time.Minute+34*time.Second + 45*time.Millisecond)
|
||||
}
|
||||
|
||||
func TestParseTimeRange(t *testing.T) {
|
||||
f := func(s string, minTimestampExpected, maxTimestampExpected int64) {
|
||||
t.Helper()
|
||||
|
@ -606,6 +630,7 @@ func TestParseQuerySuccess(t *testing.T) {
|
|||
f(`_time:2023-01-02T04:05:06.789-02:30`, `_time:2023-01-02T04:05:06.789-02:30`)
|
||||
f(`_time:2023-01-02T04:05:06.789+02:30`, `_time:2023-01-02T04:05:06.789+02:30`)
|
||||
f(`_time:[1234567890, 1400000000]`, `_time:[1234567890,1400000000]`)
|
||||
f(`_time:2d3h5.5m3s45ms`, `_time:2d3h5.5m3s45ms`)
|
||||
|
||||
// reserved keywords
|
||||
f("and", `"and"`)
|
||||
|
@ -837,6 +862,7 @@ func TestParseQueryFailure(t *testing.T) {
|
|||
f("_time:[2023-01-02T04:05:06+12,2023]")
|
||||
f("_time:[2023-01-02T04:05:06-12,2023]")
|
||||
f("_time:2023-01-02T04:05:06.789")
|
||||
f("_time:234foo")
|
||||
|
||||
// long query with error
|
||||
f(`very long query with error aaa ffdfd fdfdfd fdfd:( ffdfdfdfdfd`)
|
||||
|
|
Loading…
Reference in a new issue