From 037652d5aee069289bd762e7dcab5b808735af77 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 26 Sep 2024 12:43:16 +0200 Subject: [PATCH] app/vlinsert: support `_time` field without timezone information during data ingestion Use local timezone of the host server in this case. The timezone can be overridden with TZ environment variable if needed. While at it, allow using whitespace instead of T as a delimiter between date and time in the ingested _time field. For example, '2024-09-20 10:20:30' is now accepted during data ingestion. This is valid ISO8601 format, which is used by some log shippers, so it should be supported. This format is also known as SQL datetime format. Also assume local time zone when time without timezone information is passed to querying APIs. Previously such a time was parsed in UTC timezone. Add `Z` to the end of the time string if the old behaviour is preferred. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721 --- .../elasticsearch/elasticsearch_test.go | 4 +- app/vlinsert/insertutils/timestamp_test.go | 3 - app/vlinsert/jsonline/jsonline_test.go | 6 +- app/vmctl/utils/time_test.go | 25 ++++----- docs/README.md | 6 +- docs/VictoriaLogs/CHANGELOG.md | 2 + docs/VictoriaLogs/LogsQL.md | 23 ++++---- docs/VictoriaLogs/data-ingestion/README.md | 12 ++-- docs/VictoriaLogs/keyConcepts.md | 5 +- docs/VictoriaLogs/querying/README.md | 4 +- lib/httputils/time_test.go | 23 ++++---- lib/logstorage/parser_test.go | 24 +++----- lib/logstorage/syslog_parser.go | 2 +- lib/logstorage/values_encoder.go | 55 +++++++++++-------- lib/logstorage/values_encoder_test.go | 10 ++-- lib/promutils/time.go | 10 ++++ lib/promutils/time_test.go | 6 -- lib/timeutil/timezone.go | 30 ++++++++++ 18 files changed, 144 insertions(+), 106 deletions(-) create mode 100644 lib/timeutil/timezone.go diff --git a/app/vlinsert/elasticsearch/elasticsearch_test.go b/app/vlinsert/elasticsearch/elasticsearch_test.go index 1874fdefc..ca88d2ccc 100644 --- 
a/app/vlinsert/elasticsearch/elasticsearch_test.go +++ b/app/vlinsert/elasticsearch/elasticsearch_test.go @@ -76,14 +76,14 @@ func TestReadBulkRequest_Success(t *testing.T) { data := `{"create":{"_index":"filebeat-8.8.0"}} {"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"} {"create":{"_index":"filebeat-8.8.0"}} -{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"} +{"@timestamp":"2023-06-06 04:48:12.735+01:00","message":"baz"} {"index":{"_index":"filebeat-8.8.0"}} {"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"} ` timeField := "@timestamp" msgField := "message" rowsExpected := 3 - timestampsExpected := []int64{1686026891735000000, 1686026892735000000, 1686026893735000000} + timestampsExpected := []int64{1686026891735000000, 1686023292735000000, 1686026893735000000} resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"} {"@timestamp":"","_msg":"baz"} {"_msg":"xyz","@timestamp":"","x":"y"}` diff --git a/app/vlinsert/insertutils/timestamp_test.go b/app/vlinsert/insertutils/timestamp_test.go index ca7f9d572..a386f09fa 100644 --- a/app/vlinsert/insertutils/timestamp_test.go +++ b/app/vlinsert/insertutils/timestamp_test.go @@ -66,9 +66,6 @@ func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) { f("foobar") - // no Z at the end - f("2024-06-18T23:37:20") - // incomplete time f("2024-06-18") f("2024-06-18T23:37") diff --git a/app/vlinsert/jsonline/jsonline_test.go b/app/vlinsert/jsonline/jsonline_test.go index 153b4db19..068bfb92f 100644 --- a/app/vlinsert/jsonline/jsonline_test.go +++ b/app/vlinsert/jsonline/jsonline_test.go @@ -23,13 +23,13 @@ func TestProcessStreamInternal_Success(t *testing.T) { } data := `{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"} -{"@timestamp":"2023-06-06T04:48:12.735Z","message":"baz"} 
-{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"} +{"@timestamp":"2023-06-06T04:48:12.735+01:00","message":"baz"} +{"message":"xyz","@timestamp":"2023-06-06 04:48:13.735Z","x":"y"} ` timeField := "@timestamp" msgField := "message" rowsExpected := 3 - timestampsExpected := []int64{1686026891735000000, 1686026892735000000, 1686026893735000000} + timestampsExpected := []int64{1686026891735000000, 1686023292735000000, 1686026893735000000} resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"} {"@timestamp":"","_msg":"baz"} {"_msg":"xyz","@timestamp":"","x":"y"}` diff --git a/app/vmctl/utils/time_test.go b/app/vmctl/utils/time_test.go index b469a5899..b2210c9e2 100644 --- a/app/vmctl/utils/time_test.go +++ b/app/vmctl/utils/time_test.go @@ -36,40 +36,37 @@ func TestGetTime_Success(t *testing.T) { } // only year - f("2019", time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)) + f("2019Z", time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)) // year and month - f("2019-01", time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)) + f("2019-01Z", time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)) // year and not first month - f("2019-02", time.Date(2019, 2, 1, 0, 0, 0, 0, time.UTC)) + f("2019-02Z", time.Date(2019, 2, 1, 0, 0, 0, 0, time.UTC)) // year, month and day - f("2019-02-01", time.Date(2019, 2, 1, 0, 0, 0, 0, time.UTC)) + f("2019-02-01Z", time.Date(2019, 2, 1, 0, 0, 0, 0, time.UTC)) // year, month and not first day - f("2019-02-10", time.Date(2019, 2, 10, 0, 0, 0, 0, time.UTC)) + f("2019-02-10Z", time.Date(2019, 2, 10, 0, 0, 0, 0, time.UTC)) // year, month, day and time - f("2019-02-02T00", time.Date(2019, 2, 2, 0, 0, 0, 0, time.UTC)) + f("2019-02-02T00Z", time.Date(2019, 2, 2, 0, 0, 0, 0, time.UTC)) // year, month, day and one hour time - f("2019-02-02T01", time.Date(2019, 2, 2, 1, 0, 0, 0, time.UTC)) + f("2019-02-02T01Z", time.Date(2019, 2, 2, 1, 0, 0, 0, time.UTC)) // time with zero minutes - f("2019-02-02T01:00", 
time.Date(2019, 2, 2, 1, 0, 0, 0, time.UTC)) + f("2019-02-02T01:00Z", time.Date(2019, 2, 2, 1, 0, 0, 0, time.UTC)) // time with one minute - f("2019-02-02T01:01", time.Date(2019, 2, 2, 1, 1, 0, 0, time.UTC)) + f("2019-02-02T01:01Z", time.Date(2019, 2, 2, 1, 1, 0, 0, time.UTC)) // time with zero seconds - f("2019-02-02T01:01:00", time.Date(2019, 2, 2, 1, 1, 0, 0, time.UTC)) + f("2019-02-02T01:01:00Z", time.Date(2019, 2, 2, 1, 1, 0, 0, time.UTC)) // timezone with one second - f("2019-02-02T01:01:01", time.Date(2019, 2, 2, 1, 1, 1, 0, time.UTC)) - - // time with two second and timezone - f("2019-07-07T20:01:02Z", time.Date(2019, 7, 7, 20, 1, 02, 0, time.UTC)) + f("2019-02-02T01:01:01Z", time.Date(2019, 2, 2, 1, 1, 1, 0, time.UTC)) // time with seconds and timezone f("2019-07-07T20:47:40+03:00", func() time.Time { diff --git a/docs/README.md b/docs/README.md index 33e02541c..f0e2e1af1 100644 --- a/docs/README.md +++ b/docs/README.md @@ -974,11 +974,11 @@ in [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). - Unix timestamps in milliseconds. For example, `1562529662678`. - [RFC3339](https://www.ietf.org/rfc/rfc3339.txt). For example, `2022-03-29T01:02:03Z` or `2022-03-29T01:02:03+02:30`. - Partial RFC3339. Examples: `2022`, `2022-03`, `2022-03-29`, `2022-03-29T01`, `2022-03-29T01:02`, `2022-03-29T01:02:03`. - The partial RFC3339 time is in UTC timezone by default. It is possible to specify timezone there by adding `+hh:mm` or `-hh:mm` suffix to partial time. - For example, `2022-03-01+06:30` is `2022-03-01` at `06:30` timezone. + The partial RFC3339 time is in local timezone of the host where VictoriaMetrics runs. + It is possible to specify the needed timezone by adding `Z` (UTC), `+hh:mm` or `-hh:mm` suffix to partial time. + For example, `2022-03-01Z` corresponds to the given date in UTC timezone, while `2022-03-01+06:30` corresponds to `2022-03-01` date at `06:30` timezone. - Relative duration comparing to the current time. 
For example, `1h5m`, `-1h5m` or `now-1h5m` means `one hour and five minutes ago`, while `now` means `now`. - ## Graphite API usage VictoriaMetrics supports data ingestion in Graphite protocol - see [these docs](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) for details. diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index df52a6060..6402685a4 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,10 +19,12 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta * FEATURE: drop logs without [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) field or with empty `_msg` field, since this field is required to be non-empty in [VictoriaLogs data model](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785). * FEATURE: improve performance of analytical queries, which do not need reading the `_time` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070). * FEATURE: add [`blocks_count` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#blocks_count-pipe), which can be used for counting the number of matching blocks for the given query. For example, `_time:5m | blocks_count` returns the number of blocks with logs for the last 5 minutes. This pipe can be useful for debugging purposes. +* FEATURE: support [ingesting logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/) with `_time` field, which doesn't contain timezone information. For example, `2024-09-20T10:20:30`. In this case the local timezone of the host where VictoriaLogs runs is used. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721). * BUGFIX: fix Windows build, which has been broken in [v0.29.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.29.0-victorialogs). 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6973). * BUGFIX: properly return logs from [`/select/logsql/tail` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#live-tailing) if the query contains [`_time:some_duration` filter](https://docs.victoriametrics.com/victorialogs/logsql/#time-filter) like `_time:5m`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7028). The bug has been introduced in [v0.29.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.29.0-victorialogs). * BUGFIX: properly return logs without [`_msg`](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) field when `*` query is passed to [`/select/logsql/query` endpoint](https://docs.victoriametrics.com/victorialogs/querying/#querying-logs) together with positive `limit` arg. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6785). Thanks to @jiekun for identifying the root cause of the issue. +* BUGFIX: support [ingesting logs](https://docs.victoriametrics.com/victorialogs/data-ingestion/) with `_time` field containing whitespace delimiter between the date and time instead of `T` delimiter. For example, `2024-09-20 10:20:30`. This is valid [ISO8601 format](https://en.wikipedia.org/wiki/ISO_8601) aka `SQL datetime` format, which sometimes is used in production. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721). 
## [v0.29.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.29.0-victorialogs) diff --git a/docs/VictoriaLogs/LogsQL.md b/docs/VictoriaLogs/LogsQL.md index 5e2ee15a8..8802cfb38 100644 --- a/docs/VictoriaLogs/LogsQL.md +++ b/docs/VictoriaLogs/LogsQL.md @@ -294,28 +294,31 @@ The following formats are supported for `_time` filter: - `_time:5m` - returns logs for the last 5 minutes - `_time:2.5d15m42.345s` - returns logs for the last 2.5 days, 15 minutes and 42.345 seconds - `_time:1y` - returns logs for the last year -- `_time:YYYY-MM-DD` - matches all the logs for the particular day by UTC. For example, `_time:2023-04-25` matches logs on April 25, 2023 by UTC. -- `_time:YYYY-MM` - matches all the logs for the particular month by UTC. For example, `_time:2023-02` matches logs on February, 2023 by UTC. -- `_time:YYYY` - matches all the logs for the particular year by UTC. For example, `_time:2023` matches logs on 2023 by UTC. -- `_time:YYYY-MM-DDTHH` - matches all the logs for the particular hour by UTC. For example, `_time:2023-04-25T22` matches logs on April 25, 2023 at 22 hour by UTC. -- `_time:YYYY-MM-DDTHH:MM` - matches all the logs for the particular minute by UTC. For example, `_time:2023-04-25T22:45` matches logs on April 25, 2023 at 22:45 by UTC. -- `_time:YYYY-MM-DDTHH:MM:SS` - matches all the logs for the particular second by UTC. For example, `_time:2023-04-25T22:45:59` matches logs on April 25, 2023 at 22:45:59 by UTC. +- `_time:YYYY-MM-DDZ` - matches all the logs for the particular day by UTC. For example, `_time:2023-04-25Z` matches logs on April 25, 2023 by UTC. +- `_time:YYYY-MMZ` - matches all the logs for the particular month by UTC. For example, `_time:2023-02Z` matches logs on February, 2023 by UTC. +- `_time:YYYYZ` - matches all the logs for the particular year by UTC. For example, `_time:2023Z` matches logs on 2023 by UTC. +- `_time:YYYY-MM-DDTHHZ` - matches all the logs for the particular hour by UTC. 
For example, `_time:2023-04-25T22Z` matches logs on April 25, 2023 at 22 hour by UTC. +- `_time:YYYY-MM-DDTHH:MMZ` - matches all the logs for the particular minute by UTC. For example, `_time:2023-04-25T22:45Z` matches logs on April 25, 2023 at 22:45 by UTC. +- `_time:YYYY-MM-DDTHH:MM:SSZ` - matches all the logs for the particular second by UTC. For example, `_time:2023-04-25T22:45:59Z` matches logs on April 25, 2023 at 22:45:59 by UTC. - `_time:[min_time, max_time]` - matches logs on the time range `[min_time, max_time]`, including both `min_time` and `max_time`. The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats). - For example, `_time:[2023-04-01, 2023-04-30]` matches logs for the whole April, 2023 by UTC, e.g. it is equivalent to `_time:2023-04`. + For example, `_time:[2023-04-01Z, 2023-04-30Z]` matches logs for the whole April, 2023 by UTC, i.e. it is equivalent to `_time:2023-04Z`. - `_time:[min_time, max_time)` - matches logs on the time range `[min_time, max_time)`, not including `max_time`. The `min_time` and `max_time` can contain any format specified [here](https://docs.victoriametrics.com/#timestamp-formats). - For example, `_time:[2023-02-01, 2023-03-01)` matches logs for the whole February, 2023 by UTC, e.g. it is equivalent to `_time:2023-02`. + For example, `_time:[2023-02-01Z, 2023-03-01Z)` matches logs for the whole February, 2023 by UTC, i.e. it is equivalent to `_time:2023-02Z`. It is possible to specify time zone offset for all the absolute time formats by appending `+hh:mm` or `-hh:mm` suffix. For example, `_time:2023-04-25+05:30` matches all the logs on April 25, 2023 by India time zone, while `_time:2023-02-07:00` matches all the logs on February, 2023 by California time zone. +If the timezone offset information is missing, then the local time zone of the host where VictoriaLogs runs is used. 
+For example, `_time:2023-10-20` matches all the logs for `2023-10-20` day according to the local time zone of the host where VictoriaLogs runs. + It is possible to specify generic offset for the selected time range by appending `offset` after the `_time` filter. Examples: - `_time:5m offset 1h` matches logs on the time range `(now-1h5m, now-1h]`. -- `_time:2023-07 offset 5h30m` matches logs on July, 2023 by UTC with offset 5h30m. -- `_time:[2023-02-01, 2023-03-01) offset 1w` matches logs the week before the time range `[2023-02-01, 2023-03-01)` by UTC. +- `_time:2023-07Z offset 5h30m` matches logs on July, 2023 by UTC with offset 5h30m. +- `_time:[2023-02-01Z, 2023-03-01Z) offset 1w` matches logs the week before the time range `[2023-02-01Z, 2023-03-01Z)` by UTC. Performance tips: diff --git a/docs/VictoriaLogs/data-ingestion/README.md b/docs/VictoriaLogs/data-ingestion/README.md index 350c8fd71..523e05697 100644 --- a/docs/VictoriaLogs/data-ingestion/README.md +++ b/docs/VictoriaLogs/data-ingestion/README.md @@ -45,9 +45,9 @@ It is possible to push thousands of log lines in a single request to this API. If the [timestamp field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) is set to `"0"`, then the current timestamp at VictoriaLogs side is used per each ingested log line. -Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. For example, `2023-06-20T15:32:10Z`. -Optional fractional part of seconds can be specified after the dot - `2023-06-20T15:32:10.123Z`. -Timezone can be specified instead of `Z` suffix - `2023-06-20T15:32:10+02:00`. +Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) or [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. +For example, `2023-06-20T15:32:10Z` or `2023-06-20 15:32:10.123456789+02:00`. 
+If timezone information is missing (for example, `2023-06-20 15:32:10`), then the time is parsed in the local timezone of the host where VictoriaLogs runs. See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields, which must be present in the ingested log messages. @@ -95,9 +95,9 @@ It is possible to push unlimited number of log lines in a single request to this If the [timestamp field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) is set to `"0"`, then the current timestamp at VictoriaLogs side is used per each ingested log line. -Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. For example, `2023-06-20T15:32:10Z`. -Optional fractional part of seconds can be specified after the dot - `2023-06-20T15:32:10.123Z`. -Timezone can be specified instead of `Z` suffix - `2023-06-20T15:32:10+02:00`. +Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) or [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. +For example, `2023-06-20T15:32:10Z` or `2023-06-20 15:32:10.123456789+02:00`. +If timezone information is missing (for example, `2023-06-20 15:32:10`), then the time is parsed in the local timezone of the host where VictoriaLogs runs. See [these docs](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) for details on fields, which must be present in the ingested log messages. diff --git a/docs/VictoriaLogs/keyConcepts.md b/docs/VictoriaLogs/keyConcepts.md index f091ee1dd..dd1e5f8d7 100644 --- a/docs/VictoriaLogs/keyConcepts.md +++ b/docs/VictoriaLogs/keyConcepts.md @@ -135,8 +135,7 @@ during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-inges ### Time field The ingested [log entries](#data-model) may contain `_time` field with the timestamp of the ingested log entry. -The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. 
The most commonly used subset of [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) -is also supported. It is allowed specifying seconds part of the timestamp with any precision up to nanoseconds. +The timestamp must be in [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) or [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) format. For example, the following [log entry](#data-model) contains valid timestamp with millisecond precision in the `_time` field: ```json @@ -146,6 +145,8 @@ For example, the following [log entry](#data-model) contains valid timestamp wit } ``` +If timezone information is missing in the `_time` field value, then the local timezone of the host where VictoriaLogs runs is used. + If the actual timestamp has other than `_time` field name, then it is possible to specify the real timestamp field via `_time_field` query arg during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/). For example, if timestamp is located in the `event.created` field, then specify `_time_field=event.created` query arg diff --git a/docs/VictoriaLogs/querying/README.md b/docs/VictoriaLogs/querying/README.md index fe727cac6..357955050 100644 --- a/docs/VictoriaLogs/querying/README.md +++ b/docs/VictoriaLogs/querying/README.md @@ -298,7 +298,7 @@ For example, the following command returns the number of logs per each `level` [ across logs over `2024-01-01` day by UTC: ```sh -curl http://localhost:9428/select/logsql/stats_query -d 'query=_time:1d | stats by (level) count(*)' -d 'time=2024-01-02' +curl http://localhost:9428/select/logsql/stats_query -d 'query=_time:1d | stats by (level) count(*)' -d 'time=2024-01-02Z' ``` Below is an example JSON output returned from this endpoint: @@ -373,7 +373,7 @@ For example, the following command returns the number of logs per each `level` [ across logs over `2024-01-01` day by UTC with 6-hour granularity: ```sh -curl http://localhost:9428/select/logsql/stats_query_range -d 'query=* | stats by (level) 
count(*)' -d 'start=2024-01-01' -d 'end=2024-01-02' -d 'step=6h' +curl http://localhost:9428/select/logsql/stats_query_range -d 'query=* | stats by (level) count(*)' -d 'start=2024-01-01Z' -d 'end=2024-01-02Z' -d 'step=6h' ``` Below is an example JSON output returned from this endpoint: diff --git a/lib/httputils/time_test.go b/lib/httputils/time_test.go index 73667e719..751f4cf3a 100644 --- a/lib/httputils/time_test.go +++ b/lib/httputils/time_test.go @@ -35,18 +35,17 @@ func TestGetTimeSuccess(t *testing.T) { } } - f("2019", 1546300800000) - f("2019-01", 1546300800000) - f("2019-02", 1548979200000) - f("2019-02-01", 1548979200000) - f("2019-02-02", 1549065600000) - f("2019-02-02T00", 1549065600000) - f("2019-02-02T01", 1549069200000) - f("2019-02-02T01:00", 1549069200000) - f("2019-02-02T01:01", 1549069260000) - f("2019-02-02T01:01:00", 1549069260000) - f("2019-02-02T01:01:01", 1549069261000) - f("2019-07-07T20:01:02Z", 1562529662000) + f("2019Z", 1546300800000) + f("2019-01Z", 1546300800000) + f("2019-02Z", 1548979200000) + f("2019-02-01Z", 1548979200000) + f("2019-02-02Z", 1549065600000) + f("2019-02-02T00Z", 1549065600000) + f("2019-02-02T01Z", 1549069200000) + f("2019-02-02T01:00Z", 1549069200000) + f("2019-02-02T01:01Z", 1549069260000) + f("2019-02-02T01:01:00Z", 1549069260000) + f("2019-02-02T01:01:01Z", 1549069261000) f("2020-02-21T16:07:49.433Z", 1582301269433) f("2019-07-07T20:47:40+03:00", 1562521660000) f("-292273086-05-16T16:47:06Z", minTimeMsecs) diff --git a/lib/logstorage/parser_test.go b/lib/logstorage/parser_test.go index 5a2ec2ec9..a4d393d42 100644 --- a/lib/logstorage/parser_test.go +++ b/lib/logstorage/parser_test.go @@ -159,7 +159,6 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY -> _time:[YYYY, YYYY+1) minTimestamp = time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2024, time.January, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023", minTimestamp, maxTimestamp) f("2023Z", minTimestamp, 
maxTimestamp) // _time:YYYY-hh:mm -> _time:[YYYY-hh:mm, (YYYY+1)-hh:mm) @@ -175,7 +174,6 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY-MM -> _time:[YYYY-MM, YYYY-MM+1) minTimestamp = time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02", minTimestamp, maxTimestamp) f("2023-02Z", minTimestamp, maxTimestamp) // _time:YYYY-MM-hh:mm -> _time:[YYYY-MM-hh:mm, (YYYY-MM+1)-hh:mm) @@ -203,16 +201,15 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY-MM-DD minTimestamp = time.Date(2023, time.February, 12, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.February, 13, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02-12", minTimestamp, maxTimestamp) f("2023-02-12Z", minTimestamp, maxTimestamp) // February 28 minTimestamp = time.Date(2023, time.February, 28, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02-28", minTimestamp, maxTimestamp) + f("2023-02-28Z", minTimestamp, maxTimestamp) // January 31 minTimestamp = time.Date(2023, time.January, 31, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-01-31", minTimestamp, maxTimestamp) + f("2023-01-31Z", minTimestamp, maxTimestamp) // _time:YYYY-MM-DD-hh:mm minTimestamp = time.Date(2023, time.January, 31, 2, 25, 0, 0, time.UTC).UnixNano() @@ -227,7 +224,6 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY-MM-DDTHH minTimestamp = time.Date(2023, time.February, 28, 23, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02-28T23", minTimestamp, maxTimestamp) f("2023-02-28T23Z", minTimestamp, maxTimestamp) // _time:YYYY-MM-DDTHH-hh:mm @@ -243,7 +239,6 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY-MM-DDTHH:MM minTimestamp = time.Date(2023, 
time.February, 28, 23, 59, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02-28T23:59", minTimestamp, maxTimestamp) f("2023-02-28T23:59Z", minTimestamp, maxTimestamp) // _time:YYYY-MM-DDTHH:MM-hh:mm @@ -259,7 +254,6 @@ func TestParseTimeRange(t *testing.T) { // _time:YYYY-MM-DDTHH:MM:SS-hh:mm minTimestamp = time.Date(2023, time.February, 28, 23, 59, 59, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f("2023-02-28T23:59:59", minTimestamp, maxTimestamp) f("2023-02-28T23:59:59Z", minTimestamp, maxTimestamp) // _time:[YYYY-MM-DDTHH:MM:SS.sss, YYYY-MM-DDTHH:MM:SS.sss) @@ -290,28 +284,28 @@ func TestParseTimeRange(t *testing.T) { // _time:(start, end) minTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() + 1 maxTimestamp = time.Date(2023, time.April, 6, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f(`(2023-03-01,2023-04-06)`, minTimestamp, maxTimestamp) + f(`(2023-03-01Z,2023-04-06Z)`, minTimestamp, maxTimestamp) // _time:[start, end) minTimestamp = time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.April, 6, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f(`[2023-03-01,2023-04-06)`, minTimestamp, maxTimestamp) + f(`[2023-03-01Z,2023-04-06Z)`, minTimestamp, maxTimestamp) // _time:(start, end] minTimestamp = time.Date(2023, time.March, 1, 21, 20, 0, 0, time.UTC).UnixNano() + 1 maxTimestamp = time.Date(2023, time.April, 7, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f(`(2023-03-01T21:20,2023-04-06]`, minTimestamp, maxTimestamp) + f(`(2023-03-01T21:20Z,2023-04-06Z]`, minTimestamp, maxTimestamp) // _time:[start, end] with timezone minTimestamp = time.Date(2023, time.February, 28, 21, 40, 0, 0, time.UTC).UnixNano() maxTimestamp = time.Date(2023, time.April, 7, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - f(`[2023-03-01+02:20,2023-04-06T23]`, minTimestamp, maxTimestamp) + 
f(`[2023-03-01+02:20,2023-04-06T23Z]`, minTimestamp, maxTimestamp) // _time:[start, end] with timezone and offset offset := int64(30*time.Minute + 5*time.Second) minTimestamp = time.Date(2023, time.February, 28, 21, 40, 0, 0, time.UTC).UnixNano() - offset maxTimestamp = time.Date(2023, time.April, 7, 0, 0, 0, 0, time.UTC).UnixNano() - 1 - offset - f(`[2023-03-01+02:20,2023-04-06T23] offset 30m5s`, minTimestamp, maxTimestamp) + f(`[2023-03-01+02:20,2023-04-06T23Z] offset 30m5s`, minTimestamp, maxTimestamp) } func TestParseFilterSequence(t *testing.T) { @@ -2030,8 +2024,8 @@ func TestQueryGetFilterTimeRange(t *testing.T) { f("*", -9223372036854775808, 9223372036854775807) f("_time:2024-05-31T10:20:30.456789123Z", 1717150830456789123, 1717150830456789123) - f("_time:2024-05-31", 1717113600000000000, 1717199999999999999) - f("_time:2024-05-31 _time:day_range[08:00, 16:00]", 1717113600000000000, 1717199999999999999) + f("_time:2024-05-31Z", 1717113600000000000, 1717199999999999999) + f("_time:2024-05-31Z _time:day_range[08:00, 16:00]", 1717113600000000000, 1717199999999999999) } func TestQueryCanReturnLastNResults(t *testing.T) { diff --git a/lib/logstorage/syslog_parser.go b/lib/logstorage/syslog_parser.go index b60a242df..25c5b4961 100644 --- a/lib/logstorage/syslog_parser.go +++ b/lib/logstorage/syslog_parser.go @@ -62,7 +62,7 @@ type SyslogParser struct { // currentYear is used as the current year for rfc3164 messages. currentYear int - // timezeon is used as the current timezeon for rfc3164 messages. + // timezone is used as the current timezone for rfc3164 messages. 
timezone *time.Location } diff --git a/lib/logstorage/values_encoder.go b/lib/logstorage/values_encoder.go index 178ad87bf..dc7dfebee 100644 --- a/lib/logstorage/values_encoder.go +++ b/lib/logstorage/values_encoder.go @@ -12,6 +12,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil" ) // valueType is the type of values stored in every column block. @@ -283,11 +284,13 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ( return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue) } -// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds. +// TryParseTimestampRFC3339Nano parses s as RFC3339 with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds. +// +// If s doesn't contain timezone offset, then the local timezone is used. // // The returned timestamp can be negative if s is smaller than 1970 year. 
func TryParseTimestampRFC3339Nano(s string) (int64, bool) { - if len(s) < len("2006-01-02T15:04:05Z") { + if len(s) < len("2006-01-02T15:04:05") { return 0, false } @@ -301,28 +304,29 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) { // Parse timezone offset n := strings.IndexAny(s, "Z+-") if n < 0 { - return 0, false - } - offsetStr := s[n+1:] - if s[n] != 'Z' { - isMinus := s[n] == '-' - if len(offsetStr) == 0 { - return 0, false - } - offsetNsecs, ok := tryParseTimezoneOffset(offsetStr) - if !ok { - return 0, false - } - if isMinus { - offsetNsecs = -offsetNsecs - } - nsecs -= offsetNsecs + nsecs -= timeutil.GetLocalTimezoneOffsetNsecs() } else { - if len(offsetStr) != 0 { - return 0, false + offsetStr := s[n+1:] + if s[n] != 'Z' { + isMinus := s[n] == '-' + if len(offsetStr) == 0 { + return 0, false + } + offsetNsecs, ok := tryParseTimezoneOffset(offsetStr) + if !ok { + return 0, false + } + if isMinus { + offsetNsecs = -offsetNsecs + } + nsecs -= offsetNsecs + } else { + if len(offsetStr) != 0 { + return 0, false + } } + s = s[:n] } - s = s[:n] // Parse optional fractional part of seconds. if len(s) == 0 { @@ -434,8 +438,13 @@ func tryParseTimestampSecs(s string) (int64, bool, string) { month := time.Month(n) s = s[len("MM")+1:] - // Parse day - if s[len("DD")] != 'T' { + // Parse day. + // + // Allow whitespace additionally to T as the delimiter after DD, + // so SQL datetime format can be parsed additionally to RFC3339. 
+ // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721 + delim := s[len("DD")] + if delim != 'T' && delim != ' ' { return 0, false, s } dayStr := s[:len("DD")] diff --git a/lib/logstorage/values_encoder_test.go b/lib/logstorage/values_encoder_test.go index 6ec9f7e50..b71b45747 100644 --- a/lib/logstorage/values_encoder_test.go +++ b/lib/logstorage/values_encoder_test.go @@ -151,6 +151,7 @@ func TestTryParseIPv4_Failure(t *testing.T) { func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) { f := func(s, timestampExpected string) { t.Helper() + nsecs, ok := TryParseTimestampRFC3339Nano(s) if !ok { t.Fatalf("cannot parse timestamp %q", s) @@ -184,6 +185,11 @@ func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) { // timestamp with timezone f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z") f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z") + + // SQL datetime format + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6721 + f("2023-01-16 00:45:51+01:00", "2023-01-15T23:45:51Z") + f("2023-01-16 00:45:51.123-01:00", "2023-01-16T01:45:51.123Z") } func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { @@ -199,10 +205,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { f("") f("foobar") - // Missing Z at the end - f("2023-01-15T22:15:51") - f("2023-01-15T22:15:51.123") - // missing fractional part after dot f("2023-01-15T22:15:51.Z") diff --git a/lib/promutils/time.go b/lib/promutils/time.go index b2ac05dc0..6372d3667 100644 --- a/lib/promutils/time.go +++ b/lib/promutils/time.go @@ -6,6 +6,8 @@ import ( "strconv" "strings" "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil" ) // ParseTimeMsec parses time s in different formats. @@ -33,6 +35,8 @@ const ( // // See https://docs.victoriametrics.com/single-server-victoriametrics/#timestamp-formats // +// If s doesn't contain timezone information, then the local timezone is used. 
+// // It returns unix timestamp in nanoseconds. func ParseTimeAt(s string, currentTimestamp int64) (int64, error) { if s == "now" { @@ -58,6 +62,12 @@ func ParseTimeAt(s string, currentTimestamp int64) (int64, error) { tzOffset = -tzOffset } s = sOrig[:len(sOrig)-6] + } else { + if !strings.HasSuffix(s, "Z") { + tzOffset = -timeutil.GetLocalTimezoneOffsetNsecs() + } else { + s = s[:len(s)-1] + } } } s = strings.TrimSuffix(s, "Z") diff --git a/lib/promutils/time_test.go b/lib/promutils/time_test.go index 2f9e76b52..0c41fb671 100644 --- a/lib/promutils/time_test.go +++ b/lib/promutils/time_test.go @@ -37,37 +37,31 @@ func TestParseTimeAtSuccess(t *testing.T) { f("now-1h5m", now, now-(3600+5*60)*1e9) // Year - f("2023", now, 1.6725312e+09*1e9) f("2023Z", now, 1.6725312e+09*1e9) f("2023+02:00", now, 1.672524e+09*1e9) f("2023-02:00", now, 1.6725384e+09*1e9) // Year and month - f("2023-05", now, 1.6828992e+09*1e9) f("2023-05Z", now, 1.6828992e+09*1e9) f("2023-05+02:00", now, 1.682892e+09*1e9) f("2023-05-02:00", now, 1.6829064e+09*1e9) // Year, month and day - f("2023-05-20", now, 1.6845408e+09*1e9) f("2023-05-20Z", now, 1.6845408e+09*1e9) f("2023-05-20+02:30", now, 1.6845318e+09*1e9) f("2023-05-20-02:30", now, 1.6845498e+09*1e9) // Year, month, day and hour - f("2023-05-20T04", now, 1.6845552e+09*1e9) f("2023-05-20T04Z", now, 1.6845552e+09*1e9) f("2023-05-20T04+02:30", now, 1.6845462e+09*1e9) f("2023-05-20T04-02:30", now, 1.6845642e+09*1e9) // Year, month, day, hour and minute - f("2023-05-20T04:57", now, 1.68455862e+09*1e9) f("2023-05-20T04:57Z", now, 1.68455862e+09*1e9) f("2023-05-20T04:57+02:30", now, 1.68454962e+09*1e9) f("2023-05-20T04:57-02:30", now, 1.68456762e+09*1e9) // Year, month, day, hour, minute and second - f("2023-05-20T04:57:43", now, 1.684558663e+09*1e9) f("2023-05-20T04:57:43Z", now, 1.684558663e+09*1e9) f("2023-05-20T04:57:43+02:30", now, 1.684549663e+09*1e9) f("2023-05-20T04:57:43-02:30", now, 1.684567663e+09*1e9) diff --git 
a/lib/timeutil/timezone.go b/lib/timeutil/timezone.go new file mode 100644 index 000000000..c2f51f777 --- /dev/null +++ b/lib/timeutil/timezone.go @@ -0,0 +1,30 @@ +package timeutil + +import ( + "sync/atomic" + "time" +) + +// GetLocalTimezoneOffsetNsecs returns local timezone offset in nanoseconds. The value is a cached copy which is refreshed in the background by the goroutine started in init. NOTE(review): this is the offset in effect now, which may differ from the offset at the parsed time across a DST change; confirm this is acceptable for callers. +func GetLocalTimezoneOffsetNsecs() int64 { + return localTimezoneOffsetNsecs.Load() +} + +var localTimezoneOffsetNsecs atomic.Int64 // written by updateLocalTimezoneOffsetNsecs, read by GetLocalTimezoneOffsetNsecs + +func updateLocalTimezoneOffsetNsecs() { // stores the current local timezone offset into localTimezoneOffsetNsecs + _, offset := time.Now().Zone() // Zone reports the offset in seconds east of UTC + nsecs := int64(offset) * 1e9 // convert seconds to nanoseconds + localTimezoneOffsetNsecs.Store(nsecs) +} + +func init() { + updateLocalTimezoneOffsetNsecs() // populate the value synchronously during package initialization + // Update local timezone offset in a loop, since it may change over the year due to DST. The ticker is never stopped, so this goroutine runs for the lifetime of the process. + go func() { + t := time.NewTicker(5 * time.Second) // refresh period of the cached offset + for range t.C { + updateLocalTimezoneOffsetNsecs() + } + }() +}