From 9e1c03724907649f8347f9f2752d200b196f6f1e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 25 Jun 2024 14:08:01 +0200 Subject: [PATCH] lib/logstorage: properly parse timezone offset at TryParseTimestampRFC3339Nano() The TryParseTimestampRFC3339Nano() must properly parse RFC3339 timestamps with timezone offsets. While at it, make tryParseTimestampISO8601 function private in order to prevent from improper usage of this function from outside the lib/logstorage package. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508 --- app/vlinsert/insertutils/timestamp.go | 8 +-- app/vlinsert/insertutils/timestamp_test.go | 75 ++++++++++++++++++++ docs/VictoriaLogs/CHANGELOG.md | 2 + lib/logstorage/block_result.go | 26 +------ lib/logstorage/filter_exact.go | 4 +- lib/logstorage/filter_in.go | 2 +- lib/logstorage/filter_phrase.go | 2 +- lib/logstorage/pipe_stats.go | 2 +- lib/logstorage/values_encoder.go | 57 ++++++++++++--- lib/logstorage/values_encoder_test.go | 44 ++++++------ lib/logstorage/values_encoder_timing_test.go | 2 +- 11 files changed, 157 insertions(+), 67 deletions(-) create mode 100644 app/vlinsert/insertutils/timestamp_test.go diff --git a/app/vlinsert/insertutils/timestamp.go b/app/vlinsert/insertutils/timestamp.go index ec546bcd8..e9d41d0a4 100644 --- a/app/vlinsert/insertutils/timestamp.go +++ b/app/vlinsert/insertutils/timestamp.go @@ -19,12 +19,12 @@ func ExtractTimestampRFC3339NanoFromFields(timeField string, fields []logstorage if f.Name != timeField { continue } + if f.Value == "" || f.Value == "0" { + return time.Now().UnixNano(), nil + } nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value) if !ok { - if f.Value == "0" || f.Value == "" { - return time.Now().UnixNano(), nil - } - return time.Now().UnixNano(), fmt.Errorf("cannot unmarshal iso8601 timestamp from %s=%q", timeField, f.Value) + return 0, fmt.Errorf("cannot unmarshal rfc3339 timestamp from %s=%q", timeField, f.Value) } f.Value = "" return nsecs, nil diff --git a/app/vlinsert/insertutils/timestamp_test.go b/app/vlinsert/insertutils/timestamp_test.go new file mode 100644 index 000000000..ca7f9d572 --- /dev/null +++ b/app/vlinsert/insertutils/timestamp_test.go @@ -0,0 +1,75 @@ +package insertutils + +import ( + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +func TestExtractTimestampRFC3339NanoFromFields_Success(t *testing.T) { + f := func(timeField string, fields []logstorage.Field, nsecsExpected int64) { + t.Helper() + + nsecs, err := ExtractTimestampRFC3339NanoFromFields(timeField, fields) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if nsecs != nsecsExpected { + t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, nsecsExpected) + } + + for _, f := range fields { + if f.Name == timeField { + if f.Value != "" { + t.Fatalf("unexpected value for field %s; got %q; want %q", timeField, f.Value, "") + } + } + } + } + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20Z"}, + }, 1718753840000000000) + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20+08:00"}, + }, 1718725040000000000) + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20.123-05:30"}, + }, 1718773640123000000) + + f("time", []logstorage.Field{ + {Name: "time", Value: "2024-06-18T23:37:20.123456789-05:30"}, + {Name: "foo", Value: "bar"}, + }, 1718773640123456789) +} + +func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) { + f := func(s string) { + t.Helper() + + fields := []logstorage.Field{ + {Name: "time", Value: s}, + } + nsecs, err := ExtractTimestampRFC3339NanoFromFields("time", fields) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if nsecs != 0 { + t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, 0) + } + } + + f("foobar") + + // no Z at the end + f("2024-06-18T23:37:20") + + // incomplete time + f("2024-06-18") + f("2024-06-18T23:37") +} diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index be915da9c..5cc2ab556 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508). + ## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs) Released at 2024-06-24 diff --git a/lib/logstorage/block_result.go b/lib/logstorage/block_result.go index f50f0f6ef..2ef7fe857 100644 --- a/lib/logstorage/block_result.go +++ b/lib/logstorage/block_result.go @@ -1180,30 +1180,8 @@ func (br *blockResult) getBucketedValue(s string, bf *byStatsField) string { return bytesutil.ToUnsafeString(buf[bufLen:]) } - if timestamp, ok := TryParseTimestampISO8601(s); ok { - bucketSizeInt := int64(bf.bucketSize) - if bucketSizeInt <= 0 { - bucketSizeInt = 1 - } - bucketOffset := int64(bf.bucketOffset) - - timestamp -= bucketOffset - if bf.bucketSizeStr == "month" { - timestamp = truncateTimestampToMonth(timestamp) - } else if bf.bucketSizeStr == "year" { - timestamp = truncateTimestampToYear(timestamp) - } else { - timestamp -= timestamp % bucketSizeInt - } - timestamp += bucketOffset - - buf := br.a.b - bufLen := len(buf) - buf = marshalTimestampISO8601String(buf, timestamp) - br.a.b = buf - return bytesutil.ToUnsafeString(buf[bufLen:]) - } - + // There is no need in calling tryParseTimestampISO8601 here, since TryParseTimestampRFC3339Nano + // should successfully parse ISO8601 timestamps. if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok { bucketSizeInt := int64(bf.bucketSize) if bucketSizeInt <= 0 { diff --git a/lib/logstorage/filter_exact.go b/lib/logstorage/filter_exact.go index 7434e7339..98e9ea1ee 100644 --- a/lib/logstorage/filter_exact.go +++ b/lib/logstorage/filter_exact.go @@ -141,7 +141,7 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) { return ip == ipNeeded }) case valueTypeTimestampISO8601: - timestampNeeded, ok := TryParseTimestampISO8601(value) + timestampNeeded, ok := tryParseTimestampISO8601(value) if !ok { bm.resetBits() return @@ -213,7 +213,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - n, ok := TryParseTimestampISO8601(value) + n, ok := tryParseTimestampISO8601(value) if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) { bm.resetBits() return diff --git a/lib/logstorage/filter_in.go b/lib/logstorage/filter_in.go index c3d4d3622..eb39f41f6 100644 --- a/lib/logstorage/filter_in.go +++ b/lib/logstorage/filter_in.go @@ -245,7 +245,7 @@ func (fi *filterIn) initTimestampISO8601Values() { m := make(map[string]struct{}, len(values)) buf := make([]byte, 0, len(values)*8) for _, v := range values { - n, ok := TryParseTimestampISO8601(v) + n, ok := tryParseTimestampISO8601(v) if !ok { continue } diff --git a/lib/logstorage/filter_phrase.go b/lib/logstorage/filter_phrase.go index 2598209be..3b4c04174 100644 --- a/lib/logstorage/filter_phrase.go +++ b/lib/logstorage/filter_phrase.go @@ -100,7 +100,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - _, ok := TryParseTimestampISO8601(phrase) + _, ok := tryParseTimestampISO8601(phrase) if ok { // Fast path - the phrase contains complete timestamp, so we can use exact search matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go index be0587784..4fb2ad48a 100644 --- a/lib/logstorage/pipe_stats.go +++ b/lib/logstorage/pipe_stats.go @@ -599,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) { return &ps, nil } if !lex.isKeyword(",") { - return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token) + return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", lex.token, sf) } lex.nextToken() } diff --git a/lib/logstorage/values_encoder.go b/lib/logstorage/values_encoder.go index 7e9bf0cd0..178ad87bf 100644 --- a/lib/logstorage/values_encoder.go +++ b/lib/logstorage/values_encoder.go @@ -262,7 +262,7 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ( a := u64s.A var minValue, maxValue int64 for i, v := range srcValues { - n, ok := TryParseTimestampISO8601(v) + n, ok := tryParseTimestampISO8601(v) if !ok { return dstBuf, dstValues, valueTypeUnknown, 0, 0 } @@ -283,14 +283,10 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ( return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue) } -// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and 'Z' tail and returns unix timestamp in nanoseconds. +// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds. // // The returned timestamp can be negative if s is smaller than 1970 year. func TryParseTimestampRFC3339Nano(s string) (int64, bool) { - // Do not parse timestamps with timezone other than Z, since they cannot be converted back - // to the same string representation in general case. - // This may break search. - if len(s) < len("2006-01-02T15:04:05Z") { return 0, false } @@ -302,12 +298,33 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) { s = tail nsecs := secs * 1e9 - // Parse optional fractional part of seconds. - n := strings.IndexByte(s, 'Z') - if n < 0 || n != len(s)-1 { + // Parse timezone offset + n := strings.IndexAny(s, "Z+-") + if n < 0 { return 0, false } + offsetStr := s[n+1:] + if s[n] != 'Z' { + isMinus := s[n] == '-' + if len(offsetStr) == 0 { + return 0, false + } + offsetNsecs, ok := tryParseTimezoneOffset(offsetStr) + if !ok { + return 0, false + } + if isMinus { + offsetNsecs = -offsetNsecs + } + nsecs -= offsetNsecs + } else { + if len(offsetStr) != 0 { + return 0, false + } + } s = s[:n] + + // Parse optional fractional part of seconds. if len(s) == 0 { return nsecs, true } @@ -330,10 +347,28 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) { return nsecs, true } -// TryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds. +func tryParseTimezoneOffset(offsetStr string) (int64, bool) { + n := strings.IndexByte(offsetStr, ':') + if n < 0 { + return 0, false + } + hourStr := offsetStr[:n] + minuteStr := offsetStr[n+1:] + hours, ok := tryParseUint64(hourStr) + if !ok || hours > 24 { + return 0, false + } + minutes, ok := tryParseUint64(minuteStr) + if !ok || minutes > 60 { + return 0, false + } + return int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute, true +} + +// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds. // // The returned timestamp can be negative if s is smaller than 1970 year. -func TryParseTimestampISO8601(s string) (int64, bool) { +func tryParseTimestampISO8601(s string) (int64, bool) { // Do not parse timestamps with timezone, since they cannot be converted back // to the same string representation in general case. // This may break search. diff --git a/lib/logstorage/values_encoder_test.go b/lib/logstorage/values_encoder_test.go index 6564f4a4f..6ec9f7e50 100644 --- a/lib/logstorage/values_encoder_test.go +++ b/lib/logstorage/values_encoder_test.go @@ -149,37 +149,41 @@ func TestTryParseIPv4_Failure(t *testing.T) { } func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) { - f := func(s string) { + f := func(s, timestampExpected string) { t.Helper() nsecs, ok := TryParseTimestampRFC3339Nano(s) if !ok { t.Fatalf("cannot parse timestamp %q", s) } - data := marshalTimestampRFC3339NanoString(nil, nsecs) - if string(data) != s { - t.Fatalf("unexpected timestamp; got %q; want %q", data, s) + timestamp := marshalTimestampRFC3339NanoString(nil, nsecs) + if string(timestamp) != timestampExpected { + t.Fatalf("unexpected timestamp; got %q; want %q", timestamp, timestampExpected) } } // No fractional seconds - f("2023-01-15T23:45:51Z") + f("2023-01-15T23:45:51Z", "2023-01-15T23:45:51Z") // Different number of fractional seconds - f("2023-01-15T23:45:51.1Z") - f("2023-01-15T23:45:51.12Z") - f("2023-01-15T23:45:51.123Z") - f("2023-01-15T23:45:51.1234Z") - f("2023-01-15T23:45:51.12345Z") - f("2023-01-15T23:45:51.123456Z") - f("2023-01-15T23:45:51.1234567Z") - f("2023-01-15T23:45:51.12345678Z") - f("2023-01-15T23:45:51.123456789Z") + f("2023-01-15T23:45:51.1Z", "2023-01-15T23:45:51.1Z") + f("2023-01-15T23:45:51.12Z", "2023-01-15T23:45:51.12Z") + f("2023-01-15T23:45:51.123Z", "2023-01-15T23:45:51.123Z") + f("2023-01-15T23:45:51.1234Z", "2023-01-15T23:45:51.1234Z") + f("2023-01-15T23:45:51.12345Z", "2023-01-15T23:45:51.12345Z") + f("2023-01-15T23:45:51.123456Z", "2023-01-15T23:45:51.123456Z") + f("2023-01-15T23:45:51.1234567Z", "2023-01-15T23:45:51.1234567Z") + f("2023-01-15T23:45:51.12345678Z", "2023-01-15T23:45:51.12345678Z") + f("2023-01-15T23:45:51.123456789Z", "2023-01-15T23:45:51.123456789Z") // The minimum possible timestamp - f("1677-09-21T00:12:44Z") + f("1677-09-21T00:12:44Z", "1677-09-21T00:12:44Z") // The maximum possible timestamp - f("2262-04-11T23:47:15.999999999Z") + f("2262-04-11T23:47:15.999999999Z", "2262-04-11T23:47:15.999999999Z") + + // timestamp with timezone + f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z") + f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z") } func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { @@ -202,10 +206,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { // missing fractional part after dot f("2023-01-15T22:15:51.Z") - // timestamp with timezone - f("2023-01-16T00:45:51+01:00") - f("2023-01-16T00:45:51.123+01:00") - // too small year f("1676-09-21T00:12:43Z") @@ -240,7 +240,7 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { func TestTryParseTimestampISO8601String_Success(t *testing.T) { f := func(s string) { t.Helper() - nsecs, ok := TryParseTimestampISO8601(s) + nsecs, ok := tryParseTimestampISO8601(s) if !ok { t.Fatalf("cannot parse timestamp %q", s) } @@ -263,7 +263,7 @@ func TestTryParseTimestampISO8601String_Success(t *testing.T) { func TestTryParseTimestampISO8601_Failure(t *testing.T) { f := func(s string) { t.Helper() - _, ok := TryParseTimestampISO8601(s) + _, ok := tryParseTimestampISO8601(s) if ok { t.Fatalf("expecting faulure when parsing %q", s) } diff --git a/lib/logstorage/values_encoder_timing_test.go b/lib/logstorage/values_encoder_timing_test.go index ef564fee4..382a3c40b 100644 --- a/lib/logstorage/values_encoder_timing_test.go +++ b/lib/logstorage/values_encoder_timing_test.go @@ -47,7 +47,7 @@ func BenchmarkTryParseTimestampISO8601(b *testing.B) { nSum := int64(0) for pb.Next() { for _, s := range a { - n, ok := TryParseTimestampISO8601(s) + n, ok := tryParseTimestampISO8601(s) if !ok { panic(fmt.Errorf("cannot parse timestamp %q", s)) }