diff --git a/app/vlinsert/insertutils/timestamp.go b/app/vlinsert/insertutils/timestamp.go index ec546bcd8..e9d41d0a4 100644 --- a/app/vlinsert/insertutils/timestamp.go +++ b/app/vlinsert/insertutils/timestamp.go @@ -19,12 +19,12 @@ func ExtractTimestampRFC3339NanoFromFields(timeField string, fields []logstorage if f.Name != timeField { continue } + if f.Value == "" || f.Value == "0" { + return time.Now().UnixNano(), nil + } nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value) if !ok { - if f.Value == "0" || f.Value == "" { - return time.Now().UnixNano(), nil - } - return time.Now().UnixNano(), fmt.Errorf("cannot unmarshal iso8601 timestamp from %s=%q", timeField, f.Value) + return 0, fmt.Errorf("cannot unmarshal rfc3339 timestamp from %s=%q", timeField, f.Value) } f.Value = "" return nsecs, nil diff --git a/app/vlinsert/insertutils/timestamp_test.go b/app/vlinsert/insertutils/timestamp_test.go new file mode 100644 index 000000000..ca7f9d572 --- /dev/null +++ b/app/vlinsert/insertutils/timestamp_test.go @@ -0,0 +1,75 @@ +package insertutils + +import ( + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage" +) + +func TestExtractTimestampRFC3339NanoFromFields_Success(t *testing.T) { + f := func(timeField string, fields []logstorage.Field, nsecsExpected int64) { + t.Helper() + + nsecs, err := ExtractTimestampRFC3339NanoFromFields(timeField, fields) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if nsecs != nsecsExpected { + t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, nsecsExpected) + } + + for _, f := range fields { + if f.Name == timeField { + if f.Value != "" { + t.Fatalf("unexpected value for field %s; got %q; want %q", timeField, f.Value, "") + } + } + } + } + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20Z"}, + }, 1718753840000000000) + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20+08:00"}, + }, 1718725040000000000) + + f("time", []logstorage.Field{ + {Name: "foo", Value: "bar"}, + {Name: "time", Value: "2024-06-18T23:37:20.123-05:30"}, + }, 1718773640123000000) + + f("time", []logstorage.Field{ + {Name: "time", Value: "2024-06-18T23:37:20.123456789-05:30"}, + {Name: "foo", Value: "bar"}, + }, 1718773640123456789) +} + +func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) { + f := func(s string) { + t.Helper() + + fields := []logstorage.Field{ + {Name: "time", Value: s}, + } + nsecs, err := ExtractTimestampRFC3339NanoFromFields("time", fields) + if err == nil { + t.Fatalf("expecting non-nil error") + } + if nsecs != 0 { + t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, 0) + } + } + + f("foobar") + + // no Z at the end + f("2024-06-18T23:37:20") + + // incomplete time + f("2024-06-18") + f("2024-06-18T23:37") +} diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index be915da9c..5cc2ab556 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508). + ## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs) Released at 2024-06-24 diff --git a/lib/logstorage/block_result.go b/lib/logstorage/block_result.go index f50f0f6ef..2ef7fe857 100644 --- a/lib/logstorage/block_result.go +++ b/lib/logstorage/block_result.go @@ -1180,30 +1180,8 @@ func (br *blockResult) getBucketedValue(s string, bf *byStatsField) string { return bytesutil.ToUnsafeString(buf[bufLen:]) } - if timestamp, ok := TryParseTimestampISO8601(s); ok { - bucketSizeInt := int64(bf.bucketSize) - if bucketSizeInt <= 0 { - bucketSizeInt = 1 - } - bucketOffset := int64(bf.bucketOffset) - - timestamp -= bucketOffset - if bf.bucketSizeStr == "month" { - timestamp = truncateTimestampToMonth(timestamp) - } else if bf.bucketSizeStr == "year" { - timestamp = truncateTimestampToYear(timestamp) - } else { - timestamp -= timestamp % bucketSizeInt - } - timestamp += bucketOffset - - buf := br.a.b - bufLen := len(buf) - buf = marshalTimestampISO8601String(buf, timestamp) - br.a.b = buf - return bytesutil.ToUnsafeString(buf[bufLen:]) - } - + // There is no need in calling tryParseTimestampISO8601 here, since TryParseTimestampRFC3339Nano + // should successfully parse ISO8601 timestamps. if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok { bucketSizeInt := int64(bf.bucketSize) if bucketSizeInt <= 0 { diff --git a/lib/logstorage/filter_exact.go b/lib/logstorage/filter_exact.go index 7434e7339..98e9ea1ee 100644 --- a/lib/logstorage/filter_exact.go +++ b/lib/logstorage/filter_exact.go @@ -141,7 +141,7 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) { return ip == ipNeeded }) case valueTypeTimestampISO8601: - timestampNeeded, ok := TryParseTimestampISO8601(value) + timestampNeeded, ok := tryParseTimestampISO8601(value) if !ok { bm.resetBits() return @@ -213,7 +213,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { - n, ok := TryParseTimestampISO8601(value) + n, ok := tryParseTimestampISO8601(value) if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) { bm.resetBits() return diff --git a/lib/logstorage/filter_in.go b/lib/logstorage/filter_in.go index c3d4d3622..eb39f41f6 100644 --- a/lib/logstorage/filter_in.go +++ b/lib/logstorage/filter_in.go @@ -245,7 +245,7 @@ func (fi *filterIn) initTimestampISO8601Values() { m := make(map[string]struct{}, len(values)) buf := make([]byte, 0, len(values)*8) for _, v := range values { - n, ok := TryParseTimestampISO8601(v) + n, ok := tryParseTimestampISO8601(v) if !ok { continue } diff --git a/lib/logstorage/filter_phrase.go b/lib/logstorage/filter_phrase.go index 2598209be..3b4c04174 100644 --- a/lib/logstorage/filter_phrase.go +++ b/lib/logstorage/filter_phrase.go @@ -100,7 +100,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { } func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { - _, ok := TryParseTimestampISO8601(phrase) + _, ok := tryParseTimestampISO8601(phrase) if ok { // Fast path - the phrase contains complete timestamp, so we can use exact search matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) diff --git a/lib/logstorage/pipe_stats.go b/lib/logstorage/pipe_stats.go index be0587784..4fb2ad48a 100644 --- a/lib/logstorage/pipe_stats.go +++ b/lib/logstorage/pipe_stats.go @@ -599,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) { return &ps, nil } if !lex.isKeyword(",") { - return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token) + return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", lex.token, sf) } lex.nextToken() } diff --git a/lib/logstorage/values_encoder.go b/lib/logstorage/values_encoder.go index 7e9bf0cd0..178ad87bf 100644 --- a/lib/logstorage/values_encoder.go +++ b/lib/logstorage/values_encoder.go @@ -262,7 +262,7 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ( a := u64s.A var minValue, maxValue int64 for i, v := range srcValues { - n, ok := TryParseTimestampISO8601(v) + n, ok := tryParseTimestampISO8601(v) if !ok { return dstBuf, dstValues, valueTypeUnknown, 0, 0 } @@ -283,14 +283,10 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) ( return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue) } -// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and 'Z' tail and returns unix timestamp in nanoseconds. +// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds. // // The returned timestamp can be negative if s is smaller than 1970 year. func TryParseTimestampRFC3339Nano(s string) (int64, bool) { - // Do not parse timestamps with timezone other than Z, since they cannot be converted back - // to the same string representation in general case. - // This may break search. - if len(s) < len("2006-01-02T15:04:05Z") { return 0, false } @@ -302,12 +298,33 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) { s = tail nsecs := secs * 1e9 - // Parse optional fractional part of seconds. - n := strings.IndexByte(s, 'Z') - if n < 0 || n != len(s)-1 { + // Parse timezone offset + n := strings.IndexAny(s, "Z+-") + if n < 0 { return 0, false } + offsetStr := s[n+1:] + if s[n] != 'Z' { + isMinus := s[n] == '-' + if len(offsetStr) == 0 { + return 0, false + } + offsetNsecs, ok := tryParseTimezoneOffset(offsetStr) + if !ok { + return 0, false + } + if isMinus { + offsetNsecs = -offsetNsecs + } + nsecs -= offsetNsecs + } else { + if len(offsetStr) != 0 { + return 0, false + } + } s = s[:n] + + // Parse optional fractional part of seconds. if len(s) == 0 { return nsecs, true } @@ -330,10 +347,28 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) { return nsecs, true } -// TryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds. +func tryParseTimezoneOffset(offsetStr string) (int64, bool) { + n := strings.IndexByte(offsetStr, ':') + if n < 0 { + return 0, false + } + hourStr := offsetStr[:n] + minuteStr := offsetStr[n+1:] + hours, ok := tryParseUint64(hourStr) + if !ok || hours > 24 { + return 0, false + } + minutes, ok := tryParseUint64(minuteStr) + if !ok || minutes > 60 { + return 0, false + } + return int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute, true +} + +// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds. // // The returned timestamp can be negative if s is smaller than 1970 year. -func TryParseTimestampISO8601(s string) (int64, bool) { +func tryParseTimestampISO8601(s string) (int64, bool) { // Do not parse timestamps with timezone, since they cannot be converted back // to the same string representation in general case. // This may break search. diff --git a/lib/logstorage/values_encoder_test.go b/lib/logstorage/values_encoder_test.go index 6564f4a4f..6ec9f7e50 100644 --- a/lib/logstorage/values_encoder_test.go +++ b/lib/logstorage/values_encoder_test.go @@ -149,37 +149,41 @@ func TestTryParseIPv4_Failure(t *testing.T) { } func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) { - f := func(s string) { + f := func(s, timestampExpected string) { t.Helper() nsecs, ok := TryParseTimestampRFC3339Nano(s) if !ok { t.Fatalf("cannot parse timestamp %q", s) } - data := marshalTimestampRFC3339NanoString(nil, nsecs) - if string(data) != s { - t.Fatalf("unexpected timestamp; got %q; want %q", data, s) + timestamp := marshalTimestampRFC3339NanoString(nil, nsecs) + if string(timestamp) != timestampExpected { + t.Fatalf("unexpected timestamp; got %q; want %q", timestamp, timestampExpected) } } // No fractional seconds - f("2023-01-15T23:45:51Z") + f("2023-01-15T23:45:51Z", "2023-01-15T23:45:51Z") // Different number of fractional seconds - f("2023-01-15T23:45:51.1Z") - f("2023-01-15T23:45:51.12Z") - f("2023-01-15T23:45:51.123Z") - f("2023-01-15T23:45:51.1234Z") - f("2023-01-15T23:45:51.12345Z") - f("2023-01-15T23:45:51.123456Z") - f("2023-01-15T23:45:51.1234567Z") - f("2023-01-15T23:45:51.12345678Z") - f("2023-01-15T23:45:51.123456789Z") + f("2023-01-15T23:45:51.1Z", "2023-01-15T23:45:51.1Z") + f("2023-01-15T23:45:51.12Z", "2023-01-15T23:45:51.12Z") + f("2023-01-15T23:45:51.123Z", "2023-01-15T23:45:51.123Z") + f("2023-01-15T23:45:51.1234Z", "2023-01-15T23:45:51.1234Z") + f("2023-01-15T23:45:51.12345Z", "2023-01-15T23:45:51.12345Z") + f("2023-01-15T23:45:51.123456Z", "2023-01-15T23:45:51.123456Z") + f("2023-01-15T23:45:51.1234567Z", "2023-01-15T23:45:51.1234567Z") + f("2023-01-15T23:45:51.12345678Z", "2023-01-15T23:45:51.12345678Z") + f("2023-01-15T23:45:51.123456789Z", "2023-01-15T23:45:51.123456789Z") // The minimum possible timestamp - f("1677-09-21T00:12:44Z") + f("1677-09-21T00:12:44Z", "1677-09-21T00:12:44Z") // The maximum possible timestamp - f("2262-04-11T23:47:15.999999999Z") + f("2262-04-11T23:47:15.999999999Z", "2262-04-11T23:47:15.999999999Z") + + // timestamp with timezone + f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z") + f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z") } func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { @@ -202,10 +206,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { // missing fractional part after dot f("2023-01-15T22:15:51.Z") - // timestamp with timezone - f("2023-01-16T00:45:51+01:00") - f("2023-01-16T00:45:51.123+01:00") - // too small year f("1676-09-21T00:12:43Z") @@ -240,7 +240,7 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { func TestTryParseTimestampISO8601String_Success(t *testing.T) { f := func(s string) { t.Helper() - nsecs, ok := TryParseTimestampISO8601(s) + nsecs, ok := tryParseTimestampISO8601(s) if !ok { t.Fatalf("cannot parse timestamp %q", s) } @@ -263,7 +263,7 @@ func TestTryParseTimestampISO8601String_Success(t *testing.T) { func TestTryParseTimestampISO8601_Failure(t *testing.T) { f := func(s string) { t.Helper() - _, ok := TryParseTimestampISO8601(s) + _, ok := tryParseTimestampISO8601(s) if ok { t.Fatalf("expecting faulure when parsing %q", s) } diff --git a/lib/logstorage/values_encoder_timing_test.go b/lib/logstorage/values_encoder_timing_test.go index ef564fee4..382a3c40b 100644 --- a/lib/logstorage/values_encoder_timing_test.go +++ b/lib/logstorage/values_encoder_timing_test.go @@ -47,7 +47,7 @@ func BenchmarkTryParseTimestampISO8601(b *testing.B) { nSum := int64(0) for pb.Next() { for _, s := range a { - n, ok := TryParseTimestampISO8601(s) + n, ok := tryParseTimestampISO8601(s) if !ok { panic(fmt.Errorf("cannot parse timestamp %q", s)) }