lib/logstorage: properly parse timezone offset at TryParseTimestampRFC3339Nano()

The TryParseTimestampRFC3339Nano() must properly parse RFC3339 timestamps with timezone offsets.

While at it, make tryParseTimestampISO8601 function private in order to prevent
from improper usage of this function from outside the lib/logstorage package.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508
This commit is contained in:
Aliaksandr Valialkin 2024-06-25 14:08:01 +02:00
parent 24825ba63b
commit 9e1c037249
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
11 changed files with 157 additions and 67 deletions

View file

@ -19,12 +19,12 @@ func ExtractTimestampRFC3339NanoFromFields(timeField string, fields []logstorage
if f.Name != timeField { if f.Name != timeField {
continue continue
} }
if f.Value == "" || f.Value == "0" {
return time.Now().UnixNano(), nil
}
nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value) nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value)
if !ok { if !ok {
if f.Value == "0" || f.Value == "" { return 0, fmt.Errorf("cannot unmarshal rfc3339 timestamp from %s=%q", timeField, f.Value)
return time.Now().UnixNano(), nil
}
return time.Now().UnixNano(), fmt.Errorf("cannot unmarshal iso8601 timestamp from %s=%q", timeField, f.Value)
} }
f.Value = "" f.Value = ""
return nsecs, nil return nsecs, nil

View file

@ -0,0 +1,75 @@
package insertutils
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestExtractTimestampRFC3339NanoFromFields_Success(t *testing.T) {
f := func(timeField string, fields []logstorage.Field, nsecsExpected int64) {
t.Helper()
nsecs, err := ExtractTimestampRFC3339NanoFromFields(timeField, fields)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if nsecs != nsecsExpected {
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, nsecsExpected)
}
for _, f := range fields {
if f.Name == timeField {
if f.Value != "" {
t.Fatalf("unexpected value for field %s; got %q; want %q", timeField, f.Value, "")
}
}
}
}
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20Z"},
}, 1718753840000000000)
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20+08:00"},
}, 1718725040000000000)
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20.123-05:30"},
}, 1718773640123000000)
f("time", []logstorage.Field{
{Name: "time", Value: "2024-06-18T23:37:20.123456789-05:30"},
{Name: "foo", Value: "bar"},
}, 1718773640123456789)
}
func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) {
f := func(s string) {
t.Helper()
fields := []logstorage.Field{
{Name: "time", Value: s},
}
nsecs, err := ExtractTimestampRFC3339NanoFromFields("time", fields)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if nsecs != 0 {
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, 0)
}
}
f("foobar")
// no Z at the end
f("2024-06-18T23:37:20")
// incomplete time
f("2024-06-18")
f("2024-06-18T23:37")
}

View file

@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip ## tip
* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508).
## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs) ## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs)
Released at 2024-06-24 Released at 2024-06-24

View file

@ -1180,30 +1180,8 @@ func (br *blockResult) getBucketedValue(s string, bf *byStatsField) string {
return bytesutil.ToUnsafeString(buf[bufLen:]) return bytesutil.ToUnsafeString(buf[bufLen:])
} }
if timestamp, ok := TryParseTimestampISO8601(s); ok { // There is no need in calling tryParseTimestampISO8601 here, since TryParseTimestampRFC3339Nano
bucketSizeInt := int64(bf.bucketSize) // should successfully parse ISO8601 timestamps.
if bucketSizeInt <= 0 {
bucketSizeInt = 1
}
bucketOffset := int64(bf.bucketOffset)
timestamp -= bucketOffset
if bf.bucketSizeStr == "month" {
timestamp = truncateTimestampToMonth(timestamp)
} else if bf.bucketSizeStr == "year" {
timestamp = truncateTimestampToYear(timestamp)
} else {
timestamp -= timestamp % bucketSizeInt
}
timestamp += bucketOffset
buf := br.a.b
bufLen := len(buf)
buf = marshalTimestampISO8601String(buf, timestamp)
br.a.b = buf
return bytesutil.ToUnsafeString(buf[bufLen:])
}
if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok { if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok {
bucketSizeInt := int64(bf.bucketSize) bucketSizeInt := int64(bf.bucketSize)
if bucketSizeInt <= 0 { if bucketSizeInt <= 0 {

View file

@ -141,7 +141,7 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
return ip == ipNeeded return ip == ipNeeded
}) })
case valueTypeTimestampISO8601: case valueTypeTimestampISO8601:
timestampNeeded, ok := TryParseTimestampISO8601(value) timestampNeeded, ok := tryParseTimestampISO8601(value)
if !ok { if !ok {
bm.resetBits() bm.resetBits()
return return
@ -213,7 +213,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
} }
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) { func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
n, ok := TryParseTimestampISO8601(value) n, ok := tryParseTimestampISO8601(value)
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) { if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
bm.resetBits() bm.resetBits()
return return

View file

@ -245,7 +245,7 @@ func (fi *filterIn) initTimestampISO8601Values() {
m := make(map[string]struct{}, len(values)) m := make(map[string]struct{}, len(values))
buf := make([]byte, 0, len(values)*8) buf := make([]byte, 0, len(values)*8)
for _, v := range values { for _, v := range values {
n, ok := TryParseTimestampISO8601(v) n, ok := tryParseTimestampISO8601(v)
if !ok { if !ok {
continue continue
} }

View file

@ -100,7 +100,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
} }
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) { func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
_, ok := TryParseTimestampISO8601(phrase) _, ok := tryParseTimestampISO8601(phrase)
if ok { if ok {
// Fast path - the phrase contains complete timestamp, so we can use exact search // Fast path - the phrase contains complete timestamp, so we can use exact search
matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens) matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens)

View file

@ -599,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
return &ps, nil return &ps, nil
} }
if !lex.isKeyword(",") { if !lex.isKeyword(",") {
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token) return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", lex.token, sf)
} }
lex.nextToken() lex.nextToken()
} }

View file

@ -262,7 +262,7 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
a := u64s.A a := u64s.A
var minValue, maxValue int64 var minValue, maxValue int64
for i, v := range srcValues { for i, v := range srcValues {
n, ok := TryParseTimestampISO8601(v) n, ok := tryParseTimestampISO8601(v)
if !ok { if !ok {
return dstBuf, dstValues, valueTypeUnknown, 0, 0 return dstBuf, dstValues, valueTypeUnknown, 0, 0
} }
@ -283,14 +283,10 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue) return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue)
} }
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and 'Z' tail and returns unix timestamp in nanoseconds. // TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds.
// //
// The returned timestamp can be negative if s is smaller than 1970 year. // The returned timestamp can be negative if s is smaller than 1970 year.
func TryParseTimestampRFC3339Nano(s string) (int64, bool) { func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
// Do not parse timestamps with timezone other than Z, since they cannot be converted back
// to the same string representation in general case.
// This may break search.
if len(s) < len("2006-01-02T15:04:05Z") { if len(s) < len("2006-01-02T15:04:05Z") {
return 0, false return 0, false
} }
@ -302,12 +298,33 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
s = tail s = tail
nsecs := secs * 1e9 nsecs := secs * 1e9
// Parse optional fractional part of seconds. // Parse timezone offset
n := strings.IndexByte(s, 'Z') n := strings.IndexAny(s, "Z+-")
if n < 0 || n != len(s)-1 { if n < 0 {
return 0, false return 0, false
} }
offsetStr := s[n+1:]
if s[n] != 'Z' {
isMinus := s[n] == '-'
if len(offsetStr) == 0 {
return 0, false
}
offsetNsecs, ok := tryParseTimezoneOffset(offsetStr)
if !ok {
return 0, false
}
if isMinus {
offsetNsecs = -offsetNsecs
}
nsecs -= offsetNsecs
} else {
if len(offsetStr) != 0 {
return 0, false
}
}
s = s[:n] s = s[:n]
// Parse optional fractional part of seconds.
if len(s) == 0 { if len(s) == 0 {
return nsecs, true return nsecs, true
} }
@ -330,10 +347,28 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
return nsecs, true return nsecs, true
} }
// TryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds. func tryParseTimezoneOffset(offsetStr string) (int64, bool) {
n := strings.IndexByte(offsetStr, ':')
if n < 0 {
return 0, false
}
hourStr := offsetStr[:n]
minuteStr := offsetStr[n+1:]
hours, ok := tryParseUint64(hourStr)
if !ok || hours > 24 {
return 0, false
}
minutes, ok := tryParseUint64(minuteStr)
if !ok || minutes > 60 {
return 0, false
}
return int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute, true
}
// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
// //
// The returned timestamp can be negative if s is smaller than 1970 year. // The returned timestamp can be negative if s is smaller than 1970 year.
func TryParseTimestampISO8601(s string) (int64, bool) { func tryParseTimestampISO8601(s string) (int64, bool) {
// Do not parse timestamps with timezone, since they cannot be converted back // Do not parse timestamps with timezone, since they cannot be converted back
// to the same string representation in general case. // to the same string representation in general case.
// This may break search. // This may break search.

View file

@ -149,37 +149,41 @@ func TestTryParseIPv4_Failure(t *testing.T) {
} }
func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) { func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) {
f := func(s string) { f := func(s, timestampExpected string) {
t.Helper() t.Helper()
nsecs, ok := TryParseTimestampRFC3339Nano(s) nsecs, ok := TryParseTimestampRFC3339Nano(s)
if !ok { if !ok {
t.Fatalf("cannot parse timestamp %q", s) t.Fatalf("cannot parse timestamp %q", s)
} }
data := marshalTimestampRFC3339NanoString(nil, nsecs) timestamp := marshalTimestampRFC3339NanoString(nil, nsecs)
if string(data) != s { if string(timestamp) != timestampExpected {
t.Fatalf("unexpected timestamp; got %q; want %q", data, s) t.Fatalf("unexpected timestamp; got %q; want %q", timestamp, timestampExpected)
} }
} }
// No fractional seconds // No fractional seconds
f("2023-01-15T23:45:51Z") f("2023-01-15T23:45:51Z", "2023-01-15T23:45:51Z")
// Different number of fractional seconds // Different number of fractional seconds
f("2023-01-15T23:45:51.1Z") f("2023-01-15T23:45:51.1Z", "2023-01-15T23:45:51.1Z")
f("2023-01-15T23:45:51.12Z") f("2023-01-15T23:45:51.12Z", "2023-01-15T23:45:51.12Z")
f("2023-01-15T23:45:51.123Z") f("2023-01-15T23:45:51.123Z", "2023-01-15T23:45:51.123Z")
f("2023-01-15T23:45:51.1234Z") f("2023-01-15T23:45:51.1234Z", "2023-01-15T23:45:51.1234Z")
f("2023-01-15T23:45:51.12345Z") f("2023-01-15T23:45:51.12345Z", "2023-01-15T23:45:51.12345Z")
f("2023-01-15T23:45:51.123456Z") f("2023-01-15T23:45:51.123456Z", "2023-01-15T23:45:51.123456Z")
f("2023-01-15T23:45:51.1234567Z") f("2023-01-15T23:45:51.1234567Z", "2023-01-15T23:45:51.1234567Z")
f("2023-01-15T23:45:51.12345678Z") f("2023-01-15T23:45:51.12345678Z", "2023-01-15T23:45:51.12345678Z")
f("2023-01-15T23:45:51.123456789Z") f("2023-01-15T23:45:51.123456789Z", "2023-01-15T23:45:51.123456789Z")
// The minimum possible timestamp // The minimum possible timestamp
f("1677-09-21T00:12:44Z") f("1677-09-21T00:12:44Z", "1677-09-21T00:12:44Z")
// The maximum possible timestamp // The maximum possible timestamp
f("2262-04-11T23:47:15.999999999Z") f("2262-04-11T23:47:15.999999999Z", "2262-04-11T23:47:15.999999999Z")
// timestamp with timezone
f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z")
f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z")
} }
func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) { func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
@ -202,10 +206,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
// missing fractional part after dot // missing fractional part after dot
f("2023-01-15T22:15:51.Z") f("2023-01-15T22:15:51.Z")
// timestamp with timezone
f("2023-01-16T00:45:51+01:00")
f("2023-01-16T00:45:51.123+01:00")
// too small year // too small year
f("1676-09-21T00:12:43Z") f("1676-09-21T00:12:43Z")
@ -240,7 +240,7 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
func TestTryParseTimestampISO8601String_Success(t *testing.T) { func TestTryParseTimestampISO8601String_Success(t *testing.T) {
f := func(s string) { f := func(s string) {
t.Helper() t.Helper()
nsecs, ok := TryParseTimestampISO8601(s) nsecs, ok := tryParseTimestampISO8601(s)
if !ok { if !ok {
t.Fatalf("cannot parse timestamp %q", s) t.Fatalf("cannot parse timestamp %q", s)
} }
@ -263,7 +263,7 @@ func TestTryParseTimestampISO8601String_Success(t *testing.T) {
func TestTryParseTimestampISO8601_Failure(t *testing.T) { func TestTryParseTimestampISO8601_Failure(t *testing.T) {
f := func(s string) { f := func(s string) {
t.Helper() t.Helper()
_, ok := TryParseTimestampISO8601(s) _, ok := tryParseTimestampISO8601(s)
if ok { if ok {
t.Fatalf("expecting faulure when parsing %q", s) t.Fatalf("expecting faulure when parsing %q", s)
} }

View file

@ -47,7 +47,7 @@ func BenchmarkTryParseTimestampISO8601(b *testing.B) {
nSum := int64(0) nSum := int64(0)
for pb.Next() { for pb.Next() {
for _, s := range a { for _, s := range a {
n, ok := TryParseTimestampISO8601(s) n, ok := tryParseTimestampISO8601(s)
if !ok { if !ok {
panic(fmt.Errorf("cannot parse timestamp %q", s)) panic(fmt.Errorf("cannot parse timestamp %q", s))
} }