lib/logstorage: properly parse timezone offset at TryParseTimestampRFC3339Nano()

The TryParseTimestampRFC3339Nano() must properly parse RFC3339 timestamps with timezone offsets.

While at it, make tryParseTimestampISO8601 function private in order to prevent
from improper usage of this function from outside the lib/logstorage package.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508
This commit is contained in:
Aliaksandr Valialkin 2024-06-25 14:08:01 +02:00
parent 24825ba63b
commit 9e1c037249
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
11 changed files with 157 additions and 67 deletions

View file

@ -19,12 +19,12 @@ func ExtractTimestampRFC3339NanoFromFields(timeField string, fields []logstorage
if f.Name != timeField {
continue
}
if f.Value == "" || f.Value == "0" {
return time.Now().UnixNano(), nil
}
nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value)
if !ok {
if f.Value == "0" || f.Value == "" {
return time.Now().UnixNano(), nil
}
return time.Now().UnixNano(), fmt.Errorf("cannot unmarshal iso8601 timestamp from %s=%q", timeField, f.Value)
return 0, fmt.Errorf("cannot unmarshal rfc3339 timestamp from %s=%q", timeField, f.Value)
}
f.Value = ""
return nsecs, nil

View file

@ -0,0 +1,75 @@
package insertutils
import (
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
)
func TestExtractTimestampRFC3339NanoFromFields_Success(t *testing.T) {
f := func(timeField string, fields []logstorage.Field, nsecsExpected int64) {
t.Helper()
nsecs, err := ExtractTimestampRFC3339NanoFromFields(timeField, fields)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if nsecs != nsecsExpected {
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, nsecsExpected)
}
for _, f := range fields {
if f.Name == timeField {
if f.Value != "" {
t.Fatalf("unexpected value for field %s; got %q; want %q", timeField, f.Value, "")
}
}
}
}
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20Z"},
}, 1718753840000000000)
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20+08:00"},
}, 1718725040000000000)
f("time", []logstorage.Field{
{Name: "foo", Value: "bar"},
{Name: "time", Value: "2024-06-18T23:37:20.123-05:30"},
}, 1718773640123000000)
f("time", []logstorage.Field{
{Name: "time", Value: "2024-06-18T23:37:20.123456789-05:30"},
{Name: "foo", Value: "bar"},
}, 1718773640123456789)
}
func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) {
f := func(s string) {
t.Helper()
fields := []logstorage.Field{
{Name: "time", Value: s},
}
nsecs, err := ExtractTimestampRFC3339NanoFromFields("time", fields)
if err == nil {
t.Fatalf("expecting non-nil error")
}
if nsecs != 0 {
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, 0)
}
}
f("foobar")
// no Z at the end
f("2024-06-18T23:37:20")
// incomplete time
f("2024-06-18")
f("2024-06-18T23:37")
}

View file

@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508).
## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs)
Released at 2024-06-24

View file

@ -1180,30 +1180,8 @@ func (br *blockResult) getBucketedValue(s string, bf *byStatsField) string {
return bytesutil.ToUnsafeString(buf[bufLen:])
}
if timestamp, ok := TryParseTimestampISO8601(s); ok {
bucketSizeInt := int64(bf.bucketSize)
if bucketSizeInt <= 0 {
bucketSizeInt = 1
}
bucketOffset := int64(bf.bucketOffset)
timestamp -= bucketOffset
if bf.bucketSizeStr == "month" {
timestamp = truncateTimestampToMonth(timestamp)
} else if bf.bucketSizeStr == "year" {
timestamp = truncateTimestampToYear(timestamp)
} else {
timestamp -= timestamp % bucketSizeInt
}
timestamp += bucketOffset
buf := br.a.b
bufLen := len(buf)
buf = marshalTimestampISO8601String(buf, timestamp)
br.a.b = buf
return bytesutil.ToUnsafeString(buf[bufLen:])
}
// There is no need in calling tryParseTimestampISO8601 here, since TryParseTimestampRFC3339Nano
// should successfully parse ISO8601 timestamps.
if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok {
bucketSizeInt := int64(bf.bucketSize)
if bucketSizeInt <= 0 {

View file

@ -141,7 +141,7 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
return ip == ipNeeded
})
case valueTypeTimestampISO8601:
timestampNeeded, ok := TryParseTimestampISO8601(value)
timestampNeeded, ok := tryParseTimestampISO8601(value)
if !ok {
bm.resetBits()
return
@ -213,7 +213,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
n, ok := TryParseTimestampISO8601(value)
n, ok := tryParseTimestampISO8601(value)
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
bm.resetBits()
return

View file

@ -245,7 +245,7 @@ func (fi *filterIn) initTimestampISO8601Values() {
m := make(map[string]struct{}, len(values))
buf := make([]byte, 0, len(values)*8)
for _, v := range values {
n, ok := TryParseTimestampISO8601(v)
n, ok := tryParseTimestampISO8601(v)
if !ok {
continue
}

View file

@ -100,7 +100,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
}
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
_, ok := TryParseTimestampISO8601(phrase)
_, ok := tryParseTimestampISO8601(phrase)
if ok {
// Fast path - the phrase contains complete timestamp, so we can use exact search
matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens)

View file

@ -599,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
return &ps, nil
}
if !lex.isKeyword(",") {
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token)
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", lex.token, sf)
}
lex.nextToken()
}

View file

@ -262,7 +262,7 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
a := u64s.A
var minValue, maxValue int64
for i, v := range srcValues {
n, ok := TryParseTimestampISO8601(v)
n, ok := tryParseTimestampISO8601(v)
if !ok {
return dstBuf, dstValues, valueTypeUnknown, 0, 0
}
@ -283,14 +283,10 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue)
}
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and 'Z' tail and returns unix timestamp in nanoseconds.
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds.
//
// The returned timestamp can be negative if s is smaller than 1970 year.
func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
// Do not parse timestamps with timezone other than Z, since they cannot be converted back
// to the same string representation in general case.
// This may break search.
if len(s) < len("2006-01-02T15:04:05Z") {
return 0, false
}
@ -302,12 +298,33 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
s = tail
nsecs := secs * 1e9
// Parse optional fractional part of seconds.
n := strings.IndexByte(s, 'Z')
if n < 0 || n != len(s)-1 {
// Parse timezone offset
n := strings.IndexAny(s, "Z+-")
if n < 0 {
return 0, false
}
offsetStr := s[n+1:]
if s[n] != 'Z' {
isMinus := s[n] == '-'
if len(offsetStr) == 0 {
return 0, false
}
offsetNsecs, ok := tryParseTimezoneOffset(offsetStr)
if !ok {
return 0, false
}
if isMinus {
offsetNsecs = -offsetNsecs
}
nsecs -= offsetNsecs
} else {
if len(offsetStr) != 0 {
return 0, false
}
}
s = s[:n]
// Parse optional fractional part of seconds.
if len(s) == 0 {
return nsecs, true
}
@ -330,10 +347,28 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
return nsecs, true
}
// TryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
func tryParseTimezoneOffset(offsetStr string) (int64, bool) {
n := strings.IndexByte(offsetStr, ':')
if n < 0 {
return 0, false
}
hourStr := offsetStr[:n]
minuteStr := offsetStr[n+1:]
hours, ok := tryParseUint64(hourStr)
if !ok || hours > 24 {
return 0, false
}
minutes, ok := tryParseUint64(minuteStr)
if !ok || minutes > 60 {
return 0, false
}
return int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute, true
}
// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
//
// The returned timestamp can be negative if s is smaller than 1970 year.
func TryParseTimestampISO8601(s string) (int64, bool) {
func tryParseTimestampISO8601(s string) (int64, bool) {
// Do not parse timestamps with timezone, since they cannot be converted back
// to the same string representation in general case.
// This may break search.

View file

@ -149,37 +149,41 @@ func TestTryParseIPv4_Failure(t *testing.T) {
}
func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) {
f := func(s string) {
f := func(s, timestampExpected string) {
t.Helper()
nsecs, ok := TryParseTimestampRFC3339Nano(s)
if !ok {
t.Fatalf("cannot parse timestamp %q", s)
}
data := marshalTimestampRFC3339NanoString(nil, nsecs)
if string(data) != s {
t.Fatalf("unexpected timestamp; got %q; want %q", data, s)
timestamp := marshalTimestampRFC3339NanoString(nil, nsecs)
if string(timestamp) != timestampExpected {
t.Fatalf("unexpected timestamp; got %q; want %q", timestamp, timestampExpected)
}
}
// No fractional seconds
f("2023-01-15T23:45:51Z")
f("2023-01-15T23:45:51Z", "2023-01-15T23:45:51Z")
// Different number of fractional seconds
f("2023-01-15T23:45:51.1Z")
f("2023-01-15T23:45:51.12Z")
f("2023-01-15T23:45:51.123Z")
f("2023-01-15T23:45:51.1234Z")
f("2023-01-15T23:45:51.12345Z")
f("2023-01-15T23:45:51.123456Z")
f("2023-01-15T23:45:51.1234567Z")
f("2023-01-15T23:45:51.12345678Z")
f("2023-01-15T23:45:51.123456789Z")
f("2023-01-15T23:45:51.1Z", "2023-01-15T23:45:51.1Z")
f("2023-01-15T23:45:51.12Z", "2023-01-15T23:45:51.12Z")
f("2023-01-15T23:45:51.123Z", "2023-01-15T23:45:51.123Z")
f("2023-01-15T23:45:51.1234Z", "2023-01-15T23:45:51.1234Z")
f("2023-01-15T23:45:51.12345Z", "2023-01-15T23:45:51.12345Z")
f("2023-01-15T23:45:51.123456Z", "2023-01-15T23:45:51.123456Z")
f("2023-01-15T23:45:51.1234567Z", "2023-01-15T23:45:51.1234567Z")
f("2023-01-15T23:45:51.12345678Z", "2023-01-15T23:45:51.12345678Z")
f("2023-01-15T23:45:51.123456789Z", "2023-01-15T23:45:51.123456789Z")
// The minimum possible timestamp
f("1677-09-21T00:12:44Z")
f("1677-09-21T00:12:44Z", "1677-09-21T00:12:44Z")
// The maximum possible timestamp
f("2262-04-11T23:47:15.999999999Z")
f("2262-04-11T23:47:15.999999999Z", "2262-04-11T23:47:15.999999999Z")
// timestamp with timezone
f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z")
f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z")
}
func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
@ -202,10 +206,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
// missing fractional part after dot
f("2023-01-15T22:15:51.Z")
// timestamp with timezone
f("2023-01-16T00:45:51+01:00")
f("2023-01-16T00:45:51.123+01:00")
// too small year
f("1676-09-21T00:12:43Z")
@ -240,7 +240,7 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
func TestTryParseTimestampISO8601String_Success(t *testing.T) {
f := func(s string) {
t.Helper()
nsecs, ok := TryParseTimestampISO8601(s)
nsecs, ok := tryParseTimestampISO8601(s)
if !ok {
t.Fatalf("cannot parse timestamp %q", s)
}
@ -263,7 +263,7 @@ func TestTryParseTimestampISO8601String_Success(t *testing.T) {
func TestTryParseTimestampISO8601_Failure(t *testing.T) {
f := func(s string) {
t.Helper()
_, ok := TryParseTimestampISO8601(s)
_, ok := tryParseTimestampISO8601(s)
if ok {
t.Fatalf("expecting faulure when parsing %q", s)
}

View file

@ -47,7 +47,7 @@ func BenchmarkTryParseTimestampISO8601(b *testing.B) {
nSum := int64(0)
for pb.Next() {
for _, s := range a {
n, ok := TryParseTimestampISO8601(s)
n, ok := tryParseTimestampISO8601(s)
if !ok {
panic(fmt.Errorf("cannot parse timestamp %q", s))
}