mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/logstorage: properly parse timezone offset at TryParseTimestampRFC3339Nano()
The TryParseTimestampRFC3339Nano() must properly parse RFC3339 timestamps with timezone offsets. While at it, make tryParseTimestampISO8601 function private in order to prevent from improper usage of this function from outside the lib/logstorage package. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508
This commit is contained in:
parent
24825ba63b
commit
9e1c037249
11 changed files with 157 additions and 67 deletions
|
@ -19,12 +19,12 @@ func ExtractTimestampRFC3339NanoFromFields(timeField string, fields []logstorage
|
|||
if f.Name != timeField {
|
||||
continue
|
||||
}
|
||||
nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value)
|
||||
if !ok {
|
||||
if f.Value == "0" || f.Value == "" {
|
||||
if f.Value == "" || f.Value == "0" {
|
||||
return time.Now().UnixNano(), nil
|
||||
}
|
||||
return time.Now().UnixNano(), fmt.Errorf("cannot unmarshal iso8601 timestamp from %s=%q", timeField, f.Value)
|
||||
nsecs, ok := logstorage.TryParseTimestampRFC3339Nano(f.Value)
|
||||
if !ok {
|
||||
return 0, fmt.Errorf("cannot unmarshal rfc3339 timestamp from %s=%q", timeField, f.Value)
|
||||
}
|
||||
f.Value = ""
|
||||
return nsecs, nil
|
||||
|
|
75
app/vlinsert/insertutils/timestamp_test.go
Normal file
75
app/vlinsert/insertutils/timestamp_test.go
Normal file
|
@ -0,0 +1,75 @@
|
|||
package insertutils
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
)
|
||||
|
||||
func TestExtractTimestampRFC3339NanoFromFields_Success(t *testing.T) {
|
||||
f := func(timeField string, fields []logstorage.Field, nsecsExpected int64) {
|
||||
t.Helper()
|
||||
|
||||
nsecs, err := ExtractTimestampRFC3339NanoFromFields(timeField, fields)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if nsecs != nsecsExpected {
|
||||
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, nsecsExpected)
|
||||
}
|
||||
|
||||
for _, f := range fields {
|
||||
if f.Name == timeField {
|
||||
if f.Value != "" {
|
||||
t.Fatalf("unexpected value for field %s; got %q; want %q", timeField, f.Value, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f("time", []logstorage.Field{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "time", Value: "2024-06-18T23:37:20Z"},
|
||||
}, 1718753840000000000)
|
||||
|
||||
f("time", []logstorage.Field{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "time", Value: "2024-06-18T23:37:20+08:00"},
|
||||
}, 1718725040000000000)
|
||||
|
||||
f("time", []logstorage.Field{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "time", Value: "2024-06-18T23:37:20.123-05:30"},
|
||||
}, 1718773640123000000)
|
||||
|
||||
f("time", []logstorage.Field{
|
||||
{Name: "time", Value: "2024-06-18T23:37:20.123456789-05:30"},
|
||||
{Name: "foo", Value: "bar"},
|
||||
}, 1718773640123456789)
|
||||
}
|
||||
|
||||
func TestExtractTimestampRFC3339NanoFromFields_Error(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
|
||||
fields := []logstorage.Field{
|
||||
{Name: "time", Value: s},
|
||||
}
|
||||
nsecs, err := ExtractTimestampRFC3339NanoFromFields("time", fields)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if nsecs != 0 {
|
||||
t.Fatalf("unexpected nsecs; got %d; want %d", nsecs, 0)
|
||||
}
|
||||
}
|
||||
|
||||
f("foobar")
|
||||
|
||||
// no Z at the end
|
||||
f("2024-06-18T23:37:20")
|
||||
|
||||
// incomplete time
|
||||
f("2024-06-18")
|
||||
f("2024-06-18T23:37")
|
||||
}
|
|
@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
|||
|
||||
## tip
|
||||
|
||||
* BUGFIX: properly parse timestamps with timezones during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/) and [querying](https://docs.victoriametrics.com/victorialogs/querying/). This has been broken in [v0.20.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.20.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6508).
|
||||
|
||||
## [v0.22.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.22.0-victorialogs)
|
||||
|
||||
Released at 2024-06-24
|
||||
|
|
|
@ -1180,30 +1180,8 @@ func (br *blockResult) getBucketedValue(s string, bf *byStatsField) string {
|
|||
return bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
}
|
||||
|
||||
if timestamp, ok := TryParseTimestampISO8601(s); ok {
|
||||
bucketSizeInt := int64(bf.bucketSize)
|
||||
if bucketSizeInt <= 0 {
|
||||
bucketSizeInt = 1
|
||||
}
|
||||
bucketOffset := int64(bf.bucketOffset)
|
||||
|
||||
timestamp -= bucketOffset
|
||||
if bf.bucketSizeStr == "month" {
|
||||
timestamp = truncateTimestampToMonth(timestamp)
|
||||
} else if bf.bucketSizeStr == "year" {
|
||||
timestamp = truncateTimestampToYear(timestamp)
|
||||
} else {
|
||||
timestamp -= timestamp % bucketSizeInt
|
||||
}
|
||||
timestamp += bucketOffset
|
||||
|
||||
buf := br.a.b
|
||||
bufLen := len(buf)
|
||||
buf = marshalTimestampISO8601String(buf, timestamp)
|
||||
br.a.b = buf
|
||||
return bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
}
|
||||
|
||||
// There is no need in calling tryParseTimestampISO8601 here, since TryParseTimestampRFC3339Nano
|
||||
// should successfully parse ISO8601 timestamps.
|
||||
if timestamp, ok := TryParseTimestampRFC3339Nano(s); ok {
|
||||
bucketSizeInt := int64(bf.bucketSize)
|
||||
if bucketSizeInt <= 0 {
|
||||
|
|
|
@ -141,7 +141,7 @@ func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
|
|||
return ip == ipNeeded
|
||||
})
|
||||
case valueTypeTimestampISO8601:
|
||||
timestampNeeded, ok := TryParseTimestampISO8601(value)
|
||||
timestampNeeded, ok := tryParseTimestampISO8601(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
@ -213,7 +213,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
}
|
||||
|
||||
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
|
||||
n, ok := TryParseTimestampISO8601(value)
|
||||
n, ok := tryParseTimestampISO8601(value)
|
||||
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
|
|
@ -245,7 +245,7 @@ func (fi *filterIn) initTimestampISO8601Values() {
|
|||
m := make(map[string]struct{}, len(values))
|
||||
buf := make([]byte, 0, len(values)*8)
|
||||
for _, v := range values {
|
||||
n, ok := TryParseTimestampISO8601(v)
|
||||
n, ok := tryParseTimestampISO8601(v)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
|
|
@ -100,7 +100,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
|||
}
|
||||
|
||||
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
_, ok := TryParseTimestampISO8601(phrase)
|
||||
_, ok := tryParseTimestampISO8601(phrase)
|
||||
if ok {
|
||||
// Fast path - the phrase contains complete timestamp, so we can use exact search
|
||||
matchTimestampISO8601ByExactValue(bs, ch, bm, phrase, tokens)
|
||||
|
|
|
@ -599,7 +599,7 @@ func parsePipeStats(lex *lexer, needStatsKeyword bool) (*pipeStats, error) {
|
|||
return &ps, nil
|
||||
}
|
||||
if !lex.isKeyword(",") {
|
||||
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", sf, lex.token)
|
||||
return nil, fmt.Errorf("unexpected token %q after [%s]; want ',', '|' or ')'", lex.token, sf)
|
||||
}
|
||||
lex.nextToken()
|
||||
}
|
||||
|
|
|
@ -262,7 +262,7 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
|
|||
a := u64s.A
|
||||
var minValue, maxValue int64
|
||||
for i, v := range srcValues {
|
||||
n, ok := TryParseTimestampISO8601(v)
|
||||
n, ok := tryParseTimestampISO8601(v)
|
||||
if !ok {
|
||||
return dstBuf, dstValues, valueTypeUnknown, 0, 0
|
||||
}
|
||||
|
@ -283,14 +283,10 @@ func tryTimestampISO8601Encoding(dstBuf []byte, dstValues, srcValues []string) (
|
|||
return dstBuf, dstValues, valueTypeTimestampISO8601, uint64(minValue), uint64(maxValue)
|
||||
}
|
||||
|
||||
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and 'Z' tail and returns unix timestamp in nanoseconds.
|
||||
// TryParseTimestampRFC3339Nano parses 'YYYY-MM-DDThh:mm:ss' with optional nanoseconds part and timezone offset and returns unix timestamp in nanoseconds.
|
||||
//
|
||||
// The returned timestamp can be negative if s is smaller than 1970 year.
|
||||
func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
|
||||
// Do not parse timestamps with timezone other than Z, since they cannot be converted back
|
||||
// to the same string representation in general case.
|
||||
// This may break search.
|
||||
|
||||
if len(s) < len("2006-01-02T15:04:05Z") {
|
||||
return 0, false
|
||||
}
|
||||
|
@ -302,12 +298,33 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
|
|||
s = tail
|
||||
nsecs := secs * 1e9
|
||||
|
||||
// Parse optional fractional part of seconds.
|
||||
n := strings.IndexByte(s, 'Z')
|
||||
if n < 0 || n != len(s)-1 {
|
||||
// Parse timezone offset
|
||||
n := strings.IndexAny(s, "Z+-")
|
||||
if n < 0 {
|
||||
return 0, false
|
||||
}
|
||||
offsetStr := s[n+1:]
|
||||
if s[n] != 'Z' {
|
||||
isMinus := s[n] == '-'
|
||||
if len(offsetStr) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
offsetNsecs, ok := tryParseTimezoneOffset(offsetStr)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
if isMinus {
|
||||
offsetNsecs = -offsetNsecs
|
||||
}
|
||||
nsecs -= offsetNsecs
|
||||
} else {
|
||||
if len(offsetStr) != 0 {
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
s = s[:n]
|
||||
|
||||
// Parse optional fractional part of seconds.
|
||||
if len(s) == 0 {
|
||||
return nsecs, true
|
||||
}
|
||||
|
@ -330,10 +347,28 @@ func TryParseTimestampRFC3339Nano(s string) (int64, bool) {
|
|||
return nsecs, true
|
||||
}
|
||||
|
||||
// TryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
|
||||
func tryParseTimezoneOffset(offsetStr string) (int64, bool) {
|
||||
n := strings.IndexByte(offsetStr, ':')
|
||||
if n < 0 {
|
||||
return 0, false
|
||||
}
|
||||
hourStr := offsetStr[:n]
|
||||
minuteStr := offsetStr[n+1:]
|
||||
hours, ok := tryParseUint64(hourStr)
|
||||
if !ok || hours > 24 {
|
||||
return 0, false
|
||||
}
|
||||
minutes, ok := tryParseUint64(minuteStr)
|
||||
if !ok || minutes > 60 {
|
||||
return 0, false
|
||||
}
|
||||
return int64(hours)*nsecsPerHour + int64(minutes)*nsecsPerMinute, true
|
||||
}
|
||||
|
||||
// tryParseTimestampISO8601 parses 'YYYY-MM-DDThh:mm:ss.mssZ' and returns unix timestamp in nanoseconds.
|
||||
//
|
||||
// The returned timestamp can be negative if s is smaller than 1970 year.
|
||||
func TryParseTimestampISO8601(s string) (int64, bool) {
|
||||
func tryParseTimestampISO8601(s string) (int64, bool) {
|
||||
// Do not parse timestamps with timezone, since they cannot be converted back
|
||||
// to the same string representation in general case.
|
||||
// This may break search.
|
||||
|
|
|
@ -149,37 +149,41 @@ func TestTryParseIPv4_Failure(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTryParseTimestampRFC3339NanoString_Success(t *testing.T) {
|
||||
f := func(s string) {
|
||||
f := func(s, timestampExpected string) {
|
||||
t.Helper()
|
||||
nsecs, ok := TryParseTimestampRFC3339Nano(s)
|
||||
if !ok {
|
||||
t.Fatalf("cannot parse timestamp %q", s)
|
||||
}
|
||||
data := marshalTimestampRFC3339NanoString(nil, nsecs)
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected timestamp; got %q; want %q", data, s)
|
||||
timestamp := marshalTimestampRFC3339NanoString(nil, nsecs)
|
||||
if string(timestamp) != timestampExpected {
|
||||
t.Fatalf("unexpected timestamp; got %q; want %q", timestamp, timestampExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// No fractional seconds
|
||||
f("2023-01-15T23:45:51Z")
|
||||
f("2023-01-15T23:45:51Z", "2023-01-15T23:45:51Z")
|
||||
|
||||
// Different number of fractional seconds
|
||||
f("2023-01-15T23:45:51.1Z")
|
||||
f("2023-01-15T23:45:51.12Z")
|
||||
f("2023-01-15T23:45:51.123Z")
|
||||
f("2023-01-15T23:45:51.1234Z")
|
||||
f("2023-01-15T23:45:51.12345Z")
|
||||
f("2023-01-15T23:45:51.123456Z")
|
||||
f("2023-01-15T23:45:51.1234567Z")
|
||||
f("2023-01-15T23:45:51.12345678Z")
|
||||
f("2023-01-15T23:45:51.123456789Z")
|
||||
f("2023-01-15T23:45:51.1Z", "2023-01-15T23:45:51.1Z")
|
||||
f("2023-01-15T23:45:51.12Z", "2023-01-15T23:45:51.12Z")
|
||||
f("2023-01-15T23:45:51.123Z", "2023-01-15T23:45:51.123Z")
|
||||
f("2023-01-15T23:45:51.1234Z", "2023-01-15T23:45:51.1234Z")
|
||||
f("2023-01-15T23:45:51.12345Z", "2023-01-15T23:45:51.12345Z")
|
||||
f("2023-01-15T23:45:51.123456Z", "2023-01-15T23:45:51.123456Z")
|
||||
f("2023-01-15T23:45:51.1234567Z", "2023-01-15T23:45:51.1234567Z")
|
||||
f("2023-01-15T23:45:51.12345678Z", "2023-01-15T23:45:51.12345678Z")
|
||||
f("2023-01-15T23:45:51.123456789Z", "2023-01-15T23:45:51.123456789Z")
|
||||
|
||||
// The minimum possible timestamp
|
||||
f("1677-09-21T00:12:44Z")
|
||||
f("1677-09-21T00:12:44Z", "1677-09-21T00:12:44Z")
|
||||
|
||||
// The maximum possible timestamp
|
||||
f("2262-04-11T23:47:15.999999999Z")
|
||||
f("2262-04-11T23:47:15.999999999Z", "2262-04-11T23:47:15.999999999Z")
|
||||
|
||||
// timestamp with timezone
|
||||
f("2023-01-16T00:45:51+01:00", "2023-01-15T23:45:51Z")
|
||||
f("2023-01-16T00:45:51.123-01:00", "2023-01-16T01:45:51.123Z")
|
||||
}
|
||||
|
||||
func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
|
||||
|
@ -202,10 +206,6 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
|
|||
// missing fractional part after dot
|
||||
f("2023-01-15T22:15:51.Z")
|
||||
|
||||
// timestamp with timezone
|
||||
f("2023-01-16T00:45:51+01:00")
|
||||
f("2023-01-16T00:45:51.123+01:00")
|
||||
|
||||
// too small year
|
||||
f("1676-09-21T00:12:43Z")
|
||||
|
||||
|
@ -240,7 +240,7 @@ func TestTryParseTimestampRFC3339Nano_Failure(t *testing.T) {
|
|||
func TestTryParseTimestampISO8601String_Success(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
nsecs, ok := TryParseTimestampISO8601(s)
|
||||
nsecs, ok := tryParseTimestampISO8601(s)
|
||||
if !ok {
|
||||
t.Fatalf("cannot parse timestamp %q", s)
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ func TestTryParseTimestampISO8601String_Success(t *testing.T) {
|
|||
func TestTryParseTimestampISO8601_Failure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
_, ok := TryParseTimestampISO8601(s)
|
||||
_, ok := tryParseTimestampISO8601(s)
|
||||
if ok {
|
||||
t.Fatalf("expecting faulure when parsing %q", s)
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ func BenchmarkTryParseTimestampISO8601(b *testing.B) {
|
|||
nSum := int64(0)
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
n, ok := TryParseTimestampISO8601(s)
|
||||
n, ok := tryParseTimestampISO8601(s)
|
||||
if !ok {
|
||||
panic(fmt.Errorf("cannot parse timestamp %q", s))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue