diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 799ca4f2e5..2b13e9f3e3 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: properly limit the number of [OpenTSDB HTTP](https://docs.victoriametrics.com/#sending-opentsdb-data-via-http-apiput-requests) concurrent requests specified via `-maxConcurrentInserts` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4204). Thanks to @zouxiang1993 for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4208). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly return empty slices instead of nil for `/api/v1/rules` and `/api/v1/alerts` API handlers. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_endpoints_name` label for all ports discovered from endpoint. Previously, ports not matched by `Service` did not have this label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4154) for details. +* BUGFIX: fix possible infinite loop during `indexdb` rotation when `-retentionTimezoneOffset` command-line flag is set and the local timezone is not UTC. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207). Thanks to @faceair for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4206). * BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html): suppress `series after dedup` error message in logs when `-remoteWrite.streamAggr.dedupInterval` command-line flag is set at [vmagent](https://docs.victoriametrics.com/vmgent.html) or when `-streamAggr.dedupInterval` command-line flag is set at [single-node VictoriaMetrics](https://docs.victoriametrics.com/). ## [v1.87.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.5) diff --git a/lib/storage/storage.go b/lib/storage/storage.go index ed15d11cdf..49c103be76 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1111,18 +1111,23 @@ func SetRetentionTimezoneOffset(offset time.Duration) { var retentionTimezoneOffsetMsecs int64 func nextRetentionDuration(retentionMsecs int64) time.Duration { - // Round retentionMsecs to days. This guarantees that per-day inverted index works as expected. - retentionMsecs = ((retentionMsecs + msecPerDay - 1) / msecPerDay) * msecPerDay - t := time.Now().UnixNano() / 1e6 - deadline := ((t + retentionMsecs - 1) / retentionMsecs) * retentionMsecs + nowMsecs := time.Now().UnixNano() / 1e6 + return nextRetentionDurationAt(nowMsecs, retentionMsecs) +} + +func nextRetentionDurationAt(atMsecs int64, retentionMsecs int64) time.Duration { // Schedule the deadline to +4 hours from the next retention period start. // This should prevent from possible double deletion of indexdb // due to time drift - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/248 . - deadline += 4 * 3600 * 1000 + retentionOffsetMsecs := retentionTimezoneOffsetMsecs - int64(4*3600*1000) + + // Round retentionMsecs to days. This guarantees that per-day inverted index works as expected + deadline := ((atMsecs + retentionMsecs + retentionOffsetMsecs - 1) / retentionMsecs) * retentionMsecs + // The effect of time zone on retention period is moved out. // See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574 - deadline -= retentionTimezoneOffsetMsecs - return time.Duration(deadline-t) * time.Millisecond + deadline -= retentionOffsetMsecs + return time.Duration(deadline-atMsecs) * time.Millisecond } // SearchMetricNames returns marshaled metric names matching the given tfss on the given tr. diff --git a/lib/storage/storage_test.go b/lib/storage/storage_test.go index e22d9cad48..e43843877b 100644 --- a/lib/storage/storage_test.go +++ b/lib/storage/storage_test.go @@ -493,12 +493,38 @@ func TestMetricRowMarshalUnmarshal(t *testing.T) { } func TestNextRetentionDuration(t *testing.T) { - for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 { - d := nextRetentionDuration(int64(retentionMonths * msecsPerMonth)) + validateRetention := func(now time.Time, retention float64) { + t.Helper() + + nowMsecs := now.UnixMilli() + d := nextRetentionDurationAt(nowMsecs, int64(retention*msecsPerMonth)) if d <= 0 { - currTime := time.Now().UTC() - nextTime := time.Now().UTC().Add(d) - t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retentionMonths, nextTime, currTime, retentionMonths) + nextTime := now.Add(d) + t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retention, nextTime, now, retention) + } + } + + for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 { + // UTC offsets are in range [-12 hours, +14 hours]. + // Verify that any legit combination of retention timezone and local time + // will return valid retention duration. + // See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207 + for retentionOffset := -12; retentionOffset <= 14; retentionOffset++ { + for localTimeOffset := -12; localTimeOffset <= 14; localTimeOffset++ { + SetRetentionTimezoneOffset(time.Duration(retentionOffset) * time.Hour) + tz := time.FixedZone("", -1*localTimeOffset*60*60) + now := time.Now().In(tz) + validateRetention(now, retentionMonths) + + now = time.Date(2023, 4, 27, 3, 58, 0, 0, tz) + validateRetention(now, retentionMonths) + + now = time.Date(2023, 4, 27, 4, 1, 0, 0, tz) + validateRetention(now, retentionMonths) + + now = time.Date(2023, 4, 27, 6, 0, 0, 0, tz) + validateRetention(now, retentionMonths) + } } } }