lib/storage: fix indexdb rotation infinite loop (#4249)

When using `retentionTimezoneOffset` and having local timezone being more than 4 hours different from UTC indexdb retention calculation could return negative value. This caused indexdb rotation to get in loop.
Fix calculation of offset to use `retentionTimezoneOffset` value properly and add test to cover all legit timezone configs.
See:
- https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207
- https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4206

Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
Co-authored-by: Nikolay <nik@victoriametrics.com>
This commit is contained in:
Zakhar Bessarab 2023-05-04 19:16:48 +04:00 committed by Aliaksandr Valialkin
parent 26fc4afff8
commit 348693ff84
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
3 changed files with 44 additions and 12 deletions

View file

@ -55,6 +55,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly display an error when using `query` function for templating value of `-external.alert.source` flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4181).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly return empty slices instead of nil for `/api/v1/rules` and `/api/v1/alerts` API handlers. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_endpoints_name` label for all ports discovered from endpoint. Previously, ports not matched by `Service` did not have this label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4154) for details.
* BUGFIX: fix indexdb rotation getting in infinite loop when using `retentionTimezoneOffset` and local timezone is not UTC. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207) for details. Thanks to @faceair for the fix.
* BUGFIX: max value for `memory.allowedPercent` changed from 200 to 100. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4171).
## [v1.90.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.90.0)

View file

@ -1090,18 +1090,23 @@ func SetRetentionTimezoneOffset(offset time.Duration) {
var retentionTimezoneOffsetMsecs int64
func nextRetentionDuration(retentionMsecs int64) time.Duration {
// Round retentionMsecs to days. This guarantees that per-day inverted index works as expected.
retentionMsecs = ((retentionMsecs + msecPerDay - 1) / msecPerDay) * msecPerDay
t := time.Now().UnixNano() / 1e6
deadline := ((t + retentionMsecs - 1) / retentionMsecs) * retentionMsecs
nowMsecs := time.Now().UnixNano() / 1e6
return nextRetentionDurationAt(nowMsecs, retentionMsecs)
}
func nextRetentionDurationAt(atMsecs int64, retentionMsecs int64) time.Duration {
// Schedule the deadline to +4 hours from the next retention period start.
// This should prevent from possible double deletion of indexdb
// due to time drift - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/248 .
deadline += 4 * 3600 * 1000
retentionOffsetMsecs := retentionTimezoneOffsetMsecs - int64(4*3600*1000)
// Round retentionMsecs to days. This guarantees that per-day inverted index works as expected
deadline := ((atMsecs + retentionMsecs + retentionOffsetMsecs - 1) / retentionMsecs) * retentionMsecs
// The effect of time zone on retention period is moved out.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574
deadline -= retentionTimezoneOffsetMsecs
return time.Duration(deadline-t) * time.Millisecond
deadline -= retentionOffsetMsecs
return time.Duration(deadline-atMsecs) * time.Millisecond
}
// SearchMetricNames returns marshaled metric names matching the given tfss on the given tr.

View file

@ -494,12 +494,38 @@ func TestMetricRowMarshalUnmarshal(t *testing.T) {
}
func TestNextRetentionDuration(t *testing.T) {
for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 {
d := nextRetentionDuration(int64(retentionMonths * msecsPerMonth))
validateRetention := func(now time.Time, retention float64) {
t.Helper()
nowMsecs := now.UnixMilli()
d := nextRetentionDurationAt(nowMsecs, int64(retention*msecsPerMonth))
if d <= 0 {
currTime := time.Now().UTC()
nextTime := time.Now().UTC().Add(d)
t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retentionMonths, nextTime, currTime, retentionMonths)
nextTime := now.Add(d)
t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retention, nextTime, now, retention)
}
}
for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 {
// UTC offsets are in range [-12 hours, +14 hours].
// Verify that any legit combination of retention timezone and local time
// will return valid retention duration.
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207
for retentionOffset := -12; retentionOffset <= 14; retentionOffset++ {
for localTimeOffset := -12; localTimeOffset <= 14; localTimeOffset++ {
SetRetentionTimezoneOffset(time.Duration(retentionOffset) * time.Hour)
tz := time.FixedZone("", -1*localTimeOffset*60*60)
now := time.Now().In(tz)
validateRetention(now, retentionMonths)
now = time.Date(2023, 4, 27, 3, 58, 0, 0, tz)
validateRetention(now, retentionMonths)
now = time.Date(2023, 4, 27, 4, 1, 0, 0, tz)
validateRetention(now, retentionMonths)
now = time.Date(2023, 4, 27, 6, 0, 0, 0, tz)
validateRetention(now, retentionMonths)
}
}
}
}