mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/storage: fix indexdb rotation infinite loop (#4249)
When using `retentionTimezoneOffset` and having local timezone being more than 4 hours different from UTC indexdb retention calculation could return negative value. This caused indexdb rotation to get in loop. Fix calculation of offset to use `retentionTimezoneOffset` value properly and add test to cover all legit timezone configs. See: - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207 - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4206 Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: Nikolay <nik@victoriametrics.com>
This commit is contained in:
parent
89ec85c5db
commit
a50d63c376
3 changed files with 44 additions and 12 deletions
|
@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
|||
* BUGFIX: properly limit the number of [OpenTSDB HTTP](https://docs.victoriametrics.com/#sending-opentsdb-data-via-http-apiput-requests) concurrent requests specified via `-maxConcurrentInserts` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4204). Thanks to @zouxiang1993 for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4208).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly return empty slices instead of nil for `/api/v1/rules` and `/api/v1/alerts` API handlers. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_endpoints_name` label for all ports discovered from endpoint. Previously, ports not matched by `Service` did not have this label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4154) for details.
|
||||
* BUGFIX: fix possible infinite loop during `indexdb` rotation when `-retentionTimezoneOffset` command-line flag is set and the local timezone is not UTC. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207). Thanks to @faceair for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4206).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html): suppress `series after dedup` error message in logs when `-remoteWrite.streamAggr.dedupInterval` command-line flag is set at [vmagent](https://docs.victoriametrics.com/vmgent.html) or when `-streamAggr.dedupInterval` command-line flag is set at [single-node VictoriaMetrics](https://docs.victoriametrics.com/).
|
||||
|
||||
## [v1.87.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.5)
|
||||
|
|
|
@ -1048,18 +1048,23 @@ func SetRetentionTimezoneOffset(offset time.Duration) {
|
|||
var retentionTimezoneOffsetMsecs int64
|
||||
|
||||
func nextRetentionDuration(retentionMsecs int64) time.Duration {
|
||||
// Round retentionMsecs to days. This guarantees that per-day inverted index works as expected.
|
||||
retentionMsecs = ((retentionMsecs + msecPerDay - 1) / msecPerDay) * msecPerDay
|
||||
t := time.Now().UnixNano() / 1e6
|
||||
deadline := ((t + retentionMsecs - 1) / retentionMsecs) * retentionMsecs
|
||||
nowMsecs := time.Now().UnixNano() / 1e6
|
||||
return nextRetentionDurationAt(nowMsecs, retentionMsecs)
|
||||
}
|
||||
|
||||
func nextRetentionDurationAt(atMsecs int64, retentionMsecs int64) time.Duration {
|
||||
// Schedule the deadline to +4 hours from the next retention period start.
|
||||
// This should prevent from possible double deletion of indexdb
|
||||
// due to time drift - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/248 .
|
||||
deadline += 4 * 3600 * 1000
|
||||
retentionOffsetMsecs := retentionTimezoneOffsetMsecs - int64(4*3600*1000)
|
||||
|
||||
// Round retentionMsecs to days. This guarantees that per-day inverted index works as expected
|
||||
deadline := ((atMsecs + retentionMsecs + retentionOffsetMsecs - 1) / retentionMsecs) * retentionMsecs
|
||||
|
||||
// The effect of time zone on retention period is moved out.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574
|
||||
deadline -= retentionTimezoneOffsetMsecs
|
||||
return time.Duration(deadline-t) * time.Millisecond
|
||||
deadline -= retentionOffsetMsecs
|
||||
return time.Duration(deadline-atMsecs) * time.Millisecond
|
||||
}
|
||||
|
||||
// SearchMetricNames returns marshaled metric names matching the given tfss on the given tr.
|
||||
|
|
|
@ -408,12 +408,38 @@ func TestMetricRowMarshalUnmarshal(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestNextRetentionDuration(t *testing.T) {
|
||||
for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 {
|
||||
d := nextRetentionDuration(int64(retentionMonths * msecsPerMonth))
|
||||
validateRetention := func(now time.Time, retention float64) {
|
||||
t.Helper()
|
||||
|
||||
nowMsecs := now.UnixMilli()
|
||||
d := nextRetentionDurationAt(nowMsecs, int64(retention*msecsPerMonth))
|
||||
if d <= 0 {
|
||||
currTime := time.Now().UTC()
|
||||
nextTime := time.Now().UTC().Add(d)
|
||||
t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retentionMonths, nextTime, currTime, retentionMonths)
|
||||
nextTime := now.Add(d)
|
||||
t.Fatalf("unexpected retention duration for retentionMonths=%f; got %s; must be %s + %f months", retention, nextTime, now, retention)
|
||||
}
|
||||
}
|
||||
|
||||
for retentionMonths := float64(0.1); retentionMonths < 120; retentionMonths += 0.3 {
|
||||
// UTC offsets are in range [-12 hours, +14 hours].
|
||||
// Verify that any legit combination of retention timezone and local time
|
||||
// will return valid retention duration.
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4207
|
||||
for retentionOffset := -12; retentionOffset <= 14; retentionOffset++ {
|
||||
for localTimeOffset := -12; localTimeOffset <= 14; localTimeOffset++ {
|
||||
SetRetentionTimezoneOffset(time.Duration(retentionOffset) * time.Hour)
|
||||
tz := time.FixedZone("", -1*localTimeOffset*60*60)
|
||||
now := time.Now().In(tz)
|
||||
validateRetention(now, retentionMonths)
|
||||
|
||||
now = time.Date(2023, 4, 27, 3, 58, 0, 0, tz)
|
||||
validateRetention(now, retentionMonths)
|
||||
|
||||
now = time.Date(2023, 4, 27, 4, 1, 0, 0, tz)
|
||||
validateRetention(now, retentionMonths)
|
||||
|
||||
now = time.Date(2023, 4, 27, 6, 0, 0, 0, tz)
|
||||
validateRetention(now, retentionMonths)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue