From fdccb566208b0d3a51f61ca43aee3b6798832df0 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 13 Jul 2023 17:11:22 +0200 Subject: [PATCH] vmalert: check for negative offset for missed rounds (#4628) It could happen for low evaluation intervals and irregular delays during execution that evaluation time would get a negative offset. This could result in a cumulative discrepancy between the actual time and the evaluation time for rules. Signed-off-by: hagen1778 --- app/vmalert/group.go | 5 +++++ docs/CHANGELOG.md | 1 + 2 files changed, 6 insertions(+) diff --git a/app/vmalert/group.go b/app/vmalert/group.go index 1aae05b5d..10581b1fc 100644 --- a/app/vmalert/group.go +++ b/app/vmalert/group.go @@ -389,6 +389,11 @@ func (g *Group) start(ctx context.Context, nts func() []notifier.Notifier, rw *r logger.Infof("group %q re-started; interval=%v; concurrency=%d", g.Name, g.Interval, g.Concurrency) case <-t.C: missed := (time.Since(evalTS) / g.Interval) - 1 + if missed < 0 { + // missed can become < 0 due to irregular delays during evaluation + // which can result in time.Since(evalTS) < g.Interval + missed = 0 + } if missed > 0 { g.metrics.iterationMissed.Inc() } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 899d9cfdb..0fc9e5bb6 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -52,6 +52,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: add validation for invalid [partial RFC3339 timestamp formats](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#timestamp-formats) in query and export APIs. * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): interrupt explore procedure in influx mode if vmctl found no numeric fields. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): use RFC3339 time format in query args instead of unix timestamp for all issued queries to Prometheus-like datasources. 
+* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): correctly calculate evaluation time for rules. Previously, there was a small chance of a discrepancy between the actual time and the rules evaluation time if the evaluation interval was lower than the execution time for rules within the group. * BUGFIX: vmselect: fix timestamp alignment for Prometheus querying API if time argument is less than 10m from the beginning of Unix epoch. ## [v1.91.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.91.3)