From 88c4c6f465830cc1a52f14f859e64f4882442ca5 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Fri, 20 May 2022 17:31:16 +0200 Subject: [PATCH] vmalert: add new metric `vmalert_iteration_interval_seconds` (#2623) The new metric shows the configured evaluation interval per group. The metric updates its value when the group's interval is changed during hot reload. The new metric can be used to estimate how close a group is to missing evaluation rounds. The following query shows the percentage of the interval that the group spends evaluating all rules before the next round: ``` (max(vmalert_iteration_duration_seconds{quantile="0.99"}) / vmalert_iteration_interval_seconds) * 100 ``` https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2618 Signed-off-by: hagen1778 --- app/vmalert/group.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/app/vmalert/group.go b/app/vmalert/group.go index 3009a7650..c428537ba 100644 --- a/app/vmalert/group.go +++ b/app/vmalert/group.go @@ -49,14 +49,21 @@ type groupMetrics struct { iterationTotal *utils.Counter iterationDuration *utils.Summary iterationMissed *utils.Counter + iterationInterval *utils.Gauge } -func newGroupMetrics(name, file string) *groupMetrics { +func newGroupMetrics(g *Group) *groupMetrics { m := &groupMetrics{} - labels := fmt.Sprintf(`group=%q, file=%q`, name, file) + labels := fmt.Sprintf(`group=%q, file=%q`, g.Name, g.File) m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels)) m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels)) m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels)) + m.iterationInterval = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 { + g.mu.RLock() + i := g.Interval.Seconds() + g.mu.RUnlock() + return i + }) return m } @@ -92,13 +99,13 @@ func newGroup(cfg config.Group, 
qb datasource.QuerierBuilder, defaultInterval ti finishedCh: make(chan struct{}), updateCh: make(chan *Group), } - g.metrics = newGroupMetrics(g.Name, g.File) if g.Interval == 0 { g.Interval = defaultInterval } if g.Concurrency < 1 { g.Concurrency = 1 } + g.metrics = newGroupMetrics(g) rules := make([]Rule, len(cfg.Rules)) for i, r := range cfg.Rules { var extraLabels map[string]string @@ -222,6 +229,8 @@ func (g *Group) close() { g.metrics.iterationDuration.Unregister() g.metrics.iterationTotal.Unregister() + g.metrics.iterationMissed.Unregister() + g.metrics.iterationInterval.Unregister() for _, rule := range g.Rules { rule.Close() }