vmalert: add new metric vmalert_iteration_interval_seconds (#2623)

The new metric shows the configured evaluation interval per group.
The metric's value is updated when a group's interval is changed during
a hot reload.
The new metric can be used to estimate how close a group
is to missing evaluation rounds. The following query
shows the percentage of the interval that the group spends evaluating all of its rules
before the next round:
```
(max(vmalert_iteration_duration_seconds{quantile="0.99"}) / vmalert_iteration_interval_seconds) * 100
```

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2618
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-05-20 17:31:16 +02:00 committed by Aliaksandr Valialkin
parent a723de6ae1
commit 88c4c6f465
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1

View file

@ -49,14 +49,21 @@ type groupMetrics struct {
iterationTotal *utils.Counter iterationTotal *utils.Counter
iterationDuration *utils.Summary iterationDuration *utils.Summary
iterationMissed *utils.Counter iterationMissed *utils.Counter
iterationInterval *utils.Gauge
} }
func newGroupMetrics(name, file string) *groupMetrics { func newGroupMetrics(g *Group) *groupMetrics {
m := &groupMetrics{} m := &groupMetrics{}
labels := fmt.Sprintf(`group=%q, file=%q`, name, file) labels := fmt.Sprintf(`group=%q, file=%q`, g.Name, g.File)
m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels)) m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels)) m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels)) m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels))
m.iterationInterval = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 {
g.mu.RLock()
i := g.Interval.Seconds()
g.mu.RUnlock()
return i
})
return m return m
} }
@ -92,13 +99,13 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
finishedCh: make(chan struct{}), finishedCh: make(chan struct{}),
updateCh: make(chan *Group), updateCh: make(chan *Group),
} }
g.metrics = newGroupMetrics(g.Name, g.File)
if g.Interval == 0 { if g.Interval == 0 {
g.Interval = defaultInterval g.Interval = defaultInterval
} }
if g.Concurrency < 1 { if g.Concurrency < 1 {
g.Concurrency = 1 g.Concurrency = 1
} }
g.metrics = newGroupMetrics(g)
rules := make([]Rule, len(cfg.Rules)) rules := make([]Rule, len(cfg.Rules))
for i, r := range cfg.Rules { for i, r := range cfg.Rules {
var extraLabels map[string]string var extraLabels map[string]string
@ -222,6 +229,8 @@ func (g *Group) close() {
g.metrics.iterationDuration.Unregister() g.metrics.iterationDuration.Unregister()
g.metrics.iterationTotal.Unregister() g.metrics.iterationTotal.Unregister()
g.metrics.iterationMissed.Unregister()
g.metrics.iterationInterval.Unregister()
for _, rule := range g.Rules { for _, rule := range g.Rules {
rule.Close() rule.Close()
} }