mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
130e0ea5f0
1. split package rule under /app/vmalert, expose needed objects 2. add vmalert-tool with unittest subcmd https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2945
168 lines
4 KiB
Go
168 lines
4 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
)
|
|
|
|
// manager controls group states
|
|
type manager struct {
|
|
querierBuilder datasource.QuerierBuilder
|
|
notifiers func() []notifier.Notifier
|
|
|
|
rw remotewrite.RWClient
|
|
// remote read builder.
|
|
rr datasource.QuerierBuilder
|
|
|
|
wg sync.WaitGroup
|
|
labels map[string]string
|
|
|
|
groupsMu sync.RWMutex
|
|
groups map[uint64]*rule.Group
|
|
}
|
|
|
|
// ruleAPI generates apiRule object from alert by its ID(hash)
|
|
func (m *manager) ruleAPI(gID, rID uint64) (apiRule, error) {
|
|
m.groupsMu.RLock()
|
|
defer m.groupsMu.RUnlock()
|
|
|
|
g, ok := m.groups[gID]
|
|
if !ok {
|
|
return apiRule{}, fmt.Errorf("can't find group with id %d", gID)
|
|
}
|
|
for _, rule := range g.Rules {
|
|
if rule.ID() == rID {
|
|
return ruleToAPI(rule), nil
|
|
}
|
|
}
|
|
return apiRule{}, fmt.Errorf("can't find rule with id %d in group %q", rID, g.Name)
|
|
}
|
|
|
|
// alertAPI generates apiAlert object from alert by its ID(hash)
|
|
func (m *manager) alertAPI(gID, aID uint64) (*apiAlert, error) {
|
|
m.groupsMu.RLock()
|
|
defer m.groupsMu.RUnlock()
|
|
|
|
g, ok := m.groups[gID]
|
|
if !ok {
|
|
return nil, fmt.Errorf("can't find group with id %d", gID)
|
|
}
|
|
for _, r := range g.Rules {
|
|
ar, ok := r.(*rule.AlertingRule)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if apiAlert := alertToAPI(ar, aID); apiAlert != nil {
|
|
return apiAlert, nil
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("can't find alert with id %d in group %q", aID, g.Name)
|
|
}
|
|
|
|
func (m *manager) start(ctx context.Context, groupsCfg []config.Group) error {
|
|
return m.update(ctx, groupsCfg, true)
|
|
}
|
|
|
|
func (m *manager) close() {
|
|
if m.rw != nil {
|
|
err := m.rw.Close()
|
|
if err != nil {
|
|
logger.Fatalf("cannot stop the remotewrite: %s", err)
|
|
}
|
|
}
|
|
m.wg.Wait()
|
|
}
|
|
|
|
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
|
m.wg.Add(1)
|
|
id := g.ID()
|
|
go func() {
|
|
defer m.wg.Done()
|
|
if restore {
|
|
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
|
} else {
|
|
g.Start(ctx, m.notifiers, m.rw, nil)
|
|
}
|
|
}()
|
|
m.groups[id] = g
|
|
return nil
|
|
}
|
|
|
|
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
|
var rrPresent, arPresent bool
|
|
groupsRegistry := make(map[uint64]*rule.Group)
|
|
for _, cfg := range groupsCfg {
|
|
for _, r := range cfg.Rules {
|
|
if rrPresent && arPresent {
|
|
continue
|
|
}
|
|
if r.Record != "" {
|
|
rrPresent = true
|
|
}
|
|
if r.Alert != "" {
|
|
arPresent = true
|
|
}
|
|
}
|
|
ng := rule.NewGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels)
|
|
groupsRegistry[ng.ID()] = ng
|
|
}
|
|
|
|
if rrPresent && m.rw == nil {
|
|
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
|
}
|
|
if arPresent && m.notifiers == nil {
|
|
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
|
}
|
|
|
|
type updateItem struct {
|
|
old *rule.Group
|
|
new *rule.Group
|
|
}
|
|
var toUpdate []updateItem
|
|
|
|
m.groupsMu.Lock()
|
|
for _, og := range m.groups {
|
|
ng, ok := groupsRegistry[og.ID()]
|
|
if !ok {
|
|
// old group is not present in new list,
|
|
// so must be stopped and deleted
|
|
og.Close()
|
|
delete(m.groups, og.ID())
|
|
og = nil
|
|
continue
|
|
}
|
|
delete(groupsRegistry, ng.ID())
|
|
if og.Checksum != ng.Checksum {
|
|
toUpdate = append(toUpdate, updateItem{old: og, new: ng})
|
|
}
|
|
}
|
|
for _, ng := range groupsRegistry {
|
|
if err := m.startGroup(ctx, ng, restore); err != nil {
|
|
m.groupsMu.Unlock()
|
|
return err
|
|
}
|
|
}
|
|
m.groupsMu.Unlock()
|
|
|
|
if len(toUpdate) > 0 {
|
|
var wg sync.WaitGroup
|
|
for _, item := range toUpdate {
|
|
wg.Add(1)
|
|
go func(old *rule.Group, new *rule.Group) {
|
|
old.UpdateWith(new)
|
|
wg.Done()
|
|
}(item.old, item.new)
|
|
item.old.InterruptEval()
|
|
}
|
|
wg.Wait()
|
|
}
|
|
return nil
|
|
}
|