mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmalert: support filtering for /api/v1/rules like Prometheus does (#5787)
Follow-up after 62e5e2a4c8
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
a379b2c016
commit
8850c7431d
5 changed files with 156 additions and 130 deletions
|
@ -14,6 +14,7 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
)
|
||||
|
@ -88,8 +89,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||
return true
|
||||
case "/vmalert/groups":
|
||||
var data []apiGroup
|
||||
ruleType := r.URL.Query().Get("type")
|
||||
data = rh.groups(ruleType)
|
||||
rf := extractRulesFilter(r)
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data)
|
||||
return true
|
||||
case "/vmalert/notifiers":
|
||||
|
@ -102,8 +103,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI,
|
||||
var data []apiGroup
|
||||
ruleType := r.URL.Query().Get("type")
|
||||
data = rh.groups(ruleType)
|
||||
rf := extractRulesFilter(r)
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data)
|
||||
return true
|
||||
|
||||
|
@ -112,8 +113,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
|||
var data []byte
|
||||
var err error
|
||||
|
||||
ruleType := r.URL.Query().Get("type")
|
||||
data, err = rh.listGroups(ruleType)
|
||||
rf := extractRulesFilter(r)
|
||||
data, err = rh.listGroups(rf)
|
||||
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
|
@ -219,42 +220,90 @@ type listGroupsResponse struct {
|
|||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(ruleType string) []apiGroup {
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||
type rulesFilter struct {
|
||||
files []string
|
||||
groupNames []string
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
}
|
||||
|
||||
func extractRulesFilter(r *http.Request) rulesFilter {
|
||||
rf := rulesFilter{}
|
||||
|
||||
var ruleType string
|
||||
ruleTypeParam := r.URL.Query().Get("type")
|
||||
// for some reason, `type` in filter doesn't match `type` in response,
|
||||
// so we use this matching here
|
||||
if ruleTypeParam == "alert" {
|
||||
ruleType = ruleTypeAlerting
|
||||
} else if ruleTypeParam == "record" {
|
||||
ruleType = ruleTypeRecording
|
||||
}
|
||||
rf.ruleType = ruleType
|
||||
|
||||
rf.excludeAlerts = httputils.GetBool(r, "exclude_alerts")
|
||||
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||
return rf
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf rulesFilter) []apiGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
groups := make([]apiGroup, 0)
|
||||
for _, g := range rh.m.groups {
|
||||
g = g.DeepCopy()
|
||||
var matchedRules []rule.Rule
|
||||
if ruleType == "alert" || ruleType == "record" {
|
||||
for _, r := range g.Rules {
|
||||
if _, ok := r.(*rule.AlertingRule); ok && ruleType == "alert" {
|
||||
matchedRules = append(matchedRules, r)
|
||||
isInList := func(list []string, needle string) bool {
|
||||
if len(list) < 1 {
|
||||
return true
|
||||
}
|
||||
if _, ok := r.(*rule.RecordingRule); ok && ruleType == "record" {
|
||||
matchedRules = append(matchedRules, r)
|
||||
for _, i := range list {
|
||||
if i == needle {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if len(matchedRules) == 0 {
|
||||
continue
|
||||
}
|
||||
g.Rules = matchedRules
|
||||
}
|
||||
groups = append(groups, groupToAPI(g))
|
||||
return false
|
||||
}
|
||||
|
||||
// sort list of alerts for deterministic output
|
||||
groups := make([]apiGroup, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !isInList(rf.groupNames, group.Name) {
|
||||
continue
|
||||
}
|
||||
if !isInList(rf.files, group.File) {
|
||||
continue
|
||||
}
|
||||
|
||||
g := groupToAPI(group)
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]apiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
||||
continue
|
||||
}
|
||||
if !isInList(rf.ruleNames, r.Name) {
|
||||
continue
|
||||
}
|
||||
if rf.excludeAlerts {
|
||||
r.Alerts = nil
|
||||
}
|
||||
filteredRules = append(filteredRules, r)
|
||||
}
|
||||
g.Rules = filteredRules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
sort.Slice(groups, func(i, j int) bool {
|
||||
return groups[i].Name < groups[j].Name
|
||||
})
|
||||
|
||||
return groups
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listGroups(ruleType string) ([]byte, error) {
|
||||
func (rh *requestHandler) listGroups(rf rulesFilter) ([]byte, error) {
|
||||
lr := listGroupsResponse{Status: "success"}
|
||||
lr.Data.Groups = rh.groups(ruleType)
|
||||
lr.Data.Groups = rh.groups(rf)
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
|
|
|
@ -23,6 +23,7 @@ func TestHandler(t *testing.T) {
|
|||
})
|
||||
g := &rule.Group{
|
||||
Name: "group",
|
||||
File: "rules.yaml",
|
||||
Concurrency: 1,
|
||||
}
|
||||
ar := rule.NewAlertingRule(fq, g, config.Rule{ID: 0, Alert: "alert"})
|
||||
|
@ -143,103 +144,6 @@ func TestHandler(t *testing.T) {
|
|||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules?type=alert", func(t *testing.T) {
|
||||
vmRuleType := "alerting"
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules?type=alert", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
|
||||
for _, g := range lr.Data.Groups {
|
||||
if length := len(g.Rules); length != 1 {
|
||||
t.Errorf("expected 1 valid alert got %d", length)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != vmRuleType {
|
||||
t.Errorf("expected only alerts here got %s", r.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules?type=record", func(t *testing.T) {
|
||||
vmRuleType := "recording"
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules?type=record", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
|
||||
for _, g := range lr.Data.Groups {
|
||||
if length := len(g.Rules); length != 1 {
|
||||
t.Errorf("expected 1 valid recording got %d", length)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != vmRuleType {
|
||||
t.Errorf("expected only records here got %s", r.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("ignore bad params /api/v1/rules?type=badParam", func(t *testing.T) {
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules?type=badParam", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/vmalert/api/v1/rules?type=alert", func(t *testing.T) {
|
||||
vmRuleType := "alerting"
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/vmalert/api/v1/rules?type=alert", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
|
||||
for _, g := range lr.Data.Groups {
|
||||
if length := len(g.Rules); length != 1 {
|
||||
t.Errorf("expected 1 valid alert got %d", length)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != vmRuleType {
|
||||
t.Errorf("expected only alerts here got %s", r.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/vmalert/api/v1/rules?type=record", func(t *testing.T) {
|
||||
vmRuleType := "recording"
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/vmalert/api/v1/rules?type=record", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
|
||||
for _, g := range lr.Data.Groups {
|
||||
if length := len(g.Rules); length != 1 {
|
||||
t.Errorf("expected 1 valid recording got %d", length)
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
if r.Type != vmRuleType {
|
||||
t.Errorf("expected only records here got %s", r.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("ignore bad params /vmalert/api/v1/rules?type=badParam", func(t *testing.T) {
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/vmalert/api/v1/rules?type=badParam", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Errorf("expected 1 group got %d", length)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||
expRule := ruleToAPI(ar)
|
||||
gotRule := apiRule{}
|
||||
|
@ -262,6 +166,74 @@ func TestHandler(t *testing.T) {
|
|||
t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
check := func(url string, expGroups, expRules int) {
|
||||
t.Helper()
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+url, &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != expGroups {
|
||||
t.Errorf("expected %d groups got %d", expGroups, length)
|
||||
}
|
||||
if len(lr.Data.Groups) < 1 {
|
||||
return
|
||||
}
|
||||
var rulesN int
|
||||
for _, gr := range lr.Data.Groups {
|
||||
rulesN += len(gr.Rules)
|
||||
}
|
||||
if rulesN != expRules {
|
||||
t.Errorf("expected %d rules got %d", expRules, rulesN)
|
||||
}
|
||||
}
|
||||
|
||||
check("/api/v1/rules?type=alert", 1, 1)
|
||||
check("/api/v1/rules?type=record", 1, 1)
|
||||
|
||||
check("/vmalert/api/v1/rules?type=alert", 1, 1)
|
||||
check("/vmalert/api/v1/rules?type=record", 1, 1)
|
||||
|
||||
// no filtering expected due to bad params
|
||||
check("/api/v1/rules?type=badParam", 1, 2)
|
||||
check("/api/v1/rules?foo=bar", 1, 2)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=bar", 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=group&rule_group[]=bar", 1, 2)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 1, 2)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 1, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 1, 1)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 1, 2)
|
||||
})
|
||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||
// check if response returns active alerts by default
|
||||
lr := listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules?rule_group[]=group&file[]=rules.yaml", &lr, 200)
|
||||
activeAlerts := 0
|
||||
for _, gr := range lr.Data.Groups {
|
||||
for _, r := range gr.Rules {
|
||||
activeAlerts += len(r.Alerts)
|
||||
}
|
||||
}
|
||||
if activeAlerts == 0 {
|
||||
t.Fatalf("expected at least 1 active alert in response; got 0")
|
||||
}
|
||||
|
||||
// disable returning alerts via param
|
||||
lr = listGroupsResponse{}
|
||||
getResp(ts.URL+"/api/v1/rules?rule_group[]=group&file[]=rules.yaml&exclude_alerts=true", &lr, 200)
|
||||
activeAlerts = 0
|
||||
for _, gr := range lr.Data.Groups {
|
||||
for _, r := range gr.Rules {
|
||||
activeAlerts += len(r.Alerts)
|
||||
}
|
||||
}
|
||||
if activeAlerts != 0 {
|
||||
t.Fatalf("expected to get 0 active alert in response; got %d", activeAlerts)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestEmptyResponse(t *testing.T) {
|
||||
|
|
|
@ -193,10 +193,15 @@ func ruleToAPI(r interface{}) apiRule {
|
|||
return apiRule{}
|
||||
}
|
||||
|
||||
const (
|
||||
ruleTypeRecording = "recording"
|
||||
ruleTypeAlerting = "alerting"
|
||||
)
|
||||
|
||||
func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
||||
lastState := rule.GetLastEntry(rr)
|
||||
r := apiRule{
|
||||
Type: "recording",
|
||||
Type: ruleTypeRecording,
|
||||
DatasourceType: rr.Type.String(),
|
||||
Name: rr.Name,
|
||||
Query: rr.Expr,
|
||||
|
@ -224,7 +229,7 @@ func recordingToAPI(rr *rule.RecordingRule) apiRule {
|
|||
func alertingToAPI(ar *rule.AlertingRule) apiRule {
|
||||
lastState := rule.GetLastEntry(ar)
|
||||
r := apiRule{
|
||||
Type: "alerting",
|
||||
Type: ruleTypeAlerting,
|
||||
DatasourceType: ar.Type.String(),
|
||||
Name: ar.Name,
|
||||
Query: ar.Expr,
|
||||
|
|
|
@ -39,7 +39,7 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
* FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).
|
||||
* FEATURE: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683): add `Targets scraped/s` stat panel showing the number of targets scraped by the vmagent per-second.
|
||||
* FEATURE: [dashboards/all](https://grafana.com/orgs/victoriametrics): add new panel `CPU spent on GC`. It should help identify cases when too much CPU is spent on garbage collection, and advise users on how this can be addressed.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support filtering alerting and recording rules using `type` parameter in API `/vmalert/groups`, `/rules`, `/vmalert/api/v1/rules` and `/api/v1/rules`. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules) for `/api/v1/rules` API. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos.
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738).
|
||||
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) from multiple arguments passed to [aggregate functions](https://docs.victoriametrics.com/metricsql/#aggregate-functions). For example, `sum({job="foo"}, {job="bar"}) by (job) + a` was improperly optimized to `sum({job="foo"}, {job="bar"}) by (job) + a{job="foo"}` before being executed. This could lead to unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5604).
|
||||
|
|
|
@ -655,7 +655,7 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
|
|||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
|
||||
* `http://<vmalert-addr>` - UI;
|
||||
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. You can also pass `type` as parameter and filter the answer by `alert` or `record` for only `alerting` or `recording` rules respectively;
|
||||
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. Supports additional [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules);
|
||||
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
|
||||
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
|
||||
Used as alert source in AlertManager.
|
||||
|
|
Loading…
Reference in a new issue