diff --git a/app/vmalert/web.go b/app/vmalert/web.go index 2524f13bd1..fff17abded 100644 --- a/app/vmalert/web.go +++ b/app/vmalert/web.go @@ -14,6 +14,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl" "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" ) @@ -88,8 +89,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool { return true case "/vmalert/groups": var data []apiGroup - ruleType := r.URL.Query().Get("type") - data = rh.groups(ruleType) + rf := extractRulesFilter(r) + data = rh.groups(rf) WriteListGroups(w, r, data) return true case "/vmalert/notifiers": @@ -102,8 +103,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool { // Grafana makes an extra request to `/rules` // handler in addition to `/api/v1/rules` calls in alerts UI, var data []apiGroup - ruleType := r.URL.Query().Get("type") - data = rh.groups(ruleType) + rf := extractRulesFilter(r) + data = rh.groups(rf) WriteListGroups(w, r, data) return true @@ -112,8 +113,8 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool { var data []byte var err error - ruleType := r.URL.Query().Get("type") - data, err = rh.listGroups(ruleType) + rf := extractRulesFilter(r) + data, err = rh.listGroups(rf) if err != nil { httpserver.Errorf(w, r, "%s", err) @@ -219,42 +220,90 @@ type listGroupsResponse struct { } `json:"data"` } -func (rh *requestHandler) groups(ruleType string) []apiGroup { +// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules +type rulesFilter struct { + files []string + groupNames []string + ruleNames []string + ruleType string + excludeAlerts bool +} + +func extractRulesFilter(r *http.Request) rulesFilter { + rf := rulesFilter{} + 
+ var ruleType string + ruleTypeParam := r.URL.Query().Get("type") + // for some reason, `type` in filter doesn't match `type` in response, + // so we use this matching here + if ruleTypeParam == "alert" { + ruleType = ruleTypeAlerting + } else if ruleTypeParam == "record" { + ruleType = ruleTypeRecording + } + rf.ruleType = ruleType + + rf.excludeAlerts = httputils.GetBool(r, "exclude_alerts") + rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...) + rf.groupNames = append([]string{}, r.Form["rule_group[]"]...) + rf.files = append([]string{}, r.Form["file[]"]...) + return rf +} + +func (rh *requestHandler) groups(rf rulesFilter) []apiGroup { rh.m.groupsMu.RLock() defer rh.m.groupsMu.RUnlock() - groups := make([]apiGroup, 0) - for _, g := range rh.m.groups { - g = g.DeepCopy() - var matchedRules []rule.Rule - if ruleType == "alert" || ruleType == "record" { - for _, r := range g.Rules { - if _, ok := r.(*rule.AlertingRule); ok && ruleType == "alert" { - matchedRules = append(matchedRules, r) - } - if _, ok := r.(*rule.RecordingRule); ok && ruleType == "record" { - matchedRules = append(matchedRules, r) - } - } - if len(matchedRules) == 0 { - continue - } - g.Rules = matchedRules + isInList := func(list []string, needle string) bool { + if len(list) < 1 { + return true } - groups = append(groups, groupToAPI(g)) + for _, i := range list { + if i == needle { + return true + } + } + return false } - // sort list of alerts for deterministic output + groups := make([]apiGroup, 0) + for _, group := range rh.m.groups { + if !isInList(rf.groupNames, group.Name) { + continue + } + if !isInList(rf.files, group.File) { + continue + } + + g := groupToAPI(group) + // the returned list should always be non-nil + // https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221 + filteredRules := make([]apiRule, 0) + for _, r := range g.Rules { + if rf.ruleType != "" && rf.ruleType != r.Type { + continue + } + if !isInList(rf.ruleNames, r.Name) { + continue + } + if 
rf.excludeAlerts { + r.Alerts = nil + } + filteredRules = append(filteredRules, r) + } + g.Rules = filteredRules + groups = append(groups, g) + } + // sort list of groups for deterministic output sort.Slice(groups, func(i, j int) bool { return groups[i].Name < groups[j].Name }) - return groups } -func (rh *requestHandler) listGroups(ruleType string) ([]byte, error) { +func (rh *requestHandler) listGroups(rf rulesFilter) ([]byte, error) { lr := listGroupsResponse{Status: "success"} - lr.Data.Groups = rh.groups(ruleType) + lr.Data.Groups = rh.groups(rf) b, err := json.Marshal(lr) if err != nil { return nil, &httpserver.ErrorWithStatusCode{ diff --git a/app/vmalert/web_test.go b/app/vmalert/web_test.go index cd87de690c..cb6d9bdcbd 100644 --- a/app/vmalert/web_test.go +++ b/app/vmalert/web_test.go @@ -23,6 +23,7 @@ func TestHandler(t *testing.T) { }) g := &rule.Group{ Name: "group", + File: "rules.yaml", Concurrency: 1, } ar := rule.NewAlertingRule(fq, g, config.Rule{ID: 0, Alert: "alert"}) @@ -143,103 +144,6 @@ func TestHandler(t *testing.T) { t.Errorf("expected 1 group got %d", length) } }) - - t.Run("/api/v1/rules?type=alert", func(t *testing.T) { - vmRuleType := "alerting" - lr := listGroupsResponse{} - getResp(ts.URL+"/api/v1/rules?type=alert", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - - for _, g := range lr.Data.Groups { - if length := len(g.Rules); length != 1 { - t.Errorf("expected 1 valid alert got %d", length) - } - for _, r := range g.Rules { - if r.Type != vmRuleType { - t.Errorf("expected only alerts here got %s", r.Type) - } - } - } - }) - - t.Run("/api/v1/rules?type=record", func(t *testing.T) { - vmRuleType := "recording" - lr := listGroupsResponse{} - getResp(ts.URL+"/api/v1/rules?type=record", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - - for _, g := range lr.Data.Groups { - if length := len(g.Rules); length != 1 { 
- t.Errorf("expected 1 valid recording got %d", length) - } - for _, r := range g.Rules { - if r.Type != vmRuleType { - t.Errorf("expected only records here got %s", r.Type) - } - } - } - }) - - t.Run("ignore bad params /api/v1/rules?type=badParam", func(t *testing.T) { - lr := listGroupsResponse{} - getResp(ts.URL+"/api/v1/rules?type=badParam", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - }) - - t.Run("/vmalert/api/v1/rules?type=alert", func(t *testing.T) { - vmRuleType := "alerting" - lr := listGroupsResponse{} - getResp(ts.URL+"/vmalert/api/v1/rules?type=alert", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - - for _, g := range lr.Data.Groups { - if length := len(g.Rules); length != 1 { - t.Errorf("expected 1 valid alert got %d", length) - } - for _, r := range g.Rules { - if r.Type != vmRuleType { - t.Errorf("expected only alerts here got %s", r.Type) - } - } - } - }) - - t.Run("/vmalert/api/v1/rules?type=record", func(t *testing.T) { - vmRuleType := "recording" - lr := listGroupsResponse{} - getResp(ts.URL+"/vmalert/api/v1/rules?type=record", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - - for _, g := range lr.Data.Groups { - if length := len(g.Rules); length != 1 { - t.Errorf("expected 1 valid recording got %d", length) - } - for _, r := range g.Rules { - if r.Type != vmRuleType { - t.Errorf("expected only records here got %s", r.Type) - } - } - } - }) - - t.Run("ignore bad params /vmalert/api/v1/rules?type=badParam", func(t *testing.T) { - lr := listGroupsResponse{} - getResp(ts.URL+"/vmalert/api/v1/rules?type=badParam", &lr, 200) - if length := len(lr.Data.Groups); length != 1 { - t.Errorf("expected 1 group got %d", length) - } - }) - t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) { expRule := ruleToAPI(ar) gotRule := apiRule{} @@ -262,6 +166,74 @@ func 
TestHandler(t *testing.T) { t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates) } }) + + t.Run("/api/v1/rules&filters", func(t *testing.T) { + check := func(url string, expGroups, expRules int) { + t.Helper() + lr := listGroupsResponse{} + getResp(ts.URL+url, &lr, 200) + if length := len(lr.Data.Groups); length != expGroups { + t.Errorf("expected %d groups got %d", expGroups, length) + } + if len(lr.Data.Groups) < 1 { + return + } + var rulesN int + for _, gr := range lr.Data.Groups { + rulesN += len(gr.Rules) + } + if rulesN != expRules { + t.Errorf("expected %d rules got %d", expRules, rulesN) + } + } + + check("/api/v1/rules?type=alert", 1, 1) + check("/api/v1/rules?type=record", 1, 1) + + check("/vmalert/api/v1/rules?type=alert", 1, 1) + check("/vmalert/api/v1/rules?type=record", 1, 1) + + // no filtering expected due to bad params + check("/api/v1/rules?type=badParam", 1, 2) + check("/api/v1/rules?foo=bar", 1, 2) + + check("/api/v1/rules?rule_group[]=foo&rule_group[]=bar", 0, 0) + check("/api/v1/rules?rule_group[]=foo&rule_group[]=group&rule_group[]=bar", 1, 2) + + check("/api/v1/rules?rule_group[]=group&file[]=foo", 0, 0) + check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 1, 2) + + check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 1, 0) + check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 1, 1) + check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 1, 2) + }) + t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) { + // check if response returns active alerts by default + lr := listGroupsResponse{} + getResp(ts.URL+"/api/v1/rules?rule_group[]=group&file[]=rules.yaml", &lr, 200) + activeAlerts := 0 + for _, gr := range lr.Data.Groups { + for _, r := range gr.Rules { + activeAlerts += len(r.Alerts) + } + } + if activeAlerts == 0 { + t.Fatalf("expected at least 1 active alert in response; got 0") + } + + 
// disable returning alerts via param + lr = listGroupsResponse{} + getResp(ts.URL+"/api/v1/rules?rule_group[]=group&file[]=rules.yaml&exclude_alerts=true", &lr, 200) + activeAlerts = 0 + for _, gr := range lr.Data.Groups { + for _, r := range gr.Rules { + activeAlerts += len(r.Alerts) + } + } + if activeAlerts != 0 { + t.Fatalf("expected to get 0 active alert in response; got %d", activeAlerts) + } + }) } func TestEmptyResponse(t *testing.T) { diff --git a/app/vmalert/web_types.go b/app/vmalert/web_types.go index 61c0d21912..1da1d941d6 100644 --- a/app/vmalert/web_types.go +++ b/app/vmalert/web_types.go @@ -193,10 +193,15 @@ func ruleToAPI(r interface{}) apiRule { return apiRule{} } +const ( + ruleTypeRecording = "recording" + ruleTypeAlerting = "alerting" +) + func recordingToAPI(rr *rule.RecordingRule) apiRule { lastState := rule.GetLastEntry(rr) r := apiRule{ - Type: "recording", + Type: ruleTypeRecording, DatasourceType: rr.Type.String(), Name: rr.Name, Query: rr.Expr, @@ -224,7 +229,7 @@ func recordingToAPI(rr *rule.RecordingRule) apiRule { func alertingToAPI(ar *rule.AlertingRule) apiRule { lastState := rule.GetLastEntry(ar) r := apiRule{ - Type: "alerting", + Type: ruleTypeAlerting, DatasourceType: ar.Type.String(), Name: ar.Name, Query: ar.Expr, diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 0c736dcfa9..b551d865b6 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -39,7 +39,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834). * FEATURE: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683): add `Targets scraped/s` stat panel showing the number of targets scraped by the vmagent per-second. 
* FEATURE: [dashboards/all](https://grafana.com/orgs/victoriametrics): add new panel `CPU spent on GC`. It should help identifying cases when too much CPU is spent on garbage collection, and advice users on how this can be addressed. -* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support filtering alerting and recording rules using `type` parameter in API `/vmalert/groups`, `/rules`, `/vmalert/api/v1/rules` and `/api/v1/rules`. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos. +* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules) for `/api/v1/rules` API. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos. * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) from multiple arguments passed to [aggregate functions](https://docs.victoriametrics.com/metricsql/#aggregate-functions). For example, `sum({job="foo"}, {job="bar"}) by (job) + a` was improperly optimized to `sum({job="foo"}, {job="bar"}) by (job) + a{job="foo"}` before being executed. This could lead to unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5604). 
diff --git a/docs/vmalert.md b/docs/vmalert.md index ad70e99ce4..dc8d292485 100644 --- a/docs/vmalert.md +++ b/docs/vmalert.md @@ -655,7 +655,7 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm `vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints: * `http://<vmalert-addr>` - UI; -* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. You can also pass `type` as parameter and filter the answer by `alert` or `record` for only `alerting` or `recording` rules respectively; +* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. Supports additional [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules); * `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts; * `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format. Used as alert source in AlertManager.