Mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git, synced 2025-01-10 15:14:09 +00:00
app/vmalert: detect alerting rules which don't match any series at all (#4198)
vmalert starts to understand /query responses which contain the object:

```
"stats":{"seriesFetched": "42"}
```

If the object is present, vmalert parses it and populates a new field `SeriesFetched`. This field is then used to populate the new metric `vmalert_alerting_rules_last_evaluation_series_fetched` and to display warnings in vmalert's UI.

If the response doesn't contain the new object (Prometheus or VictoriaMetrics earlier than v1.90), then `SeriesFetched=nil`. In this case the UI contains no additional warnings, and `vmalert_alerting_rules_last_evaluation_series_fetched` is set to `-1`. The negative value of the metric will help to compile a correct alerting rule in a follow-up.

Thanks to @Haleygo for the initial implementation.

See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4056
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent 2856e15a6e
commit 4edb97f4da
18 changed files with 843 additions and 542 deletions
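For illustration, here is a minimal, self-contained Go sketch of the response handling described in the commit message above. The type and function names are invented for this example (the actual implementation lives in the `app/vmalert/datasource` changes shown below); the point is that the optional `stats.seriesFetched` field is decoded so that the "missing" case stays distinct from an explicit zero.

```go
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
)

// queryResponse mirrors the relevant parts of a /api/v1/query response.
type queryResponse struct {
	Status string `json:"status"`
	Data   struct {
		ResultType string          `json:"resultType"`
		Result     json.RawMessage `json:"result"`
	} `json:"data"`
	// Stats is returned by VictoriaMetrics starting from v1.90.
	Stats struct {
		SeriesFetched *string `json:"seriesFetched,omitempty"`
	} `json:"stats,omitempty"`
}

// seriesFetched returns nil when the datasource didn't report the stat,
// keeping the "unsupported" case distinct from "zero series matched".
func seriesFetched(body []byte) (*int, error) {
	var r queryResponse
	if err := json.Unmarshal(body, &r); err != nil {
		return nil, err
	}
	if r.Stats.SeriesFetched == nil {
		return nil, nil // e.g. Prometheus or VictoriaMetrics older than v1.90
	}
	n, err := strconv.Atoi(*r.Stats.SeriesFetched)
	if err != nil {
		return nil, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
	}
	return &n, nil
}

func main() {
	body := []byte(`{"status":"success","data":{"resultType":"vector","result":[]},"stats":{"seriesFetched":"42"}}`)
	n, err := seriesFetched(body)
	if err != nil {
		panic(err)
	}
	fmt.Println(*n) // prints 42
}
```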
@@ -29,7 +29,8 @@ Use this feature for the following cases:
* Recording and Alerting rules backfilling (aka `replay`). See [these docs](#rules-backfilling);
* Lightweight and without extra dependencies.
* Supports [reusable templates](#reusable-templates) for annotations;
* Load of recording and alerting rules from local filesystem, GCS and S3.
* Load of recording and alerting rules from local filesystem, GCS and S3;
* Detect alerting rules which [don't match any series](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).

## Limitations

@@ -812,6 +813,22 @@ and vmalert will start printing additional log messages:
2022-09-15T13:36:56.153Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:36:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} PENDING => FIRING: 1m0s since becoming active at 2022-09-15 15:35:56.126006 +0200 CEST m=+39.384575417
```

### Never-firing alerts

vmalert can detect at runtime if an alert's expression doesn't match any time series. This problem usually happens
when the alerting expression selects time series which aren't present in the datasource (e.g. a wrong `job` label)
or there is a typo in the series selector (e.g. `env=rpod`). Such alerting rules are marked with a special icon in
vmalert's UI and exposed via the `vmalert_alerting_rules_last_evaluation_series_fetched` metric. The metric's value
shows how many time series were matched before filtering by the rule's expression. If the metric's value is `-1`,
this feature is not supported by the datasource (old versions of VictoriaMetrics). The following expression can be
used to detect rules matching no series:
```
max(vmalert_alerting_rules_last_evaluation_series_fetched) by(group, alertname) == 0
```

See more details [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
This feature is available only if vmalert is using VictoriaMetrics v1.90 or higher as a datasource.
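Building on the `-1` convention described above, a companion expression (an illustrative addition, not part of the upstream docs) can list rules whose datasource doesn't report the stat at all:
```
max(vmalert_alerting_rules_last_evaluation_series_fetched) by(group, alertname) < 0
```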

## Profiling
@@ -47,10 +47,11 @@ type AlertingRule struct {
}

type alertingRuleMetrics struct {
errors *utils.Gauge
pending *utils.Gauge
active *utils.Gauge
samples *utils.Gauge
errors *utils.Gauge
pending *utils.Gauge
active *utils.Gauge
samples *utils.Gauge
seriesFetched *utils.Gauge
}

func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *AlertingRule {

@@ -121,6 +122,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
e := ar.state.getLast()
return float64(e.samples)
})
ar.metrics.seriesFetched = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_series_fetched{%s}`, labels),
func() float64 {
e := ar.state.getLast()
if e.seriesFetched == nil {
// means seriesFetched is unsupported
return -1
}
return float64(*e.seriesFetched)
})
return ar
}

@@ -130,6 +140,7 @@ func (ar *AlertingRule) Close() {
ar.metrics.pending.Unregister()
ar.metrics.errors.Unregister()
ar.metrics.samples.Unregister()
ar.metrics.seriesFetched.Unregister()
}

// String implements Stringer interface

@@ -234,7 +245,7 @@ func (ar *AlertingRule) toLabels(m datasource.Metric, qFn templates.QueryFn) (*l
// to get time series for backfilling.
// It returns ALERT and ALERT_FOR_STATE time series as result.
func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
series, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
res, err := ar.q.QueryRange(ctx, ar.Expr, start, end)
if err != nil {
return nil, err
}

@@ -242,7 +253,7 @@ func (ar *AlertingRule) ExecRange(ctx context.Context, start, end time.Time) ([]
qFn := func(query string) ([]datasource.Metric, error) {
return nil, fmt.Errorf("`query` template isn't supported in replay mode")
}
for _, s := range series {
for _, s := range res.Data {
a, err := ar.newAlert(s, nil, time.Time{}, qFn) // initial alert
if err != nil {
return nil, fmt.Errorf("failed to create alert: %s", err)

@@ -282,14 +293,15 @@ const resolvedRetention = 15 * time.Minute
// Based on the Querier results AlertingRule maintains notifier.Alerts
func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
start := time.Now()
qMetrics, req, err := ar.q.Query(ctx, ar.Expr, ts)
res, req, err := ar.q.Query(ctx, ar.Expr, ts)
curState := ruleStateEntry{
time: start,
at: ts,
duration: time.Since(start),
samples: len(qMetrics),
err: err,
curl: requestToCurl(req),
time: start,
at: ts,
duration: time.Since(start),
samples: len(res.Data),
seriesFetched: res.SeriesFetched,
err: err,
curl: requestToCurl(req),
}

defer func() {

@@ -315,11 +327,11 @@ func (ar *AlertingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]pr

qFn := func(query string) ([]datasource.Metric, error) {
res, _, err := ar.q.Query(ctx, query, ts)
return res, err
return res.Data, err
}
updated := make(map[uint64]struct{})
// update list of active alerts
for _, m := range qMetrics {
for _, m := range res.Data {
ls, err := ar.toLabels(m, qFn)
if err != nil {
curState.err = fmt.Errorf("failed to expand labels: %s", err)

@@ -485,22 +497,23 @@ func (ar *AlertingRule) AlertAPI(id uint64) *APIAlert {
func (ar *AlertingRule) ToAPI() APIRule {
lastState := ar.state.getLast()
r := APIRule{
Type: "alerting",
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.samples,
MaxUpdates: ar.state.size(),
Updates: ar.state.getAll(),
Debug: ar.Debug,
Type: "alerting",
DatasourceType: ar.Type.String(),
Name: ar.Name,
Query: ar.Expr,
Duration: ar.For.Seconds(),
Labels: ar.Labels,
Annotations: ar.Annotations,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
State: "inactive",
Alerts: ar.AlertsToAPI(),
LastSamples: lastState.samples,
LastSeriesFetched: lastState.seriesFetched,
MaxUpdates: ar.state.size(),
Updates: ar.state.getAll(),
Debug: ar.Debug,

// encode as strings to avoid rounding in JSON
ID: fmt.Sprintf("%d", ar.ID()),

@@ -637,11 +650,12 @@ func (ar *AlertingRule) Restore(ctx context.Context, q datasource.Querier, ts ti

ar.logDebugf(ts, nil, "restoring alert state via query %q", expr)

qMetrics, _, err := q.Query(ctx, expr, ts)
res, _, err := q.Query(ctx, expr, ts)
if err != nil {
return err
}

qMetrics := res.Data
if len(qMetrics) < 1 {
ar.logDebugf(ts, nil, "no response was received from restore query")
continue
@@ -13,11 +13,22 @@ type Querier interface {
// It returns list of Metric in response, the http.Request used for sending query
// and error if any. Returned http.Request can't be reused and its body is already read.
// Query should stop once ctx is cancelled.
Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error)
Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error)
// QueryRange executes range request with the given query on the given time range.
// It returns list of Metric in response and error if any.
// QueryRange should stop once ctx is cancelled.
QueryRange(ctx context.Context, query string, from, to time.Time) ([]Metric, error)
QueryRange(ctx context.Context, query string, from, to time.Time) (Result, error)
}

// Result represents expected response from the datasource
type Result struct {
// Data contains list of received Metric
Data []Metric
// SeriesFetched contains amount of time series processed by datasource
// during query evaluation.
// If nil, then this feature is not supported by the datasource.
// SeriesFetched is supported by VictoriaMetrics since v1.90.
SeriesFetched *int
}

// QuerierBuilder builds Querier with given params.
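To make the `SeriesFetched` semantics above concrete, here is a small self-contained Go sketch (hypothetical code, not part of this commit) that mirrors how the new gauge treats the field: `nil` is reported as `-1` (datasource doesn't report the stat), while an explicit `0` means the expression matched no series.

```go
package main

import "fmt"

// result is a stand-in for the datasource.Result type shown above,
// reduced to the field relevant for this example.
type result struct {
	SeriesFetched *int
}

// gaugeValue reproduces the convention used by the new
// vmalert_alerting_rules_last_evaluation_series_fetched metric.
func gaugeValue(r result) float64 {
	if r.SeriesFetched == nil {
		return -1 // stat unsupported by the datasource
	}
	return float64(*r.SeriesFetched)
}

func main() {
	zero := 0
	fmt.Println(gaugeValue(result{}))                     // -1: unsupported
	fmt.Println(gaugeValue(result{SeriesFetched: &zero})) // 0: no series matched
}
```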
@@ -99,10 +99,10 @@ func NewVMStorage(baseURL string, authCfg *promauth.Config, lookBack time.Durati
}

// Query executes the given query and returns parsed response
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Metric, *http.Request, error) {
func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) (Result, *http.Request, error) {
req, err := s.newRequestPOST()
if err != nil {
return nil, nil, err
return Result{}, nil, err
}

switch s.dataSourceType {

@@ -111,12 +111,12 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
case datasourceGraphite:
s.setGraphiteReqParams(req, query, ts)
default:
return nil, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
return Result{}, nil, fmt.Errorf("engine not found: %q", s.dataSourceType)
}

resp, err := s.do(ctx, req)
if err != nil {
return nil, req, err
return Result{}, req, err
}
defer func() {
_ = resp.Body.Close()

@@ -133,24 +133,24 @@ func (s *VMStorage) Query(ctx context.Context, query string, ts time.Time) ([]Me
// QueryRange executes the given query on the given time range.
// For Prometheus type see https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
// Graphite type isn't supported.
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) ([]Metric, error) {
func (s *VMStorage) QueryRange(ctx context.Context, query string, start, end time.Time) (res Result, err error) {
if s.dataSourceType != datasourcePrometheus {
return nil, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
return res, fmt.Errorf("%q is not supported for QueryRange", s.dataSourceType)
}
req, err := s.newRequestPOST()
if err != nil {
return nil, err
return res, err
}
if start.IsZero() {
return nil, fmt.Errorf("start param is missing")
return res, fmt.Errorf("start param is missing")
}
if end.IsZero() {
return nil, fmt.Errorf("end param is missing")
return res, fmt.Errorf("end param is missing")
}
s.setPrometheusRangeReqParams(req, query, start, end)
resp, err := s.do(ctx, req)
if err != nil {
return nil, err
return res, err
}
defer func() {
_ = resp.Body.Close()
@@ -35,12 +35,12 @@ func (r graphiteResponse) metrics() []Metric {
return ms
}

func parseGraphiteResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
func parseGraphiteResponse(req *http.Request, resp *http.Response) (Result, error) {
r := &graphiteResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
return Result{}, fmt.Errorf("error parsing graphite metrics for %s: %w", req.URL.Redacted(), err)
}
return r.metrics(), nil
return Result{Data: r.metrics()}, nil
}

const (
@@ -22,6 +22,10 @@ type promResponse struct {
ResultType string `json:"resultType"`
Result json.RawMessage `json:"result"`
} `json:"data"`
// Stats supported by VictoriaMetrics since v1.90
Stats struct {
SeriesFetched *string `json:"seriesFetched,omitempty"`
} `json:"stats,omitempty"`
}

type promInstant struct {

@@ -96,39 +100,54 @@ const (
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
)

func parsePrometheusResponse(req *http.Request, resp *http.Response) ([]Metric, error) {
func parsePrometheusResponse(req *http.Request, resp *http.Response) (res Result, err error) {
r := &promResponse{}
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
return nil, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
return res, fmt.Errorf("error parsing prometheus metrics for %s: %w", req.URL.Redacted(), err)
}
if r.Status == statusError {
return nil, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
return res, fmt.Errorf("response error, query: %s, errorType: %s, error: %s", req.URL.Redacted(), r.ErrorType, r.Error)
}
if r.Status != statusSuccess {
return nil, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
return res, fmt.Errorf("unknown status: %s, Expected success or error ", r.Status)
}
var parseFn func() ([]Metric, error)
switch r.Data.ResultType {
case rtVector:
var pi promInstant
if err := json.Unmarshal(r.Data.Result, &pi.Result); err != nil {
return nil, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
return res, fmt.Errorf("umarshal err %s; \n %#v", err, string(r.Data.Result))
}
return pi.metrics()
parseFn = pi.metrics
case rtMatrix:
var pr promRange
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
return nil, err
return res, err
}
return pr.metrics()
parseFn = pr.metrics
case rScalar:
var ps promScalar
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
return nil, err
return res, err
}
return ps.metrics()
parseFn = ps.metrics
default:
return nil, fmt.Errorf("unknown result type %q", r.Data.ResultType)
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
}

ms, err := parseFn()
if err != nil {
return res, err
}
res = Result{Data: ms}
if r.Stats.SeriesFetched != nil {
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
if err != nil {
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
}
res.SeriesFetched = &intV
}
return res, nil
}

func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
@@ -35,13 +35,6 @@ func TestVMInstantQuery(t *testing.T) {
t.Errorf("should not be called")
})
c := -1
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
c++
switch c {
case 7:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})
mux.HandleFunc("/api/v1/query", func(w http.ResponseWriter, r *http.Request) {
c++
if r.Method != http.MethodPost {

@@ -75,6 +68,15 @@ func TestVMInstantQuery(t *testing.T) {
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
case 6:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
case 7:
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
}
})
mux.HandleFunc("/render", func(w http.ResponseWriter, request *http.Request) {
c++
switch c {
case 8:
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
}
})

@@ -107,12 +109,12 @@ func TestVMInstantQuery(t *testing.T) {
expErr("unknown status") // 3
expErr("unexpected end of JSON input") // 4

m, _, err := pq.Query(ctx, query, ts) // 6 - vector
res, _, err := pq.Query(ctx, query, ts) // 5 - vector
if err != nil {
t.Fatalf("unexpected %s", err)
}
if len(m) != 2 {
t.Fatalf("expected 2 metrics got %d in %+v", len(m), m)
if len(res.Data) != 2 {
t.Fatalf("expected 2 metrics got %d in %+v", len(res.Data), res.Data)
}
expected := []Metric{
{

@@ -126,17 +128,17 @@ func TestVMInstantQuery(t *testing.T) {
Values: []float64{2000},
},
}
metricsEqual(t, m, expected)
metricsEqual(t, res.Data, expected)

m, req, err := pq.Query(ctx, query, ts) // 7 - scalar
res, req, err := pq.Query(ctx, query, ts) // 6 - scalar
if err != nil {
t.Fatalf("unexpected %s", err)
}
if req == nil {
t.Fatalf("expected request to be non-nil")
}
if len(m) != 1 {
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
if len(res.Data) != 1 {
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res.Data)
}
expected = []Metric{
{

@@ -144,18 +146,44 @@ func TestVMInstantQuery(t *testing.T) {
Values: []float64{1},
},
}
if !reflect.DeepEqual(m, expected) {
t.Fatalf("unexpected metric %+v want %+v", m, expected)
if !reflect.DeepEqual(res.Data, expected) {
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
}

if res.SeriesFetched != nil {
t.Fatalf("expected `seriesFetched` field to be nil when it is missing in datasource response; got %v instead",
res.SeriesFetched)
}

res, _, err = pq.Query(ctx, query, ts) // 7 - scalar with stats
if err != nil {
t.Fatalf("unexpected %s", err)
}
if len(res.Data) != 1 {
t.Fatalf("expected 1 metrics got %d in %+v", len(res.Data), res)
}
expected = []Metric{
{
Timestamps: []int64{1583786142},
Values: []float64{1},
},
}
if !reflect.DeepEqual(res.Data, expected) {
t.Fatalf("unexpected metric %+v want %+v", res.Data, expected)
}
if *res.SeriesFetched != 42 {
t.Fatalf("expected `seriesFetched` field to be 42; got %d instead",
*res.SeriesFetched)
}

gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})

m, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
res, _, err = gq.Query(ctx, queryRender, ts) // 8 - graphite
if err != nil {
t.Fatalf("unexpected %s", err)
}
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
if len(res.Data) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(res.Data), res.Data)
}
exp := []Metric{
{

@@ -164,7 +192,7 @@ func TestVMInstantQuery(t *testing.T) {
Values: []float64{10},
},
}
metricsEqual(t, m, exp)
metricsEqual(t, res.Data, exp)

}

@@ -213,10 +241,11 @@ func TestVMInstantQueryWithRetry(t *testing.T) {
}

expValue := func(v float64) {
m, _, err := pq.Query(ctx, query, time.Now())
res, _, err := pq.Query(ctx, query, time.Now())
if err != nil {
t.Fatalf("unexpected %s", err)
}
m := res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metrics got %d in %+v", len(m), m)
}

@@ -319,10 +348,11 @@ func TestVMRangeQuery(t *testing.T) {

start, end := time.Now().Add(-time.Minute), time.Now()

m, err := pq.QueryRange(ctx, query, start, end)
res, err := pq.QueryRange(ctx, query, start, end)
if err != nil {
t.Fatalf("unexpected %s", err)
}
m := res.Data
if len(m) != 1 {
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
}
@@ -44,21 +44,21 @@ func (fq *fakeQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Qu
return fq
}

func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) ([]datasource.Metric, error) {
func (fq *fakeQuerier) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
req, _, err := fq.Query(ctx, q, time.Now())
return req, err
}

func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) ([]datasource.Metric, *http.Request, error) {
func (fq *fakeQuerier) Query(_ context.Context, _ string, _ time.Time) (datasource.Result, *http.Request, error) {
fq.Lock()
defer fq.Unlock()
if fq.err != nil {
return nil, nil, fq.err
return datasource.Result{}, nil, fq.err
}
cp := make([]datasource.Metric, len(fq.metrics))
copy(cp, fq.metrics)
req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
return cp, req, nil
return datasource.Result{Data: cp}, req, nil
}

type fakeQuerierWithRegistry struct {

@@ -85,23 +85,23 @@ func (fqr *fakeQuerierWithRegistry) BuildWithParams(_ datasource.QuerierParams)
return fqr
}

func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) ([]datasource.Metric, error) {
func (fqr *fakeQuerierWithRegistry) QueryRange(ctx context.Context, q string, _, _ time.Time) (datasource.Result, error) {
req, _, err := fqr.Query(ctx, q, time.Now())
return req, err
}

func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) ([]datasource.Metric, *http.Request, error) {
func (fqr *fakeQuerierWithRegistry) Query(_ context.Context, expr string, _ time.Time) (datasource.Result, *http.Request, error) {
fqr.Lock()
defer fqr.Unlock()

req, _ := http.NewRequest(http.MethodPost, "foo.com", nil)
metrics, ok := fqr.registry[expr]
if !ok {
return nil, req, nil
return datasource.Result{}, req, nil
}
cp := make([]datasource.Metric, len(metrics))
copy(cp, metrics)
return cp, req, nil
return datasource.Result{Data: cp}, req, nil
}

type fakeQuerierWithDelay struct {

@@ -109,7 +109,7 @@ type fakeQuerierWithDelay struct {
delay time.Duration
}

func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) ([]datasource.Metric, *http.Request, error) {
func (fqd *fakeQuerierWithDelay) Query(ctx context.Context, expr string, ts time.Time) (datasource.Result, *http.Request, error) {
timer := time.NewTimer(fqd.delay)
select {
case <-ctx.Done():
@@ -99,13 +99,13 @@ func (rr *RecordingRule) Close() {
// It doesn't update internal states of the Rule and meant to be used just
// to get time series for backfilling.
func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([]prompbmarshal.TimeSeries, error) {
series, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
res, err := rr.q.QueryRange(ctx, rr.Expr, start, end)
if err != nil {
return nil, err
}
duplicates := make(map[string]struct{}, len(series))
duplicates := make(map[string]struct{}, len(res.Data))
var tss []prompbmarshal.TimeSeries
for _, s := range series {
for _, s := range res.Data {
ts := rr.toTimeSeries(s)
key := stringifyLabels(ts)
if _, ok := duplicates[key]; ok {

@@ -120,13 +120,14 @@ func (rr *RecordingRule) ExecRange(ctx context.Context, start, end time.Time) ([
// Exec executes RecordingRule expression via the given Querier.
func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]prompbmarshal.TimeSeries, error) {
start := time.Now()
qMetrics, req, err := rr.q.Query(ctx, rr.Expr, ts)
res, req, err := rr.q.Query(ctx, rr.Expr, ts)
curState := ruleStateEntry{
time: start,
at: ts,
duration: time.Since(start),
samples: len(qMetrics),
curl: requestToCurl(req),
time: start,
at: ts,
duration: time.Since(start),
samples: len(res.Data),
seriesFetched: res.SeriesFetched,
curl: requestToCurl(req),
}

defer func() {

@@ -138,6 +139,7 @@ func (rr *RecordingRule) Exec(ctx context.Context, ts time.Time, limit int) ([]p
return nil, curState.err
}

qMetrics := res.Data
numSeries := len(qMetrics)
if limit > 0 && numSeries > limit {
curState.err = fmt.Errorf("exec exceeded limit of %d with %d series", limit, numSeries)

@@ -208,17 +210,18 @@ func (rr *RecordingRule) UpdateWith(r Rule) error {
func (rr *RecordingRule) ToAPI() APIRule {
lastState := rr.state.getLast()
r := APIRule{
Type: "recording",
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
LastSamples: lastState.samples,
MaxUpdates: rr.state.size(),
Updates: rr.state.getAll(),
Type: "recording",
DatasourceType: rr.Type.String(),
Name: rr.Name,
Query: rr.Expr,
Labels: rr.Labels,
LastEvaluation: lastState.time,
EvaluationTime: lastState.duration.Seconds(),
Health: "ok",
LastSamples: lastState.samples,
LastSeriesFetched: lastState.seriesFetched,
MaxUpdates: rr.state.size(),
Updates: rr.state.getAll(),

// encode as strings to avoid rounding
ID: fmt.Sprintf("%d", rr.ID()),
@@ -20,21 +20,21 @@ func (fr *fakeReplayQuerier) BuildWithParams(_ datasource.QuerierParams) datasou
return fr
}

func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to time.Time) ([]datasource.Metric, error) {
func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to time.Time) (res datasource.Result, err error) {
key := fmt.Sprintf("%s+%s", from.Format("15:04:05"), to.Format("15:04:05"))
dps, ok := fr.registry[q]
if !ok {
return nil, fmt.Errorf("unexpected query received: %q", q)
return res, fmt.Errorf("unexpected query received: %q", q)
}
_, ok = dps[key]
if !ok {
return nil, fmt.Errorf("unexpected time range received: %q", key)
return res, fmt.Errorf("unexpected time range received: %q", key)
}
delete(dps, key)
if len(fr.registry[q]) < 1 {
delete(fr.registry, q)
}
return nil, nil
return res, nil
}

func TestReplay(t *testing.T) {
@@ -53,6 +53,12 @@ type ruleStateEntry struct {
// stores the number of samples returned during
// the last evaluation
samples int
// stores the number of time series fetched during
// the last evaluation.
// Is supported by VictoriaMetrics only, starting from v1.90.0
// If seriesFetched == nil, then this attribute was missing in
// datasource response (unsupported).
seriesFetched *int
// stores the curl command reflecting the HTTP request used during rule.Exec
curl string
}
@@ -38,6 +38,10 @@
group.click();
}
});

$(document).ready(function() {
$('[data-bs-toggle="tooltip"]').tooltip();
});
</script>
</body>
</html>
@@ -73,35 +73,39 @@ func StreamFooter(qw422016 *qt422016.Writer, r *http.Request) {
group.click();
}
});

$(document).ready(function() {
$('[data-bs-toggle="tooltip"]').tooltip();
});
</script>
</body>
</html>
`)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
}

//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
func WriteFooter(qq422016 qtio422016.Writer, r *http.Request) {
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
StreamFooter(qw422016, r)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
qt422016.ReleaseWriter(qw422016)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
}

//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
func Footer(r *http.Request) string {
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
WriteFooter(qb422016, r)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
qs422016 := string(qb422016.B)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
return qs422016
//line app/vmalert/tpl/footer.qtpl:44
//line app/vmalert/tpl/footer.qtpl:48
}
@@ -91,7 +91,9 @@
{% else %}
<b>record:</b> {%s r.Name %}
{% endif %}
| <span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
{%= seriesFetchedWarn(r) %}
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
</div>
<div class="col-12">
<code><pre>{%s r.Query %}</pre></code>

@@ -377,8 +379,20 @@
annotationKeys = append(annotationKeys, k)
}
sort.Strings(annotationKeys)

var seriesFetchedEnabled bool
var seriesFetchedWarning bool
for _, u := range rule.Updates {
if u.seriesFetched != nil {
seriesFetchedEnabled = true
if *u.seriesFetched == 0 && u.samples == 0{
seriesFetchedWarning = true
}
}
}

%}
<div class="display-6 pb-3 mb-3">Rule: {%s rule.Name %}<span class="ms-2 badge {% if rule.Health!="ok" %}bg-danger{% else %} bg-warning text-dark{% endif %}">{%s rule.Health %}</span></div>
<div class="display-6 pb-3 mb-3">Rule: {%s rule.Name %}<span class="ms-2 badge {% if rule.Health!="ok" %}bg-danger{% else %} bg-success text-dark{% endif %}">{%s rule.Health %}</span></div>
<div class="container border-bottom p-2">
<div class="row">
<div class="col-2">

@@ -450,12 +464,26 @@
</div>

<br>
{% if seriesFetchedWarning %}
<div class="alert alert-warning" role="alert">
<strong>Warning:</strong> some of the updates have "Series fetched" equal to 0.<br>
It might be that either this data is missing in the datasource or there is a typo in the rule's expression.
For example, <strong>foo{label="bar"} > 0</strong> could never trigger because the <strong>foo{label="bar"}</strong>
metric doesn't exist.
<br>
Rule expressions without a time series selector, like <strong>expr: 42</strong> or <strong>expr: time()</strong>,
don't fetch time series from the datasource, so they may have "Series fetched" equal to 0; this is not a problem.
<br>
See more details about this detection <a target="_blank" href="https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039">here</a>.
</div>
{% endif %}
<div class="display-6 pb-3">Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates</span>:</div>
<table class="table table-striped table-hover table-sm">
<thead>
<tr>
<th scope="col" title="The time when event was created">Updated at</th>
<th scope="col" style="width: 10%" class="text-center" title="How many samples were returned">Samples</th>
{% if seriesFetchedEnabled %}<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>

@@ -468,7 +496,8 @@
<td>
<span class="badge bg-primary rounded-pill me-3" title="Updated at">{%s u.time.Format(time.RFC3339) %}</span>
</td>
<td class="text-center" wi>{%d u.samples %}</td>
<td class="text-center">{%d u.samples %}</td>
{% if seriesFetchedEnabled %}<td class="text-center">{% if u.seriesFetched != nil %}{%d *u.seriesFetched %}{% endif %}</td>{% endif %}
<td class="text-center">{%f.3 u.duration.Seconds() %}s</td>
<td class="text-center">{%s u.at.Format(time.RFC3339) %}</td>
<td>

@@ -478,7 +507,7 @@
</li>
{% if u.err != nil %}
<tr{% if u.err != nil %} class="alert-danger"{% endif %}>
<td colspan="5">
<td colspan="{% if seriesFetchedEnabled %}6{%else%}5{%endif%}">
<span class="alert-danger">{%v u.err %}</span>
</td>
</tr>

@@ -503,3 +532,16 @@
{% func badgeRestored() %}
<span class="badge bg-warning text-dark" title="Alert state was restored after the service restart from remote storage">restored</span>
{% endfunc %}

{% func seriesFetchedWarn(r APIRule) %}
{% if r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0 %}
<svg xmlns="http://www.w3.org/2000/svg"
data-bs-toggle="tooltip"
title="This rule's last evaluation hasn't selected any time series from the datasource.
It might be that either this data is missing in the datasource or there is a typo in the rule's expression.
See more in Details."
width="18" height="18" fill="currentColor" class="bi bi-exclamation-triangle-fill flex-shrink-0 me-2" viewBox="0 0 16 16" role="img" aria-label="Warning:">
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm.93-9.412-1 4.705c-.07.34.029.533.304.533.194 0 .487-.07.686-.246l-.088.416c-.287.346-.92.598-1.465.598-.703 0-1.002-.422-.808-1.319l.738-3.468c.064-.293.006-.399-.287-.47l-.451-.081.082-.381 2.29-.287zM8 5.5a1 1 0 1 1 0-2 1 1 0 0 1 0 2z"/>
</svg>
{% endif %}
{% endfunc %}
File diff suppressed because it is too large
@@ -117,7 +117,12 @@ type APIRule struct {

// DatasourceType of the rule: prometheus or graphite
DatasourceType string `json:"datasourceType"`
LastSamples int `json:"lastSamples"`
// LastSamples stores the amount of data samples received on last evaluation
LastSamples int `json:"lastSamples"`
// LastSeriesFetched stores the amount of time series fetched by datasource
// during the last evaluation
LastSeriesFetched *int `json:"lastSeriesFetched,omitempty"`

// ID is a unique Alert's ID within a group
ID string `json:"id"`
// GroupID is an unique Group's ID
@@ -41,6 +41,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to specify default route (`default_url`) for processing non-matched requests. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4084).
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to filter incoming requests by IP. See [these docs](https://docs.victoriametrics.com/vmauth.html#ip-filters) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3491).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support configuring of custom HTTP headers sent to notifiers on the Group level. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3260).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): detect alerting rules which don't match any series. See this [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039) for details.
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-s3StorageClass` command-line flag for setting the storage class for AWS S3 backups. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4164). Thanks to @justcompile for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4166).

* BUGFIX: reduce the probability of sudden increase in the number of small parts on systems with small number of CPU cores.
@@ -33,7 +33,8 @@ Use this feature for the following cases:
* Recording and Alerting rules backfilling (aka `replay`). See [these docs](#rules-backfilling);
* Lightweight and without extra dependencies.
* Supports [reusable templates](#reusable-templates) for annotations;
* Load of recording and alerting rules from local filesystem, GCS and S3.
* Load of recording and alerting rules from local filesystem, GCS and S3;
* Detect alerting rules which [don't match any series](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).

## Limitations

@@ -816,6 +817,22 @@ and vmalert will start printing additional log messages:
2022-09-15T13:36:56.153Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:36:56+02:00: alert 10705778000901301787 {alertgroup="TestGroup",alertname="Conns",cluster="east-1",instance="localhost:8429",replica="a"} PENDING => FIRING: 1m0s since becoming active at 2022-09-15 15:35:56.126006 +0200 CEST m=+39.384575417
```

### Never-firing alerts

vmalert can detect at runtime if an alert's expression doesn't match any time series. This problem usually happens
when the alerting expression selects time series which aren't present in the datasource (e.g. a wrong `job` label)
or there is a typo in the series selector (e.g. `env=rpod`). Such alerting rules are marked with a special icon in
vmalert's UI and exposed via the `vmalert_alerting_rules_last_evaluation_series_fetched` metric. The metric's value
shows how many time series were matched before filtering by the rule's expression. If the metric's value is `-1`,
this feature is not supported by the datasource (old versions of VictoriaMetrics). The following expression can be
used to detect rules matching no series:
```
max(vmalert_alerting_rules_last_evaluation_series_fetched) by(group, alertname) == 0
```

See more details [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4039).
This feature is available only if vmalert is using VictoriaMetrics v1.90 or higher as a datasource.

## Profiling