app/vmalert: add label file pointing to the group's filename to metrics (#5281)

The filename should help identifying alerting rules belonging to specific groups
with identical names but different filenames.

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5267

Signed-off-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit b5254199c6)
This commit is contained in:
Roman Khavronenko 2023-11-02 16:01:31 +01:00 committed by hagen1778
parent 3773510e8f
commit 4e8c762fd9
No known key found for this signature in database
GPG key ID: 3BF75F3741CA9640
4 changed files with 7 additions and 6 deletions

View file

@ -91,7 +91,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
entries: make([]StateEntry, entrySize),
}
labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID())
labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID())
ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels),
func() float64 {
ar.alertsMu.RLock()

View file

@ -78,7 +78,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
entries: make([]StateEntry, entrySize),
}
labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID())
labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
func() float64 {
e := rr.state.getLast()

View file

@ -18,7 +18,7 @@ groups:
Check vmalert's logs for detailed error message."
- alert: AlertingRulesError
expr: sum(vmalert_alerting_rules_error) by(job, instance, group) > 0
expr: sum(vmalert_alerting_rules_error) by(job, instance, group, file) > 0
for: 5m
labels:
severity: warning
@ -29,7 +29,7 @@ groups:
Check vmalert's logs for detailed error message."
- alert: RecordingRulesError
expr: sum(vmalert_recording_rules_error) by(job, instance, group) > 0
expr: sum(vmalert_recording_rules_error) by(job, instance, group, file) > 0
for: 5m
labels:
severity: warning
@ -40,7 +40,7 @@ groups:
Check vmalert's logs for detailed error message."
- alert: RecordingRulesNoData
expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording) < 1
expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording, file) < 1
for: 30m
labels:
severity: info
@ -52,7 +52,7 @@ groups:
or incorrect query expression."
- alert: TooManyMissedIterations
expr: sum(increase(vmalert_iteration_missed_total[5m])) by(job, instance, group) > 0
expr: sum(increase(vmalert_iteration_missed_total[5m])) by(job, instance, group, file) > 0
for: 15m
labels:
severity: warning

View file

@ -51,6 +51,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `-rule.evalDelay` flag and `eval_delay` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups). The new flag and param can be used to adjust the `time` parameter for rule evaluation requests to match [intentional query delay](https://docs.victoriametrics.com/keyConcepts.html#query-latency) from the datasource. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow specifying full url in notifier static_configs target address, like `http://alertmanager:9093/test/api/v2/alerts`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5184).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): reduce the number of queries for restoring alerts state on start-up. The change should speed up the restore process and reduce pressure on `remoteRead.url`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5265).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add label `file` pointing to the group's filename to metrics `vmalert_recording_.*` and `vmalert_alerts_.*`. The filename should help identifying alerting rules belonging to specific groups with identical names but different filenames. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5267).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.labels` command-line flag, which can be used for specifying a list of labels for sharding outgoing samples among the configured `-remoteWrite.url` destinations if `-remoteWrite.shardByURL` command-line flag is set. See [these docs](https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4942) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not exit on startup when [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to non-existing or invalid files with auth configs, since these files may appear / updated later. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153).