diff --git a/app/vmalert/README.md b/app/vmalert/README.md index 6580290b4..cce14e58d 100644 --- a/app/vmalert/README.md +++ b/app/vmalert/README.md @@ -191,6 +191,11 @@ expr: # Is applicable to alerting rules only. [ debug: | default = false ] +# Defines the number of rule's updates entries stored in memory +# and available for view on rule's Details page. +# Overrides `rule.updateEntriesLimit` value for this specific rule. +[ update_entries_limit: | default 0 ] + # Labels to add or overwrite for each alert. labels: [ : ] @@ -319,6 +324,12 @@ expr: # Labels to add or overwrite before storing the result. labels: [ : ] + + +# Defines the number of rule's updates entries stored in memory +# and available for view on rule's Details page. +# Overrides `rule.updateEntriesLimit` value for this specific rule. +[ update_entries_limit: | default 0 ] ``` For recording rules to work `-remoteWrite.url` must be specified. @@ -695,7 +706,7 @@ may get empty response from datasource and produce empty recording rules or rese vmalert evaluation when data is delayed -By default recently written samples to VictoriaMetrics aren't visible for queries for up to 30s. +By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s. This behavior is controlled by `-search.latencyOffset` command-line flag and the `latency_offset` query ag at `vmselect`. Usually, this results into a 30s shift for recording rules results. Note that too small value passed to `-search.latencyOffset` or to `latency_offest` query arg may lead to incomplete query results. @@ -721,8 +732,9 @@ If `-remoteWrite.url` command-line flag is configured, vmalert will persist aler [vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state changed in time. -vmalert also stores last N state updates for each rule. 
To check updates, click on `Details` link next to rule's name -on `/vmalert/groups` page and check the `Last updates` section: +vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules)) +state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page +and check the `Last updates` section: vmalert state @@ -731,7 +743,7 @@ HTTP request sent by vmalert to the `-datasource.url` during evaluation. If spec no samples returned and curl command returns data - then it is very likely there was no data in datasource on the moment when rule was evaluated. -vmalert also alows configuring more detailed logging for specific rule. Just set `debug: true` in rule's configuration +vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration and vmalert will start printing additional log messages: ```terminal 2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms) @@ -890,6 +902,8 @@ The shortlist of configuration flags is the following: Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit -loggerFormat string Format for logs. Possible values: default, json (default "default") + -loggerJSONFields string + Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg -loggerLevel string Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO") -loggerOutput string @@ -1092,6 +1106,8 @@ The shortlist of configuration flags is the following: Interval for checking for changes in '-rule' files. 
By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead -rule.maxResolveDuration duration Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group. + -rule.updateEntriesLimit int + Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param. (default 20) -rule.resendDelay duration Minimum amount of time to wait before resending an alert to notifier -rule.templates array diff --git a/app/vmalert/alerting.go b/app/vmalert/alerting.go index 368785105..875c36033 100644 --- a/app/vmalert/alerting.go +++ b/app/vmalert/alerting.go @@ -74,10 +74,15 @@ func newAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule Debug: cfg.Debug, }), alerts: make(map[uint64]*notifier.Alert), - state: newRuleState(), metrics: &alertingRuleMetrics{}, } + if cfg.UpdateEntriesLimit != nil { + ar.state = newRuleState(*cfg.UpdateEntriesLimit) + } else { + ar.state = newRuleState(*ruleUpdateEntriesLimit) + } + labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID()) ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels), func() float64 { @@ -491,6 +496,7 @@ func (ar *AlertingRule) ToAPI() APIRule { State: "inactive", Alerts: ar.AlertsToAPI(), LastSamples: lastState.samples, + MaxUpdates: ar.state.size(), Updates: ar.state.getAll(), // encode as strings to avoid rounding in JSON diff --git a/app/vmalert/alerting_test.go b/app/vmalert/alerting_test.go index 89607116b..106028be0 100644 --- a/app/vmalert/alerting_test.go +++ b/app/vmalert/alerting_test.go @@ -709,7 +709,6 @@ func TestAlertingRule_Template(t *testing.T) { "summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`, }, alerts: make(map[uint64]*notifier.Alert), - state: newRuleState(), }, []datasource.Metric{ metricWithValueAndLabels(t, 1, "instance", "foo"), @@ 
-749,7 +748,6 @@ func TestAlertingRule_Template(t *testing.T) { "description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`, }, alerts: make(map[uint64]*notifier.Alert), - state: newRuleState(), }, []datasource.Metric{ metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"), @@ -789,7 +787,6 @@ func TestAlertingRule_Template(t *testing.T) { "summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`, }, alerts: make(map[uint64]*notifier.Alert), - state: newRuleState(), }, []datasource.Metric{ metricWithValueAndLabels(t, 1, @@ -820,6 +817,7 @@ func TestAlertingRule_Template(t *testing.T) { fq := &fakeQuerier{} tc.rule.GroupID = fakeGroup.ID() tc.rule.q = fq + tc.rule.state = newRuleState(10) fq.add(tc.metrics...) if _, err := tc.rule.Exec(context.TODO(), time.Now(), 0); err != nil { t.Fatalf("unexpected err: %s", err) @@ -936,6 +934,6 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule { For: waitFor, EvalInterval: waitFor, alerts: make(map[uint64]*notifier.Alert), - state: newRuleState(), + state: newRuleState(10), } } diff --git a/app/vmalert/config/config.go b/app/vmalert/config/config.go index e9f96a64f..9be2edb55 100644 --- a/app/vmalert/config/config.go +++ b/app/vmalert/config/config.go @@ -114,6 +114,9 @@ type Rule struct { Labels map[string]string `yaml:"labels,omitempty"` Annotations map[string]string `yaml:"annotations,omitempty"` Debug bool `yaml:"debug,omitempty"` + // UpdateEntriesLimit defines max number of rule's state updates stored in memory. + // Overrides `-rule.updateEntriesLimit`. + UpdateEntriesLimit *int `yaml:"update_entries_limit,omitempty"` // Catches all undefined fields and must be empty after parsing. 
XXX map[string]interface{} `yaml:",inline"` diff --git a/app/vmalert/config/config_test.go b/app/vmalert/config/config_test.go index 28b7f9db4..694e6ff0b 100644 --- a/app/vmalert/config/config_test.go +++ b/app/vmalert/config/config_test.go @@ -550,6 +550,20 @@ rules: - alert: foo expr: sum by(job) (up == 1) debug: true +`) + }) + t.Run("`update_entries_limit` change", func(t *testing.T) { + f(t, ` +name: TestGroup +rules: + - alert: foo + expr: sum by(job) (up == 1) +`, ` +name: TestGroup +rules: + - alert: foo + expr: sum by(job) (up == 1) + update_entries_limit: 33 `) }) } diff --git a/app/vmalert/config/testdata/rules/rules2-good.rules b/app/vmalert/config/testdata/rules/rules2-good.rules index 545a42d55..7b17282a0 100644 --- a/app/vmalert/config/testdata/rules/rules2-good.rules +++ b/app/vmalert/config/testdata/rules/rules2-good.rules @@ -12,6 +12,7 @@ groups: expr: vm_tcplistener_conns > 0 for: 3m debug: true + update_entries_limit: 40 annotations: labels: "Available labels: {{ $labels }}" summary: Too high connection number for {{ $labels.instance }} @@ -20,6 +21,7 @@ groups: {{ end }} description: "It is {{ $value }} connections for {{$labels.instance}}" - alert: ExampleAlertAlwaysFiring + update_entries_limit: -1 expr: sum by(job) (up == 1) labels: diff --git a/app/vmalert/config/testdata/rules/rules3-good.rules b/app/vmalert/config/testdata/rules/rules3-good.rules index 0eedbe0e8..eb2a78d4f 100644 --- a/app/vmalert/config/testdata/rules/rules3-good.rules +++ b/app/vmalert/config/testdata/rules/rules3-good.rules @@ -7,6 +7,7 @@ groups: - alert: Conns expr: filterSeries(sumSeries(host.receiver.interface.cons),'last','>', 500) for: 3m + annotations: summary: Too high connection number for {{$labels.instance}} description: "It is {{ $value }} connections for {{$labels.instance}}" diff --git a/app/vmalert/group_test.go b/app/vmalert/group_test.go index c4f2d2a82..6f215a37c 100644 --- a/app/vmalert/group_test.go +++ b/app/vmalert/group_test.go @@ -460,7 +460,7 
@@ func TestFaultyRW(t *testing.T) { r := &RecordingRule{ Name: "test", - state: newRuleState(), + state: newRuleState(10), q: fq, } diff --git a/app/vmalert/main.go b/app/vmalert/main.go index 4c26015ca..451948eec 100644 --- a/app/vmalert/main.go +++ b/app/vmalert/main.go @@ -56,7 +56,9 @@ absolute path to all .tpl files in root.`) validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine") maxResolveDuration = flag.Duration("rule.maxResolveDuration", 0, "Limits the maximum duration for automatic alert expiration, "+ "which is by default equal to 3 evaluation intervals of the parent group.") - resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier") + resendDelay = flag.Duration("rule.resendDelay", 0, "Minimum amount of time to wait before resending an alert to notifier") + ruleUpdateEntriesLimit = flag.Int("rule.updateEntriesLimit", 20, "Defines the max number of rule's state updates stored in-memory. "+ + "Rule's updates are available on rule's Details page and are used for debugging purposes. 
The number of stored updates can be overriden per rule via update_entries_limit param.") externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier") externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager `+ diff --git a/app/vmalert/recording.go b/app/vmalert/recording.go index 687755431..7cf0b2a33 100644 --- a/app/vmalert/recording.go +++ b/app/vmalert/recording.go @@ -58,7 +58,6 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul Labels: cfg.Labels, GroupID: group.ID(), metrics: &recordingRuleMetrics{}, - state: newRuleState(), q: qb.BuildWithParams(datasource.QuerierParams{ DataSourceType: group.Type.String(), EvaluationInterval: group.Interval, @@ -67,6 +66,12 @@ func newRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul }), } + if cfg.UpdateEntriesLimit != nil { + rr.state = newRuleState(*cfg.UpdateEntriesLimit) + } else { + rr.state = newRuleState(*ruleUpdateEntriesLimit) + } + labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID()) rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels), func() float64 { @@ -212,6 +217,7 @@ func (rr *RecordingRule) ToAPI() APIRule { EvaluationTime: lastState.duration.Seconds(), Health: "ok", LastSamples: lastState.samples, + MaxUpdates: rr.state.size(), Updates: rr.state.getAll(), // encode as strings to avoid rounding diff --git a/app/vmalert/recording_test.go b/app/vmalert/recording_test.go index a03f356ca..2cc842f1b 100644 --- a/app/vmalert/recording_test.go +++ b/app/vmalert/recording_test.go @@ -19,7 +19,7 @@ func TestRecordingRule_Exec(t *testing.T) { expTS []prompbmarshal.TimeSeries }{ { - &RecordingRule{Name: "foo", state: newRuleState()}, + &RecordingRule{Name: "foo"}, []datasource.Metric{metricWithValueAndLabels(t, 10, "__name__", 
"bar", )}, @@ -30,7 +30,7 @@ func TestRecordingRule_Exec(t *testing.T) { }, }, { - &RecordingRule{Name: "foobarbaz", state: newRuleState()}, + &RecordingRule{Name: "foobarbaz"}, []datasource.Metric{ metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "foo"), metricWithValueAndLabels(t, 2, "__name__", "bar", "job", "bar"), @@ -53,8 +53,7 @@ func TestRecordingRule_Exec(t *testing.T) { }, { &RecordingRule{ - Name: "job:foo", - state: newRuleState(), + Name: "job:foo", Labels: map[string]string{ "source": "test", }}, @@ -80,6 +79,7 @@ func TestRecordingRule_Exec(t *testing.T) { fq := &fakeQuerier{} fq.add(tc.metrics...) tc.rule.q = fq + tc.rule.state = newRuleState(10) tss, err := tc.rule.Exec(context.TODO(), time.Now(), 0) if err != nil { t.Fatalf("unexpected Exec err: %s", err) @@ -198,7 +198,7 @@ func TestRecordingRuleLimit(t *testing.T) { metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"), metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"), } - rule := &RecordingRule{Name: "job:foo", state: newRuleState(), Labels: map[string]string{ + rule := &RecordingRule{Name: "job:foo", state: newRuleState(10), Labels: map[string]string{ "source": "test_limit", }} var err error @@ -216,7 +216,7 @@ func TestRecordingRuleLimit(t *testing.T) { func TestRecordingRule_ExecNegative(t *testing.T) { rr := &RecordingRule{ Name: "job:foo", - state: newRuleState(), + state: newRuleState(10), Labels: map[string]string{ "job": "test", }, diff --git a/app/vmalert/rule.go b/app/vmalert/rule.go index 48f9a3bcc..ee854c52a 100644 --- a/app/vmalert/rule.go +++ b/app/vmalert/rule.go @@ -37,6 +37,8 @@ type ruleState struct { sync.RWMutex entries []ruleStateEntry cur int + // disabled defines whether ruleState tracks ruleStateEntry + disabled bool } type ruleStateEntry struct { @@ -57,21 +59,36 @@ type ruleStateEntry struct { curl string } -const defaultStateEntriesLimit = 20 - -func newRuleState() *ruleState { +func newRuleState(size 
int) *ruleState { + if size < 1 { + return &ruleState{disabled: true} + } return &ruleState{ - entries: make([]ruleStateEntry, defaultStateEntriesLimit), + entries: make([]ruleStateEntry, size), } } func (s *ruleState) getLast() ruleStateEntry { + if s.disabled { + return ruleStateEntry{} + } + s.RLock() defer s.RUnlock() return s.entries[s.cur] } +func (s *ruleState) size() int { + s.RLock() + defer s.RUnlock() + return len(s.entries) +} + func (s *ruleState) getAll() []ruleStateEntry { + if s.disabled { + return nil + } + entries := make([]ruleStateEntry, 0) s.RLock() @@ -94,6 +111,10 @@ func (s *ruleState) getAll() []ruleStateEntry { } func (s *ruleState) add(e ruleStateEntry) { + if s.disabled { + return + } + s.Lock() defer s.Unlock() diff --git a/app/vmalert/rule_test.go b/app/vmalert/rule_test.go index 5af5f3c8c..28205da90 100644 --- a/app/vmalert/rule_test.go +++ b/app/vmalert/rule_test.go @@ -6,8 +6,27 @@ import ( "time" ) +func TestRule_stateDisabled(t *testing.T) { + state := newRuleState(-1) + e := state.getLast() + if !e.at.IsZero() { + t.Fatalf("expected entry to be zero") + } + + state.add(ruleStateEntry{at: time.Now()}) + if !e.at.IsZero() { + t.Fatalf("expected entry to be zero") + } + + if len(state.getAll()) != 0 { + t.Fatalf("expected for state to have %d entries; got %d", + 0, len(state.getAll()), + ) + } +} func TestRule_state(t *testing.T) { - state := newRuleState() + stateEntriesN := 20 + state := newRuleState(stateEntriesN) e := state.getLast() if !e.at.IsZero() { t.Fatalf("expected entry to be zero") @@ -39,7 +58,7 @@ func TestRule_state(t *testing.T) { } var last time.Time - for i := 0; i < defaultStateEntriesLimit*2; i++ { + for i := 0; i < stateEntriesN*2; i++ { last = time.Now() state.add(ruleStateEntry{at: last}) } @@ -50,9 +69,9 @@ func TestRule_state(t *testing.T) { e.at, last) } - if len(state.getAll()) != defaultStateEntriesLimit { + if len(state.getAll()) != stateEntriesN { t.Fatalf("expected for state to have %d entries only; 
got %d", - defaultStateEntriesLimit, len(state.getAll()), + stateEntriesN, len(state.getAll()), ) } } @@ -61,7 +80,7 @@ func TestRule_state(t *testing.T) { // execution of state updates. // Should be executed with -race flag func TestRule_stateConcurrent(t *testing.T) { - state := newRuleState() + state := newRuleState(20) const workers = 50 const iterations = 100 diff --git a/app/vmalert/web.qtpl b/app/vmalert/web.qtpl index 78c2914c8..dbeb2bd74 100644 --- a/app/vmalert/web.qtpl +++ b/app/vmalert/web.qtpl @@ -440,7 +440,7 @@
-
Last {%d len(rule.Updates) %} updates:
+
Last {%d len(rule.Updates) %}/{%d rule.MaxUpdates %} updates:
diff --git a/app/vmalert/web.qtpl.go b/app/vmalert/web.qtpl.go index b31a01afb..a295cd4d2 100644 --- a/app/vmalert/web.qtpl.go +++ b/app/vmalert/web.qtpl.go @@ -1345,6 +1345,10 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule APIRule)
Last `) //line app/vmalert/web.qtpl:443 qw422016.N().D(len(rule.Updates)) +//line app/vmalert/web.qtpl:443 + qw422016.N().S(`/`) +//line app/vmalert/web.qtpl:443 + qw422016.N().D(rule.MaxUpdates) //line app/vmalert/web.qtpl:443 qw422016.N().S(` updates:
diff --git a/app/vmalert/web_test.go b/app/vmalert/web_test.go index 52bd91f63..5648087c6 100644 --- a/app/vmalert/web_test.go +++ b/app/vmalert/web_test.go @@ -17,7 +17,7 @@ func TestHandler(t *testing.T) { alerts: map[uint64]*notifier.Alert{ 0: {State: notifier.StateFiring}, }, - state: newRuleState(), + state: newRuleState(10), } g := &Group{ Name: "group", diff --git a/app/vmalert/web_types.go b/app/vmalert/web_types.go index 4c2075224..fcd6e0b3c 100644 --- a/app/vmalert/web_types.go +++ b/app/vmalert/web_types.go @@ -121,6 +121,8 @@ type APIRule struct { // GroupID is an unique Group's ID GroupID string `json:"group_id"` + // MaxUpdates is the max number of recorded ruleStateEntry objects + MaxUpdates int `json:"max_updates_entries"` // Updates contains the ordered list of recorded ruleStateEntry objects Updates []ruleStateEntry `json:"updates"` } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7d36cc029..139e123ec 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add ability to explore metrics exported by a particular `job` / `instance`. See [these docs](https://docs.victoriametrics.com/#metrics-explorer) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386). * FEATURE: allow passing partial `RFC3339` date/time to `time`, `start` and `end` query args at [querying APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). For example, `2022` is equivalent to `2022-01-01T00:00:00Z`, while `2022-01-30T14` is equivalent to `2022-01-30T14:00:00Z`. See [these docs](https://docs.victoriametrics.com/#timestamp-formats). 
* FEATURE: [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling): add support for `keepequal` and `dropequal` relabeling actions, which are supported by Prometheus starting from [v2.41.0](https://github.com/prometheus/prometheus/releases/tag/v2.41.0). These relabeling actions are almost identical to `keep_if_equal` and `drop_if_equal` relabeling actions supported by VictoriaMetrics since `v1.38.0` - see [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) - so it is recommended sticking to `keep_if_equal` and `drop_if_equal` actions instead of switching to `keepequal` and `dropequal`. +* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow configuring the default number of stored rule's update states in memory via global `-rule.updateEntriesLimit` command-line flag or per-rule via rule's `update_entries_limit` configuration param. * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly update the `step` value in url after the `step` input field has been manually changed. This allows preserving the proper `step` when copy-n-pasting the url to another instance of web browser. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3513). diff --git a/docs/vmalert.md b/docs/vmalert.md index 9c037d3ea..06753e58c 100644 --- a/docs/vmalert.md +++ b/docs/vmalert.md @@ -195,6 +195,11 @@ expr: # Is applicable to alerting rules only. [ debug: | default = false ] +# Defines the number of rule's updates entries stored in memory +# and available for view on rule's Details page. +# Overrides `rule.updateEntriesLimit` value for this specific rule. +[ update_entries_limit: | default 0 ] + # Labels to add or overwrite for each alert. labels: [ : ] @@ -323,6 +328,12 @@ expr: # Labels to add or overwrite before storing the result. labels: [ : ] + + +# Defines the number of rule's updates entries stored in memory +# and available for view on rule's Details page. 
+# Overrides `rule.updateEntriesLimit` value for this specific rule. +[ update_entries_limit: | default 0 ] ``` For recording rules to work `-remoteWrite.url` must be specified. @@ -699,7 +710,7 @@ may get empty response from datasource and produce empty recording rules or rese vmalert evaluation when data is delayed -By default recently written samples to VictoriaMetrics aren't visible for queries for up to 30s. +By default, recently written samples to VictoriaMetrics aren't visible for queries for up to 30s. This behavior is controlled by `-search.latencyOffset` command-line flag and the `latency_offset` query ag at `vmselect`. Usually, this results into a 30s shift for recording rules results. Note that too small value passed to `-search.latencyOffset` or to `latency_offest` query arg may lead to incomplete query results. @@ -725,8 +736,9 @@ If `-remoteWrite.url` command-line flag is configured, vmalert will persist aler [vmui](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#vmui) or Grafana to track how alerts state changed in time. -vmalert also stores last N state updates for each rule. To check updates, click on `Details` link next to rule's name -on `/vmalert/groups` page and check the `Last updates` section: +vmalert stores last `-rule.updateEntriesLimit` (or `update_entries_limit` [per-rule config](https://docs.victoriametrics.com/vmalert.html#alerting-rules)) +state updates for each rule. To check updates, click on `Details` link next to rule's name on `/vmalert/groups` page +and check the `Last updates` section: vmalert state @@ -735,7 +747,7 @@ HTTP request sent by vmalert to the `-datasource.url` during evaluation. If spec no samples returned and curl command returns data - then it is very likely there was no data in datasource on the moment when rule was evaluated. -vmalert also alows configuring more detailed logging for specific rule. 
Just set `debug: true` in rule's configuration +vmalert allows configuring more detailed logging for specific alerting rule. Just set `debug: true` in rule's configuration and vmalert will start printing additional log messages: ```terminal 2022-09-15T13:35:41.155Z DEBUG rule "TestGroup":"Conns" (2601299393013563564) at 2022-09-15T15:35:41+02:00: query returned 0 samples (elapsed: 5.896041ms) @@ -894,6 +906,8 @@ The shortlist of configuration flags is the following: Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit -loggerFormat string Format for logs. Possible values: default, json (default "default") + -loggerJSONFields string + Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg -loggerLevel string Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO") -loggerOutput string @@ -1096,6 +1110,8 @@ The shortlist of configuration flags is the following: Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead -rule.maxResolveDuration duration Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group. + -rule.updateEntriesLimit int + Defines the max number of rule's state updates stored in-memory. Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param. (default 20) -rule.resendDelay duration Minimum amount of time to wait before resending an alert to notifier -rule.templates array