From 70a9b4318ce0f31b88ad553ef3e0d0e31047462b Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 1 Dec 2022 13:57:53 +0100 Subject: [PATCH] vmalert: fix replay step param (#3428) The recent change in modifying default value of `datasource.queryStep` flag resulted in situation where replay mode was always running queries with step=`datasource.queryStep`. When it should always use rule's evaluation interval. The fix is related not to replay mode only, but for all Range requests. Now step param is set individually for each mode. Signed-off-by: hagen1778 Signed-off-by: hagen1778 --- app/vmalert/datasource/vm_prom_api.go | 25 +++++++++++++++---------- app/vmalert/datasource/vm_test.go | 6 +++++- docs/CHANGELOG.md | 1 + 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/app/vmalert/datasource/vm_prom_api.go b/app/vmalert/datasource/vm_prom_api.go index 5e80cb820..1b5fd5915 100644 --- a/app/vmalert/datasource/vm_prom_api.go +++ b/app/vmalert/datasource/vm_prom_api.go @@ -149,6 +149,16 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp = timestamp.Truncate(s.evaluationInterval) } q.Set("time", fmt.Sprintf("%d", timestamp.Unix())) + if s.evaluationInterval > 0 { // set step as evaluationInterval by default + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) + } + if s.queryStep > 0 { // override step with user-specified value + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds()))) + } r.URL.RawQuery = q.Encode() s.setPrometheusReqParams(r, query) } @@ -163,6 +173,11 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s q := r.URL.Query() q.Add("start", fmt.Sprintf("%d", start.Unix())) q.Add("end", fmt.Sprintf("%d", end.Unix())) + if s.evaluationInterval > 0 { // set step as evaluationInterval by default + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) + } r.URL.RawQuery = q.Encode() s.setPrometheusReqParams(r, query) } @@ -178,15 +193,5 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) { } } q.Set("query", query) - if s.evaluationInterval > 0 { // set step as evaluationInterval by default - // always convert to seconds to keep compatibility with older - // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 - q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) - } - if s.queryStep > 0 { // override step with user-specified value - // always convert to seconds to keep compatibility with older - // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 - q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds()))) - } r.URL.RawQuery = q.Encode() } diff --git a/app/vmalert/datasource/vm_test.go b/app/vmalert/datasource/vm_test.go index 1cd433cee..f993dc2c9 100644 --- a/app/vmalert/datasource/vm_test.go +++ b/app/vmalert/datasource/vm_test.go @@ -197,6 +197,10 @@ func TestVMRangeQuery(t *testing.T) { if _, err := strconv.ParseInt(endTS, 10, 64); err != nil { t.Errorf("failed to parse 'end' query param: %s", err) } + step := r.URL.Query().Get("step") + if step != "15s" { + t.Errorf("expected 'step' query param to be 15s; got %q instead", step) + } switch c { case 0: w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`)) @@ -210,7 +214,7 @@ func TestVMRangeQuery(t *testing.T) { if err != nil { t.Fatalf("unexpected: %s", err) } - s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client()) + s := NewVMStorage(srv.URL, authCfg, time.Minute, *queryStep, false, srv.Client()) p := NewPrometheusType() pq := s.BuildWithParams(QuerierParams{DataSourceType: &p, EvaluationInterval: 15 * time.Second}) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 9156af45f..c151515f7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## v1.79.x long-time support release (LTS) +* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly specify rule evaluation step during the [replay mode](https://docs.victoriametrics.com/vmalert.html#rules-backfilling). The `step` value was previously overriden by `datasource.queryStep` flag. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly return the error message from remote-write failures. Before, error was ignored and only `vmalert_remotewrite_errors_total` was incremented. * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly return an empty result from [limit_offset](https://docs.victoriametrics.com/MetricsQL.html#limit_offset) if the `offset` arg exceeds the number of inner time series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3312). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover GCE zones when `filter` option is set at [gce_sd_configs](https://docs.victoriametrics.com/sd_configs.html#gce_sd_configs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3202).