From bcba5d2a78e98f43c37f04941819843da92fbd43 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 1 Dec 2022 13:57:53 +0100 Subject: [PATCH] vmalert: fix replay step param (#3428) The recent change in modifying default value of `datasource.queryStep` flag resulted in situation where replay mode was always running queries with step=`datasource.queryStep`. When it should always use rule's evaluation interval. The fix is related not to replay mode only, but for all Range requests. Now step param is set individually for each mode. Signed-off-by: hagen1778 Signed-off-by: hagen1778 --- app/vmalert/datasource/vm_prom_api.go | 25 +++++++++++++++---------- app/vmalert/datasource/vm_test.go | 6 +++++- docs/CHANGELOG.md | 1 + 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/app/vmalert/datasource/vm_prom_api.go b/app/vmalert/datasource/vm_prom_api.go index 5e80cb820a..1b5fd59155 100644 --- a/app/vmalert/datasource/vm_prom_api.go +++ b/app/vmalert/datasource/vm_prom_api.go @@ -149,6 +149,16 @@ func (s *VMStorage) setPrometheusInstantReqParams(r *http.Request, query string, timestamp = timestamp.Truncate(s.evaluationInterval) } q.Set("time", fmt.Sprintf("%d", timestamp.Unix())) + if s.evaluationInterval > 0 { // set step as evaluationInterval by default + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) + } + if s.queryStep > 0 { // override step with user-specified value + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds()))) + } r.URL.RawQuery = q.Encode() s.setPrometheusReqParams(r, query) } @@ -163,6 +173,11 @@ func (s *VMStorage) setPrometheusRangeReqParams(r *http.Request, query string, s q := r.URL.Query() q.Add("start", fmt.Sprintf("%d", start.Unix())) q.Add("end", fmt.Sprintf("%d", end.Unix())) + if s.evaluationInterval > 0 { // set step as evaluationInterval by default + // always convert to seconds to keep compatibility with older + // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 + q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) + } r.URL.RawQuery = q.Encode() s.setPrometheusReqParams(r, query) } @@ -178,15 +193,5 @@ func (s *VMStorage) setPrometheusReqParams(r *http.Request, query string) { } } q.Set("query", query) - if s.evaluationInterval > 0 { // set step as evaluationInterval by default - // always convert to seconds to keep compatibility with older - // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 - q.Set("step", fmt.Sprintf("%ds", int(s.evaluationInterval.Seconds()))) - } - if s.queryStep > 0 { // override step with user-specified value - // always convert to seconds to keep compatibility with older - // Prometheus versions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1943 - q.Set("step", fmt.Sprintf("%ds", int(s.queryStep.Seconds()))) - } r.URL.RawQuery = q.Encode() } diff --git a/app/vmalert/datasource/vm_test.go b/app/vmalert/datasource/vm_test.go index c01c021e60..1932b892de 100644 --- a/app/vmalert/datasource/vm_test.go +++ b/app/vmalert/datasource/vm_test.go @@ -199,6 +199,10 @@ func TestVMRangeQuery(t *testing.T) { if _, err := strconv.ParseInt(endTS, 10, 64); err != nil { t.Errorf("failed to parse 'end' query param: %s", err) } + step := r.URL.Query().Get("step") + if step != "15s" { + t.Errorf("expected 'step' query param to be 15s; got %q instead", step) + } switch c { case 0: w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`)) @@ -212,7 +216,7 @@ func TestVMRangeQuery(t *testing.T) { if err != nil { t.Fatalf("unexpected: %s", err) } - s := NewVMStorage(srv.URL, authCfg, time.Minute, 0, false, srv.Client()) + s := NewVMStorage(srv.URL, authCfg, time.Minute, *queryStep, false, srv.Client()) pq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourcePrometheus), EvaluationInterval: 15 * time.Second}) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 05b31f35de..9e81fff05c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -22,6 +22,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `remoteWrite.sendTimeout` command-line flag to configure timeout for sending data to remote write URL. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3408) * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly pass HTTP headers during the alert state restore procedure. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3418). +* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly specify rule evaluation step during the [replay mode](https://docs.victoriametrics.com/vmalert.html#rules-backfilling). The `step` value was previously overriden by `datasource.queryStep` flag. ## [v1.84.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.84.0)