From 1ccb77904bd85c32c6960e98dfd24bcbd2770146 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko <hagen1778@gmail.com> Date: Thu, 19 Aug 2021 13:58:54 +0300 Subject: [PATCH] vmselect: update `vm_request_duration_seconds` value when request fails (#1537) Before, metric `vm_request_duration_seconds` was update only on successful attempts which could be misleading. For example, timeout errors on netstorage request may be not accounted in the metric and won't be visible on dashboards. Using `defer` statement to update the metric after query arguments validation may improve the situation. --- app/vmselect/prometheus/prometheus.go | 42 ++++++++++++++++++--------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index e99ce9ab9e..91492d94ab 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -51,6 +51,8 @@ const defaultStep = 5 * 60 * 1000 // FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/ func FederateHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer federateDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse request form values: %w", err) @@ -107,7 +109,6 @@ func FederateHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, if err := bw.Flush(); err != nil { return err } - federateDuration.UpdateDuration(startTime) return nil } @@ -115,6 +116,8 @@ var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/fe // ExportCSVHandler exports data in CSV format from /api/v1/export/csv func ExportCSVHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer exportCSVDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse request form values: %w", err) @@ -180,7 +183,6 @@ func ExportCSVHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter if err != nil { return fmt.Errorf("error during exporting data to csv: %w", err) } - exportCSVDuration.UpdateDuration(startTime) return nil } @@ -188,6 +190,8 @@ var exportCSVDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/a // ExportNativeHandler exports data in native format from /api/v1/export/native. func ExportNativeHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer exportNativeDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse request form values: %w", err) @@ -251,7 +255,6 @@ func ExportNativeHandler(startTime time.Time, at *auth.Token, w http.ResponseWri if err := bw.Flush(); err != nil { return err } - exportNativeDuration.UpdateDuration(startTime) return nil } @@ -261,6 +264,8 @@ var bbPool bytesutil.ByteBufferPool // ExportHandler exports data in raw format from /api/v1/export. func ExportHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer exportDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse request form values: %w", err) @@ -291,7 +296,6 @@ func ExportHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r if err := exportHandler(at, w, r, matches, etf, start, end, format, maxRowsPerLine, reduceMemUsage, deadline); err != nil { return fmt.Errorf("error when exporting data for queries=%q on the time range (start=%d, end=%d): %w", matches, start, end, err) } - exportDuration.UpdateDuration(startTime) return nil } @@ -446,6 +450,8 @@ var exportBlockPool = &sync.Pool{ // // See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error { + defer deleteDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForQuery(r, startTime) if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse request form values: %w", err) @@ -469,7 +475,6 @@ func DeleteHandler(startTime time.Time, at *auth.Token, r *http.Request) error { // TODO: reset only cache for (account, project) resetRollupResultCaches() } - deleteDuration.UpdateDuration(startTime) return nil } @@ -517,6 +522,8 @@ var httpClient = &http.Client{ // // See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values func LabelValuesHandler(startTime time.Time, at *auth.Token, labelName string, w http.ResponseWriter, r *http.Request) error { + defer labelValuesDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForQuery(r, startTime) if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse form values: %w", err) @@ -585,7 +592,6 @@ func LabelValuesHandler(startTime time.Time, at *auth.Token, labelName string, w if err := bw.Flush(); err != nil { return err } - labelValuesDuration.UpdateDuration(startTime) return nil } @@ -665,6 +671,8 @@ var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path=" // LabelsCountHandler processes /api/v1/labels/count request. func LabelsCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer labelsCountDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForStatusRequest(r, startTime) denyPartialResponse := searchutils.GetDenyPartialResponse(r) labelEntries, isPartial, err := netstorage.GetLabelEntries(at, denyPartialResponse, deadline) @@ -679,7 +687,6 @@ func LabelsCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWrit if err := bw.Flush(); err != nil { return err } - labelsCountDuration.UpdateDuration(startTime) return nil } @@ -693,6 +700,8 @@ const secsPerDay = 3600 * 24 // // It can accept `match[]` filters in order to narrow down the search. func TSDBStatusHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer tsdbStatusDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForStatusRequest(r, startTime) if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse form values: %w", err) @@ -749,7 +758,6 @@ func TSDBStatusHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite if err := bw.Flush(); err != nil { return err } - tsdbStatusDuration.UpdateDuration(startTime) return nil } @@ -778,6 +786,8 @@ var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/ // // See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names func LabelsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer labelsDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForQuery(r, startTime) if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse form values: %w", err) @@ -844,7 +854,6 @@ func LabelsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r if err := bw.Flush(); err != nil { return err } - labelsDuration.UpdateDuration(startTime) return nil } @@ -910,6 +919,8 @@ var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/ // SeriesCountHandler processes /api/v1/series/count request. func SeriesCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer seriesCountDuration.UpdateDuration(startTime) + deadline := searchutils.GetDeadlineForStatusRequest(r, startTime) denyPartialResponse := searchutils.GetDenyPartialResponse(r) n, isPartial, err := netstorage.GetSeriesCount(at, denyPartialResponse, deadline) @@ -924,7 +935,6 @@ func SeriesCountHandler(startTime time.Time, at *auth.Token, w http.ResponseWrit if err := bw.Flush(); err != nil { return err } - seriesCountDuration.UpdateDuration(startTime) return nil } @@ -934,6 +944,8 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path=" // // See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers func SeriesHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer seriesDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse form values: %w", err) @@ -1020,7 +1032,6 @@ func SeriesHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r if err != nil { return fmt.Errorf("error during data fetching: %w", err) } - seriesDuration.UpdateDuration(startTime) return nil } @@ -1030,6 +1041,8 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/ // // See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries func QueryHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer queryDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 query := r.FormValue("query") if len(query) == 0 { @@ -1136,7 +1149,6 @@ func QueryHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r if err := bw.Flush(); err != nil { return err } - queryDuration.UpdateDuration(startTime) return nil } @@ -1146,6 +1158,8 @@ var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v // // See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries func QueryRangeHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer queryRangeDuration.UpdateDuration(startTime) + ct := startTime.UnixNano() / 1e6 query := r.FormValue("query") if len(query) == 0 { @@ -1170,7 +1184,6 @@ func QueryRangeHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite if err := queryRangeHandler(startTime, at, w, query, start, end, step, r, ct, etf); err != nil { return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", query, start, end, step, err) } - queryRangeDuration.UpdateDuration(startTime) return nil } @@ -1386,6 +1399,8 @@ func getLatencyOffsetMilliseconds() int64 { // QueryStatsHandler returns query stats at `/api/v1/status/top_queries` func QueryStatsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { + defer queryStatsDuration.UpdateDuration(startTime) + if err := r.ParseForm(); err != nil { return fmt.Errorf("cannot parse form values: %w", err) } @@ -1414,7 +1429,6 @@ func QueryStatsHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite if err := bw.Flush(); err != nil { return err } - queryStatsDuration.UpdateDuration(startTime) return nil }