From df0309eae084e0046efbd5ee33cf3d93b0388436 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 27 Dec 2020 12:06:04 +0200 Subject: [PATCH 1/3] app/vmselect/promql: simplify defer call for querystats.RegisterQuery --- app/vmselect/promql/exec.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go index adb42b0a6..086cde594 100644 --- a/app/vmselect/promql/exec.go +++ b/app/vmselect/promql/exec.go @@ -42,9 +42,7 @@ func Exec(ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, } if querystats.Enabled() { startTime := time.Now() - defer func() { - querystats.RegisterQuery(q, ec.End-ec.Start, startTime) - }() + defer querystats.RegisterQuery(q, ec.End-ec.Start, startTime) } ec.validate() From 4b7105a65b12208e6c87a8d3b758c175a089f044 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 27 Dec 2020 12:53:50 +0200 Subject: [PATCH 2/3] app/vmselect: sync query stats handling with cluster version --- app/vmselect/main.go | 22 ++++++------ app/vmselect/prometheus/prometheus.go | 3 +- app/vmselect/querystats/querystats.go | 50 ++++++++++++++------------- docs/Cluster-VictoriaMetrics.md | 3 ++ 4 files changed, 42 insertions(+), 36 deletions(-) diff --git a/app/vmselect/main.go b/app/vmselect/main.go index 4f78ab3cd..2be5d1ee5 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -198,14 +198,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { return true } return true - case "/api/v1/status/top_queries": - topQueriesRequests.Inc() - if err := prometheus.QueryStatsHandler(startTime, w, r); err != nil { - topQueriesErrors.Inc() - sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err)) - return true - } - return true case "/api/v1/status/tsdb": statusTSDBRequests.Inc() if err := prometheus.TSDBStatusHandler(startTime, w, r); err != nil { @@ -218,6 +210,14 @@ func RequestHandler(w http.ResponseWriter, r 
*http.Request) bool { statusActiveQueriesRequests.Inc() promql.WriteActiveQueries(w) return true + case "/api/v1/status/top_queries": + topQueriesRequests.Inc() + if err := prometheus.QueryStatsHandler(startTime, w, r); err != nil { + topQueriesErrors.Inc() + sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err)) + return true + } + return true case "/api/v1/export": exportRequests.Inc() if err := prometheus.ExportHandler(startTime, w, r); err != nil { @@ -424,14 +424,14 @@ var ( labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`) labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`) - topQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/top_queries"}`) - topQueriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/top_queries"}`) - statusTSDBRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/tsdb"}`) statusTSDBErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/tsdb"}`) statusActiveQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/active_queries"}`) + topQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/top_queries"}`) + topQueriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/top_queries"}`) + deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`) deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index d7a1a5a88..c11f7e771 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -1270,10 +1270,11 @@ func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque if err != nil { return 
fmt.Errorf("cannot parse `maxLifetime` arg: %w", err) } + maxLifetime := time.Duration(maxLifetimeMsecs) * time.Millisecond w.Header().Set("Content-Type", "application/json; charset=utf-8") bw := bufferedwriter.Get(w) defer bufferedwriter.Put(bw) - querystats.WriteJSONQueryStats(bw, topN, time.Duration(maxLifetimeMsecs)*time.Millisecond) + querystats.WriteJSONQueryStats(bw, topN, maxLifetime) if err := bw.Flush(); err != nil { return err } diff --git a/app/vmselect/querystats/querystats.go b/app/vmselect/querystats/querystats.go index 5f7a32559..8c4a92b81 100644 --- a/app/vmselect/querystats/querystats.go +++ b/app/vmselect/querystats/querystats.go @@ -128,19 +128,29 @@ func (qst *queryStatsTracker) registerQuery(query string, timeRangeMsecs int64, r.duration = duration } +func (r *queryStatRecord) matches(currentTime time.Time, maxLifetime time.Duration) bool { + if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime { + return false + } + return true +} + +func (r *queryStatRecord) key() queryStatKey { + return queryStatKey{ + query: r.query, + timeRangeSecs: r.timeRangeSecs, + } +} + func (qst *queryStatsTracker) getTopByCount(topN int, maxLifetime time.Duration) []queryStatByCount { currentTime := time.Now() qst.mu.Lock() m := make(map[queryStatKey]int) for _, r := range qst.a { - if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime { - continue + if r.matches(currentTime, maxLifetime) { + k := r.key() + m[k] = m[k] + 1 } - k := queryStatKey{ - query: r.query, - timeRangeSecs: r.timeRangeSecs, - } - m[k] = m[k] + 1 } qst.mu.Unlock() @@ -176,17 +186,13 @@ func (qst *queryStatsTracker) getTopByAvgDuration(topN int, maxLifetime time.Dur } m := make(map[queryStatKey]countSum) for _, r := range qst.a { - if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime { - continue + if r.matches(currentTime, maxLifetime) { + k := r.key() + ks := m[k] + ks.count++ + ks.sum += r.duration + m[k] = ks } - k := queryStatKey{ - query: r.query, - 
timeRangeSecs: r.timeRangeSecs, - } - ks := m[k] - ks.count++ - ks.sum += r.duration - m[k] = ks } qst.mu.Unlock() @@ -218,14 +224,10 @@ func (qst *queryStatsTracker) getTopBySumDuration(topN int, maxLifetime time.Dur qst.mu.Lock() m := make(map[queryStatKey]time.Duration) for _, r := range qst.a { - if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime { - continue + if r.matches(currentTime, maxLifetime) { + k := r.key() + m[k] = m[k] + r.duration } - k := queryStatKey{ - query: r.query, - timeRangeSecs: r.timeRangeSecs, - } - m[k] = m[k] + r.duration } qst.mu.Unlock() diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 6d798c317..da8df3bf3 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -198,6 +198,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr and `YYYY-MM-DD` is the date for collecting the stats. By default the stats is collected for the current day. - `api/v1/status/active_queries` - for currently executed active queries. Note that every `vmselect` maintains an independent list of active queries, which is returned in the response. + - `api/v1/status/top_queries` - for listing the most frequently executed queries and queries taking the most duration. * URLs for [Graphite Metrics API](https://graphite-api.readthedocs.io/en/latest/api.html#the-metrics-api): `http://:8481/select//graphite/`, where: - `` is an arbitrary number identifying data namespace for query (aka tenant) @@ -214,6 +215,8 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr - `tags/autoComplete/values` - returns tag values matching the given `valuePrefix` and/or `expr`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support). - `tags/delSeries` - deletes series matching the given `path`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#removing-series-from-the-tagdb). 
+* URL for query stats across all tenants: `http://:8481/api/v1/status/top_queries`. It lists the most frequently executed queries and queries taking the most duration. + * URL for time series deletion: `http://:8481/delete//prometheus/api/v1/admin/tsdb/delete_series?match[]=`. Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't be used on a regular basis, since it carries non-zero overhead. From 261535b32dfc02eed96fa0f3ebe4ad7dbca131c1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 27 Dec 2020 13:01:30 +0200 Subject: [PATCH 3/3] docs/Articles.md: add a link to https://www.percona.com/blog/2020/12/23/observations-on-better-resource-usage-with-percona-monitoring-and-management-v2-12-0/ --- docs/Articles.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Articles.md b/docs/Articles.md index cd4c2870a..01e358b85 100644 --- a/docs/Articles.md +++ b/docs/Articles.md @@ -3,6 +3,7 @@ ## Third-party articles and slides about VictoriaMetrics * [Foiled by the Firewall: A Tale of Transition From Prometheus to VictoriaMetrics](https://www.percona.com/blog/2020/12/01/foiled-by-the-firewall-a-tale-of-transition-from-prometheus-to-victoriametrics/) +* [Observations on Better Resource Usage with Percona Monitoring and Management v2.12.0](https://www.percona.com/blog/2020/12/23/observations-on-better-resource-usage-with-percona-monitoring-and-management-v2-12-0/) * [Better Prometheus rate() function with VictoriaMetrics](https://www.percona.com/blog/2020/02/28/better-prometheus-rate-function-with-victoriametrics/) * [Percona monitoring and management migration from Prometheus to VictoriaMetrics FAQ](https://www.percona.com/blog/2020/12/16/percona-monitoring-and-management-migration-from-prometheus-to-victoriametrics-faq/) * [Making peace with Prometheus rate()](https://blog.doit-intl.com/making-peace-with-prometheus-rate-43a3ea75c4cf)