app/vmselect: do not limit concurrency for static and fast queries

Previously the concurrency of static and fast queries was limited by the -search.maxConcurrentRequests
command-line flag. This could complicate identifying heavy queries via the `Top queries` and `Active queries` pages in `vmui`,
since neither `vmui` nor these pages could be opened on an overloaded vmselect.

Thanks to @f41gh7 for the idea.
This commit is contained in:
Aliaksandr Valialkin 2023-12-01 17:24:59 +02:00
parent 487f6380d0
commit f62e03b3d2
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
3 changed files with 126 additions and 115 deletions

View file

@ -94,6 +94,23 @@ var vmuiFileServer = http.FileServer(http.FS(vmuiFiles))
// RequestHandler handles remote read API requests
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
path := strings.Replace(r.URL.Path, "//", "/", -1)
// Strip /prometheus and /graphite prefixes in order to provide path compatibility with cluster version
//
// See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
switch {
case strings.HasPrefix(path, "/prometheus/"):
path = path[len("/prometheus"):]
case strings.HasPrefix(path, "/graphite/"):
path = path[len("/graphite"):]
}
if handleStaticAndSimpleRequests(w, r, path) {
return true
}
// Handle non-trivial dynamic requests, which may take big amounts of time and resources.
startTime := time.Now()
defer requestDuration.UpdateDuration(startTime)
tracerEnabled := httputils.GetBool(r, "trace")
@ -152,7 +169,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}()
}
path := strings.Replace(r.URL.Path, "//", "/", -1)
if path == "/internal/resetRollupResultCache" {
if !httpserver.CheckAuthFlag(w, r, *resetCacheAuthKey, "resetCacheAuthKey") {
return true
@ -161,50 +177,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
// Strip /prometheus and /graphite prefixes in order to provide path compatibility with cluster version
//
// See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
switch {
case strings.HasPrefix(path, "/prometheus/"):
path = path[len("/prometheus"):]
case strings.HasPrefix(path, "/graphite/"):
path = path[len("/graphite"):]
}
// vmui access.
if path == "/vmui" || path == "/graph" {
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
_ = r.ParseForm()
path = strings.TrimPrefix(path, "/")
newURL := path + "/?" + r.Form.Encode()
httpserver.Redirect(w, newURL)
return true
}
if strings.HasPrefix(path, "/graph/") {
// This is needed for serving /graph URLs from Prometheus datasource in Grafana.
path = strings.Replace(path, "/graph/", "/vmui/", 1)
}
if path == "/vmui/custom-dashboards" {
if err := handleVMUICustomDashboards(w); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
if strings.HasPrefix(path, "/vmui/") {
if strings.HasPrefix(path, "/vmui/static/") {
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
w.Header().Set("Cache-Control", "max-age=31536000")
}
r.URL.Path = path
vmuiFileServer.ServeHTTP(w, r)
return true
}
if strings.HasPrefix(path, "/api/v1/label/") {
s := path[len("/api/v1/label/"):]
if strings.HasSuffix(s, "/values") {
@ -229,45 +201,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
return true
}
if strings.HasPrefix(path, "/functions") {
funcName := path[len("/functions"):]
funcName = strings.TrimPrefix(funcName, "/")
if funcName == "" {
graphiteFunctionsRequests.Inc()
if err := graphite.FunctionsHandler(w, r); err != nil {
graphiteFunctionsErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
graphiteFunctionDetailsRequests.Inc()
if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil {
graphiteFunctionDetailsErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
if path == "/vmalert" {
// vmalert access via incomplete url without `/` in the end. Redirect to complete url.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
httpserver.Redirect(w, "vmalert/")
return true
}
if strings.HasPrefix(path, "/vmalert/") {
vmalertRequests.Inc()
if len(*vmalertProxyURL) == 0 {
w.WriteHeader(http.StatusBadRequest)
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`)
return true
}
proxyVMAlertRequests(w, r)
return true
}
switch path {
case "/api/v1/query":
@ -324,20 +257,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
return true
case "/api/v1/status/active_queries":
statusActiveQueriesRequests.Inc()
httpserver.EnableCORS(w, r)
promql.ActiveQueriesHandler(w, r)
return true
case "/api/v1/status/top_queries":
topQueriesRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryStatsHandler(startTime, w, r); err != nil {
topQueriesErrors.Inc()
sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err))
return true
}
return true
case "/api/v1/export":
exportRequests.Inc()
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
@ -466,6 +385,113 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
return true
case "/api/v1/admin/tsdb/delete_series":
if !httpserver.CheckAuthFlag(w, r, *deleteAuthKey, "deleteAuthKey") {
return true
}
deleteRequests.Inc()
if err := prometheus.DeleteHandler(startTime, r); err != nil {
deleteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
default:
return false
}
}
func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path string) bool {
// vmui access.
if path == "/vmui" || path == "/graph" {
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
_ = r.ParseForm()
path = strings.TrimPrefix(path, "/")
newURL := path + "/?" + r.Form.Encode()
httpserver.Redirect(w, newURL)
return true
}
if strings.HasPrefix(path, "/graph/") {
// This is needed for serving /graph URLs from Prometheus datasource in Grafana.
path = strings.Replace(path, "/graph/", "/vmui/", 1)
}
if path == "/vmui/custom-dashboards" {
if err := handleVMUICustomDashboards(w); err != nil {
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
if strings.HasPrefix(path, "/vmui/") {
if strings.HasPrefix(path, "/vmui/static/") {
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
w.Header().Set("Cache-Control", "max-age=31536000")
}
r.URL.Path = path
vmuiFileServer.ServeHTTP(w, r)
return true
}
if strings.HasPrefix(path, "/functions") {
funcName := path[len("/functions"):]
funcName = strings.TrimPrefix(funcName, "/")
if funcName == "" {
graphiteFunctionsRequests.Inc()
if err := graphite.FunctionsHandler(w, r); err != nil {
graphiteFunctionsErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
graphiteFunctionDetailsRequests.Inc()
if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil {
graphiteFunctionDetailsErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
return true
}
if path == "/vmalert" {
// vmalert access via incomplete url without `/` in the end. Redirect to complete url.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
httpserver.Redirect(w, "vmalert/")
return true
}
if strings.HasPrefix(path, "/vmalert/") {
vmalertRequests.Inc()
if len(*vmalertProxyURL) == 0 {
w.WriteHeader(http.StatusBadRequest)
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`)
return true
}
proxyVMAlertRequests(w, r)
return true
}
switch path {
case "/api/v1/status/active_queries":
statusActiveQueriesRequests.Inc()
httpserver.EnableCORS(w, r)
promql.ActiveQueriesHandler(w, r)
return true
case "/api/v1/status/top_queries":
topQueriesRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.QueryStatsHandler(w, r); err != nil {
topQueriesErrors.Inc()
sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err))
return true
}
return true
case "/metric-relabel-debug":
promscrapeMetricRelabelDebugRequests.Inc()
promscrape.WriteMetricRelabelDebug(w, r)
@ -519,18 +545,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, "%s", `{"status":"success","data":[]}`)
return true
case "/api/v1/admin/tsdb/delete_series":
if !httpserver.CheckAuthFlag(w, r, *deleteAuthKey, "deleteAuthKey") {
return true
}
deleteRequests.Inc()
if err := prometheus.DeleteHandler(startTime, r); err != nil {
deleteErrors.Inc()
httpserver.Errorf(w, r, "%s", err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
default:
return false
}

View file

@ -1063,9 +1063,7 @@ func getLatencyOffsetMilliseconds(r *http.Request) (int64, error) {
}
// QueryStatsHandler returns query stats at `/api/v1/status/top_queries`
func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer queryStatsDuration.UpdateDuration(startTime)
func QueryStatsHandler(w http.ResponseWriter, r *http.Request) error {
topN := 20
topNStr := r.FormValue("topN")
if len(topNStr) > 0 {
@ -1090,8 +1088,6 @@ func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
return nil
}
var queryStatsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/top_queries"}`)
// commonParams contains common parameters for all /api/v1/* handlers
//
// timeout, start, end, match[], extra_label, extra_filters[]

View file

@ -28,6 +28,7 @@ The sandbox cluster installation is running under the constant load generated by
## tip
* FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` in `vmui` could result in the `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of an overloaded `vmselect` or single-node VictoriaMetrics.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for Datadog `/api/v2/series` and `/api/beta/sketches` ingestion protocols to vmagent/vminsert components. See this [doc](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) for examples. Thanks to @AndrewChubatiuk for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5094).