mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-01 14:47:38 +00:00
d0d258b314
A single heavy request can saturate all the available CPUs, so let's limit the number of concurrent requests to lower value. This will give more chances for executing insert path.
263 lines
8.9 KiB
Go
263 lines
8.9 KiB
Go
package vmselect
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"net/http"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
|
"github.com/VictoriaMetrics/metrics"
|
|
)
|
|
|
|
var (
|
|
deleteAuthKey = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
|
|
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
|
|
"It shouldn't be high, since a single request can saturate all the CPU cores. See also `-search.maxQueueDuration`")
|
|
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when `-search.maxConcurrentRequests` limit is reached")
|
|
)
|
|
|
|
func getDefaultMaxConcurrentRequests() int {
|
|
n := runtime.GOMAXPROCS(-1)
|
|
if n <= 4 {
|
|
n *= 2
|
|
}
|
|
if n > 16 {
|
|
// A single request can saturate all the CPU cores, so there is no sense
|
|
// in allowing higher number of concurrent requests - they will just contend
|
|
// for unavailable CPU time.
|
|
n = 16
|
|
}
|
|
return n
|
|
}
|
|
|
|
// Init initializes vmselect
|
|
func Init() {
|
|
tmpDirPath := *vmstorage.DataPath + "/tmp"
|
|
fs.RemoveDirContents(tmpDirPath)
|
|
netstorage.InitTmpBlocksDir(tmpDirPath)
|
|
promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
|
|
|
|
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
|
|
}
|
|
|
|
// Stop stops vmselect
|
|
func Stop() {
|
|
promql.StopRollupResultCache()
|
|
}
|
|
|
|
var concurrencyCh chan struct{}
|
|
|
|
var (
|
|
concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
|
|
concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)
|
|
|
|
_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
|
|
return float64(cap(concurrencyCh))
|
|
})
|
|
_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
|
|
return float64(len(concurrencyCh))
|
|
})
|
|
)
|
|
|
|
// RequestHandler handles remote read API requests for Prometheus
|
|
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|
// Limit the number of concurrent queries.
|
|
select {
|
|
case concurrencyCh <- struct{}{}:
|
|
defer func() { <-concurrencyCh }()
|
|
default:
|
|
// Sleep for a while until giving up. This should resolve short bursts in requests.
|
|
concurrencyLimitReached.Inc()
|
|
t := timerpool.Get(*maxQueueDuration)
|
|
select {
|
|
case concurrencyCh <- struct{}{}:
|
|
timerpool.Put(t)
|
|
defer func() { <-concurrencyCh }()
|
|
case <-t.C:
|
|
timerpool.Put(t)
|
|
concurrencyLimitTimeout.Inc()
|
|
err := &httpserver.ErrorWithStatusCode{
|
|
Err: fmt.Errorf("cannot handle more than %d concurrent search requests during %s; possible solutions: "+
|
|
"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity",
|
|
*maxConcurrentRequests, *maxQueueDuration),
|
|
StatusCode: http.StatusServiceUnavailable,
|
|
}
|
|
httpserver.Errorf(w, "%s", err)
|
|
return true
|
|
}
|
|
}
|
|
|
|
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
|
if strings.HasPrefix(path, "/api/v1/label/") {
|
|
s := r.URL.Path[len("/api/v1/label/"):]
|
|
if strings.HasSuffix(s, "/values") {
|
|
labelValuesRequests.Inc()
|
|
labelName := s[:len(s)-len("/values")]
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
|
|
labelValuesErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
switch path {
|
|
case "/api/v1/query":
|
|
queryRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.QueryHandler(w, r); err != nil {
|
|
queryErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/query_range":
|
|
queryRangeRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.QueryRangeHandler(w, r); err != nil {
|
|
queryRangeErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/series":
|
|
seriesRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.SeriesHandler(w, r); err != nil {
|
|
seriesErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/series/count":
|
|
seriesCountRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.SeriesCountHandler(w, r); err != nil {
|
|
seriesCountErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/labels":
|
|
labelsRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.LabelsHandler(w, r); err != nil {
|
|
labelsErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/labels/count":
|
|
labelsCountRequests.Inc()
|
|
httpserver.EnableCORS(w, r)
|
|
if err := prometheus.LabelsCountHandler(w, r); err != nil {
|
|
labelsCountErrors.Inc()
|
|
sendPrometheusError(w, r, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/export":
|
|
exportRequests.Inc()
|
|
if err := prometheus.ExportHandler(w, r); err != nil {
|
|
exportErrors.Inc()
|
|
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/federate":
|
|
federateRequests.Inc()
|
|
if err := prometheus.FederateHandler(w, r); err != nil {
|
|
federateErrors.Inc()
|
|
httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
|
|
return true
|
|
}
|
|
return true
|
|
case "/api/v1/rules":
|
|
// Return dumb placeholder
|
|
rulesRequests.Inc()
|
|
w.Header().Set("Content-Type", "application/json")
|
|
fmt.Fprintf(w, "%s", `{"status":"success","data":{"groups":[]}}`)
|
|
return true
|
|
case "/api/v1/alerts":
|
|
// Return dumb placehloder
|
|
alertsRequests.Inc()
|
|
w.Header().Set("Content-Type", "application/json")
|
|
fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
|
|
return true
|
|
case "/api/v1/admin/tsdb/delete_series":
|
|
deleteRequests.Inc()
|
|
authKey := r.FormValue("authKey")
|
|
if authKey != *deleteAuthKey {
|
|
httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
|
|
return true
|
|
}
|
|
if err := prometheus.DeleteHandler(r); err != nil {
|
|
deleteErrors.Inc()
|
|
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
|
|
return true
|
|
}
|
|
w.WriteHeader(http.StatusNoContent)
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
|
|
logger.Errorf("error in %q: %s", r.URL.Path, err)
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
statusCode := http.StatusUnprocessableEntity
|
|
if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {
|
|
statusCode = esc.StatusCode
|
|
}
|
|
w.WriteHeader(statusCode)
|
|
prometheus.WriteErrorResponse(w, statusCode, err)
|
|
}
|
|
|
|
var (
|
|
labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
|
|
labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)
|
|
|
|
queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
|
|
queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)
|
|
|
|
queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
|
|
queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)
|
|
|
|
seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
|
|
seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)
|
|
|
|
seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
|
|
seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)
|
|
|
|
labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
|
|
labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)
|
|
|
|
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
|
|
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)
|
|
|
|
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
|
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
|
|
|
|
exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
|
|
exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)
|
|
|
|
federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
|
|
federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
|
|
|
|
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
|
|
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
|
|
)
|