2019-05-22 21:16:55 +00:00
package vmselect
import (
2020-07-08 15:52:55 +00:00
"encoding/json"
2020-06-30 21:02:02 +00:00
"errors"
2019-05-22 21:16:55 +00:00
"flag"
2019-08-23 06:46:45 +00:00
"fmt"
2019-05-22 21:16:55 +00:00
"net/http"
"runtime"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2019-05-28 14:17:19 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
2019-05-22 21:16:55 +00:00
"github.com/VictoriaMetrics/metrics"
)
var (
deleteAuthKey = flag . String ( "deleteAuthKey" , "" , "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series" )
2020-01-17 13:43:47 +00:00
maxConcurrentRequests = flag . Int ( "search.maxConcurrentRequests" , getDefaultMaxConcurrentRequests ( ) , "The maximum number of concurrent search requests. " +
2020-02-04 13:46:13 +00:00
"It shouldn't be high, since a single request can saturate all the CPU cores. See also -search.maxQueueDuration" )
2020-02-21 11:53:18 +00:00
maxQueueDuration = flag . Duration ( "search.maxQueueDuration" , 10 * time . Second , "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached" )
2020-07-08 15:52:55 +00:00
resetCacheAuthKey = flag . String ( "search.resetCacheAuthKey" , "" , "Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call. Will be replaced by internalAuthKey." )
internalAuthKey = flag . String ( "internalAuthKey" , "" , "Optional authKey for operation calls with prefix /internal. Use value of search.resetCacheAuthKey fif not set." )
2019-05-22 21:16:55 +00:00
)
2020-01-17 13:43:47 +00:00
// getDefaultMaxConcurrentRequests derives the default value for
// -search.maxConcurrentRequests from the current GOMAXPROCS setting.
//
// Up to 4 procs the result is twice the proc count; above that it equals
// the proc count, capped at 16.
func getDefaultMaxConcurrentRequests() int {
	// A single request can saturate all the CPU cores, so allowing more
	// concurrent requests than this only makes them contend for CPU time
	// that isn't there.
	const upperBound = 16

	procs := runtime.GOMAXPROCS(-1)
	switch {
	case procs <= 4:
		return procs * 2
	case procs > upperBound:
		return upperBound
	default:
		return procs
	}
}
2019-05-22 21:16:55 +00:00
// Init initializes vmselect
func Init ( ) {
promql . InitRollupResultCache ( * vmstorage . DataPath + "/cache/rollupResult" )
2019-08-05 15:27:50 +00:00
2019-05-22 21:16:55 +00:00
concurrencyCh = make ( chan struct { } , * maxConcurrentRequests )
}
// Stop shuts vmselect down by stopping the rollup result cache.
func Stop() {
	promql.StopRollupResultCache()
}
2019-08-05 15:27:50 +00:00
// concurrencyCh is used as a counting semaphore for select requests:
// RequestHandler sends into it to take a slot and receives from it to
// release the slot. Init allocates it with -search.maxConcurrentRequests slots.
var concurrencyCh chan struct{}

// Metrics describing the concurrency limiter.
//
// Metric names must not contain surrounding whitespace, otherwise they are
// not valid metric identifiers.
var (
	// concurrencyLimitReached counts requests that found no free slot and had to queue.
	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)

	// concurrencyLimitTimeout counts queued requests that gave up after -search.maxQueueDuration.
	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)

	// Capacity of the limiter; cap of a nil channel is 0, so this reports 0 until Init runs.
	_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
		return float64(cap(concurrencyCh))
	})

	// Number of slots currently taken.
	_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
		return float64(len(concurrencyCh))
	})
)
2020-07-08 15:52:55 +00:00
// internalHandlerPrefix is the URL path prefix of requests that
// RequestHandler routes to internalHandler instead of apiHandler.
var internalHandlerPrefix = "/internal"
2019-05-22 21:16:55 +00:00
// RequestHandler handles remote read API requests for Prometheus
func RequestHandler ( w http . ResponseWriter , r * http . Request ) bool {
2020-02-04 14:13:59 +00:00
startTime := time . Now ( )
2019-05-22 21:16:55 +00:00
// Limit the number of concurrent queries.
select {
case concurrencyCh <- struct { } { } :
defer func ( ) { <- concurrencyCh } ( )
2019-08-05 15:27:50 +00:00
default :
// Sleep for a while until giving up. This should resolve short bursts in requests.
concurrencyLimitReached . Inc ( )
t := timerpool . Get ( * maxQueueDuration )
select {
case concurrencyCh <- struct { } { } :
timerpool . Put ( t )
defer func ( ) { <- concurrencyCh } ( )
case <- t . C :
timerpool . Put ( t )
concurrencyLimitTimeout . Inc ( )
2019-08-23 06:46:45 +00:00
err := & httpserver . ErrorWithStatusCode {
2020-01-17 11:24:37 +00:00
Err : fmt . Errorf ( "cannot handle more than %d concurrent search requests during %s; possible solutions: " +
"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity" ,
* maxConcurrentRequests , * maxQueueDuration ) ,
2019-08-23 06:46:45 +00:00
StatusCode : http . StatusServiceUnavailable ,
}
httpserver . Errorf ( w , "%s" , err )
2019-08-05 15:27:50 +00:00
return true
}
2019-05-22 21:16:55 +00:00
}
path := strings . Replace ( r . URL . Path , "//" , "/" , - 1 )
2020-07-08 15:52:55 +00:00
if strings . HasPrefix ( path , internalHandlerPrefix ) {
return internalHandler ( w , r )
2020-02-21 11:53:18 +00:00
}
2020-07-08 15:52:55 +00:00
return apiHandler ( startTime , w , r , path )
}
2020-02-21 11:53:18 +00:00
2020-07-08 15:52:55 +00:00
func apiHandler ( startTime time . Time , w http . ResponseWriter , r * http . Request , path string ) bool {
2019-05-22 21:16:55 +00:00
if strings . HasPrefix ( path , "/api/v1/label/" ) {
s := r . URL . Path [ len ( "/api/v1/label/" ) : ]
if strings . HasSuffix ( s , "/values" ) {
labelValuesRequests . Inc ( )
labelName := s [ : len ( s ) - len ( "/values" ) ]
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . LabelValuesHandler ( startTime , labelName , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
labelValuesErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
}
}
switch path {
case "/api/v1/query" :
queryRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . QueryHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
queryErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
case "/api/v1/query_range" :
queryRangeRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . QueryRangeHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
queryRangeErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
case "/api/v1/series" :
seriesRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . SeriesHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
seriesErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
case "/api/v1/series/count" :
seriesCountRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . SeriesCountHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
seriesCountErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
case "/api/v1/labels" :
labelsRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . LabelsHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
labelsErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
2019-06-10 15:55:20 +00:00
case "/api/v1/labels/count" :
labelsCountRequests . Inc ( )
httpserver . EnableCORS ( w , r )
2020-02-04 14:13:59 +00:00
if err := prometheus . LabelsCountHandler ( startTime , w , r ) ; err != nil {
2019-06-10 15:55:20 +00:00
labelsCountErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
2020-04-22 16:57:36 +00:00
case "/api/v1/status/tsdb" :
tsdbStatusRequests . Inc ( )
if err := prometheus . TSDBStatusHandler ( startTime , w , r ) ; err != nil {
tsdbStatusErrors . Inc ( )
sendPrometheusError ( w , r , err )
return true
}
return true
2019-05-22 21:16:55 +00:00
case "/api/v1/export" :
exportRequests . Inc ( )
2020-02-04 14:13:59 +00:00
if err := prometheus . ExportHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
exportErrors . Inc ( )
httpserver . Errorf ( w , "error in %q: %s" , r . URL . Path , err )
return true
}
return true
case "/federate" :
federateRequests . Inc ( )
2020-02-04 14:13:59 +00:00
if err := prometheus . FederateHandler ( startTime , w , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
federateErrors . Inc ( )
2020-04-22 16:57:36 +00:00
httpserver . Errorf ( w , "error in %q: %s" , r . URL . Path , err )
2019-05-22 21:16:55 +00:00
return true
}
return true
2019-12-03 17:32:57 +00:00
case "/api/v1/rules" :
// Return dumb placeholder
rulesRequests . Inc ( )
w . Header ( ) . Set ( "Content-Type" , "application/json" )
fmt . Fprintf ( w , "%s" , ` { "status":"success","data": { "groups":[]}} ` )
return true
case "/api/v1/alerts" :
// Return dumb placehloder
alertsRequests . Inc ( )
w . Header ( ) . Set ( "Content-Type" , "application/json" )
fmt . Fprintf ( w , "%s" , ` { "status":"success","data": { "alerts":[]}} ` )
return true
2020-02-04 13:53:15 +00:00
case "/api/v1/metadata" :
// Return dumb placeholder
metadataRequests . Inc ( )
w . Header ( ) . Set ( "Content-Type" , "application/json" )
fmt . Fprintf ( w , "%s" , ` { "status":"success","data": { }} ` )
return true
2019-05-22 21:16:55 +00:00
case "/api/v1/admin/tsdb/delete_series" :
deleteRequests . Inc ( )
authKey := r . FormValue ( "authKey" )
if authKey != * deleteAuthKey {
httpserver . Errorf ( w , "invalid authKey %q. It must match the value from -deleteAuthKey command line flag" , authKey )
return true
}
2020-02-04 14:13:59 +00:00
if err := prometheus . DeleteHandler ( startTime , r ) ; err != nil {
2019-05-22 21:16:55 +00:00
deleteErrors . Inc ( )
httpserver . Errorf ( w , "error in %q: %s" , r . URL . Path , err )
return true
}
w . WriteHeader ( http . StatusNoContent )
return true
default :
return false
}
}
2020-07-08 15:52:55 +00:00
// internalHandler serves operational endpoints under internalHandlerPrefix:
//
//   - /resetRollupResultCache resets the rollup result cache
//   - /query/list returns all running queries as JSON
//   - /query/info returns info about the query identified by the pid arg
//   - /query/kill cancels the running query identified by the pid arg
//
// When -internalAuthKey (or, if that is empty, -search.resetCacheAuthKey) is
// set, the authKey request arg must match it.
//
// It returns true when a response has been produced (including error
// responses) and false when the path is not an internal endpoint.
func internalHandler(w http.ResponseWriter, r *http.Request) bool {
	// Normalize the path the same way RequestHandler does before dispatching
	// here. Slicing the raw r.URL.Path would cut at the wrong offset for
	// requests such as "//internal/query/list", which pass the caller's
	// prefix check on the normalized path.
	path := strings.Replace(r.URL.Path, "//", "/", -1)
	if !strings.HasPrefix(path, internalHandlerPrefix) {
		// Guard the slice below against paths shorter than the prefix.
		return false
	}
	p := path[len(internalHandlerPrefix):]

	authKey := *internalAuthKey
	if authKey == "" {
		// Fall back to the legacy flag.
		authKey = *resetCacheAuthKey
	}
	if authKey != "" && r.FormValue("authKey") != authKey {
		sendPrometheusError(w, r, fmt.Errorf("invalid authKey=%q for %q", r.FormValue("authKey"), path))
		return true
	}

	switch p {
	case "/resetRollupResultCache":
		promql.ResetRollupResultCache()
		return true
	case "/query/list":
		queryListRequests.Inc()
		w.Header().Set("Content-Type", "application/json")
		data, err := json.Marshal(promql.GetAllRunningQueries())
		if err != nil {
			queryListErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		fmt.Fprintf(w, ` { "status":"success","data": %v} `, string(data))
		return true
	case "/query/info":
		queryInfoRequests.Inc()
		w.Header().Set("Content-Type", "application/json")
		pid := strings.TrimSpace(r.URL.Query().Get("pid"))
		if pid == "" {
			queryInfoErrors.Inc()
			sendPrometheusError(w, r, fmt.Errorf("pid not set"))
			return true
		}
		info, err := promql.GetQueryInfo(pid)
		if err != nil {
			queryInfoErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		data, err := json.Marshal(info)
		if err != nil {
			queryInfoErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		fmt.Fprintf(w, ` { "status":"success","data": %v} `, string(data))
		return true
	case "/query/kill":
		queryKillRequests.Inc()
		w.Header().Set("Content-Type", "application/json")
		pid := strings.TrimSpace(r.URL.Query().Get("pid"))
		if pid == "" {
			queryKillErrors.Inc()
			sendPrometheusError(w, r, fmt.Errorf("pid not set"))
			return true
		}
		if err := promql.CancelRunningQuery(pid); err != nil {
			queryKillErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		fmt.Fprintf(w, "%s", ` { "status":"success","data": { }} `)
		return true
	default:
		return false
	}
}
2019-05-22 21:16:55 +00:00
func sendPrometheusError ( w http . ResponseWriter , r * http . Request , err error ) {
2020-04-15 17:50:12 +00:00
logger . Warnf ( "error in %q: %s" , r . RequestURI , err )
2019-05-22 21:16:55 +00:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-08-23 06:46:45 +00:00
statusCode := http . StatusUnprocessableEntity
2020-06-30 21:02:02 +00:00
var esc * httpserver . ErrorWithStatusCode
if errors . As ( err , & esc ) {
2019-08-23 06:46:45 +00:00
statusCode = esc . StatusCode
}
2019-05-22 21:16:55 +00:00
w . WriteHeader ( statusCode )
prometheus . WriteErrorResponse ( w , statusCode , err )
}
var (
labelValuesRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/label/ { }/values"} ` )
labelValuesErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/label/ { }/values"} ` )
queryRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/query"} ` )
queryErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/query"} ` )
queryRangeRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/query_range"} ` )
queryRangeErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/query_range"} ` )
seriesRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/series"} ` )
seriesErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/series"} ` )
seriesCountRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/series/count"} ` )
seriesCountErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/series/count"} ` )
labelsRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/labels"} ` )
labelsErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/labels"} ` )
2019-06-10 15:55:20 +00:00
labelsCountRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/labels/count"} ` )
labelsCountErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/labels/count"} ` )
2020-04-22 16:57:36 +00:00
tsdbStatusRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/status/tsdb"} ` )
tsdbStatusErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/status/tsdb"} ` )
2019-05-22 21:16:55 +00:00
deleteRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/admin/tsdb/delete_series"} ` )
deleteErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/admin/tsdb/delete_series"} ` )
exportRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/export"} ` )
exportErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/api/v1/export"} ` )
federateRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/federate"} ` )
federateErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/federate"} ` )
2019-12-03 17:32:57 +00:00
2020-02-04 13:53:15 +00:00
rulesRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/rules"} ` )
alertsRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/alerts"} ` )
metadataRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/api/v1/metadata"} ` )
2020-07-08 15:52:55 +00:00
queryListRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/-/ { }/query/list"} ` )
queryListErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/-/ { }/query/list"} ` )
queryKillRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/-/ { }/query/kill"} ` )
queryKillErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/-/ { }/query/kill"} ` )
queryInfoRequests = metrics . NewCounter ( ` vm_http_requests_total { path="/-/ { }/query/info"} ` )
queryInfoErrors = metrics . NewCounter ( ` vm_http_request_errors_total { path="/-/ { }/query/info"} ` )
2019-05-22 21:16:55 +00:00
)