VictoriaMetrics/lib/httpserver/httpserver.go

605 lines
19 KiB
Go
Raw Normal View History

2019-05-22 21:16:55 +00:00
package httpserver
import (
"bufio"
"context"
"crypto/tls"
"errors"
"flag"
2019-05-22 21:16:55 +00:00
"fmt"
"io"
"log"
2019-05-22 21:16:55 +00:00
"net"
"net/http"
"net/http/pprof"
"os"
"path"
2019-05-22 21:16:55 +00:00
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
2019-05-22 21:16:55 +00:00
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2019-05-22 21:16:55 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
"github.com/klauspost/compress/gzip"
"github.com/valyala/fastrand"
2019-05-22 21:16:55 +00:00
)
var (
tlsEnable = flag.Bool("tls", false, "Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set")
tlsCertFile = flag.String("tlsCertFile", "", "Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower")
tlsKeyFile = flag.String("tlsKeyFile", "", "Path to file with TLS key. Used only if -tls is set")
pathPrefix = flag.String("http.pathPrefix", "", "An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, "+
"then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. "+
"See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus")
disableResponseCompression = flag.Bool("http.disableResponseCompression", false, "Disable compression of HTTP responses to save CPU resources. By default compression is enabled to save network bandwidth")
maxGracefulShutdownDuration = flag.Duration("http.maxGracefulShutdownDuration", 7*time.Second, `The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown`)
shutdownDelay = flag.Duration("http.shutdownDelay", 0, `Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers`)
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, `Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem`)
)
var (
servers = make(map[string]*server)
2019-05-22 21:16:55 +00:00
serversLock sync.Mutex
)
type server struct {
shutdownDelayDeadline int64
s *http.Server
}
2019-05-22 21:16:55 +00:00
// RequestHandler must serve the given request r and write response to w.
//
// RequestHandler must return true if the request has been served (successfully or not).
//
// RequestHandler must return false if it cannot serve the given request.
// In such cases the caller must serve the request.
type RequestHandler func(w http.ResponseWriter, r *http.Request) bool
// Serve starts an http server on the given addr with the given rh.
//
// By default all the responses are transparently compressed, since Google
// charges a lot for the egress traffic. The compression may be disabled
// by calling DisableResponseCompression before writing the first byte to w.
//
// The compression is also disabled if -http.disableResponseCompression flag is set.
2019-05-22 21:16:55 +00:00
func Serve(addr string, rh RequestHandler) {
scheme := "http"
if *tlsEnable {
scheme = "https"
}
logger.Infof("starting http server at %s://%s/", scheme, addr)
logger.Infof("pprof handlers are exposed at %s://%s/debug/pprof/", scheme, addr)
lnTmp, err := netutil.NewTCPListener(scheme, addr)
2019-05-22 21:16:55 +00:00
if err != nil {
logger.Fatalf("cannot start http server at %s: %s", addr, err)
}
ln := net.Listener(lnTmp)
if *tlsEnable {
cert, err := tls.LoadX509KeyPair(*tlsCertFile, *tlsKeyFile)
if err != nil {
logger.Fatalf("cannot load TLS cert from tlsCertFile=%q, tlsKeyFile=%q: %s", *tlsCertFile, *tlsKeyFile, err)
}
cfg := &tls.Config{
2021-01-12 22:35:47 +00:00
Certificates: []tls.Certificate{cert},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
}
ln = tls.NewListener(ln, cfg)
2019-05-22 21:16:55 +00:00
}
serveWithListener(addr, ln, rh)
}
func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
var s server
s.s = &http.Server{
Handler: gzipHandler(&s, rh),
2019-05-22 21:16:55 +00:00
// Disable http/2, since it doesn't give any advantages for VictoriaMetrics services.
2019-05-22 21:16:55 +00:00
TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
ReadHeaderTimeout: 5 * time.Second,
IdleTimeout: *idleConnTimeout,
2019-05-22 21:16:55 +00:00
// Do not set ReadTimeout and WriteTimeout here,
// since these timeouts must be controlled by request handlers.
2019-05-22 21:16:55 +00:00
ErrorLog: logger.StdErrorLogger(),
ConnContext: func(ctx context.Context, c net.Conn) context.Context {
timeoutSec := connTimeout.Seconds()
// Add a jitter for connection timeout in order to prevent Thundering herd problem
// when all the connections are established at the same time.
// See https://en.wikipedia.org/wiki/Thundering_herd_problem
jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
},
2019-05-22 21:16:55 +00:00
}
serversLock.Lock()
servers[addr] = &s
2019-05-22 21:16:55 +00:00
serversLock.Unlock()
if err := s.s.Serve(ln); err != nil {
2019-05-22 21:16:55 +00:00
if err == http.ErrServerClosed {
// The server gracefully closed.
return
}
logger.Panicf("FATAL: cannot serve http at %s: %s", addr, err)
}
}
func whetherToCloseConn(r *http.Request) bool {
ctx := r.Context()
v := ctx.Value(connDeadlineTimeKey)
deadline, ok := v.(*uint64)
return ok && fasttime.UnixTimestamp() > *deadline
}
var connDeadlineTimeKey = interface{}("connDeadlineSecs")
2019-05-22 21:16:55 +00:00
// Stop stops the http server on the given addr, which has been started
// via Serve func.
func Stop(addr string) error {
serversLock.Lock()
s := servers[addr]
delete(servers, addr)
serversLock.Unlock()
if s == nil {
err := fmt.Errorf("BUG: there is no http server at %q", addr)
logger.Panicf("%s", err)
// The return is needed for golangci-lint: SA5011(related information): this check suggests that the pointer can be nil
return err
2019-05-22 21:16:55 +00:00
}
deadline := time.Now().Add(*shutdownDelay).UnixNano()
atomic.StoreInt64(&s.shutdownDelayDeadline, deadline)
if *shutdownDelay > 0 {
// Sleep for a while until load balancer in front of the server
// notifies that "/health" endpoint returns non-OK responses.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/463 .
logger.Infof("Waiting for %.3fs before shutdown of http server %q, so load balancers could re-route requests to other servers", shutdownDelay.Seconds(), addr)
time.Sleep(*shutdownDelay)
logger.Infof("Starting shutdown for http server %q", addr)
}
ctx, cancel := context.WithTimeout(context.Background(), *maxGracefulShutdownDuration)
defer cancel()
if err := s.s.Shutdown(ctx); err != nil {
return fmt.Errorf("cannot gracefully shutdown http server at %q in %.3fs; "+
"probably, `-http.maxGracefulShutdownDuration` command-line flag value must be increased; error: %s", addr, maxGracefulShutdownDuration.Seconds(), err)
2019-05-22 21:16:55 +00:00
}
return nil
}
func gzipHandler(s *server, rh RequestHandler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
2019-05-22 21:16:55 +00:00
w = maybeGzipResponseWriter(w, r)
handlerWrapper(s, w, r, rh)
2019-05-22 21:16:55 +00:00
if zrw, ok := w.(*gzipResponseWriter); ok {
if err := zrw.Close(); err != nil && !isTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Close: %s", err)
2019-05-22 21:16:55 +00:00
}
}
}
}
var metricsHandlerDuration = metrics.NewHistogram(`vm_http_request_duration_seconds{path="/metrics"}`)
var connTimeoutClosedConns = metrics.NewCounter(`vm_http_conn_timeout_closed_conns_total`)
2019-05-22 21:16:55 +00:00
var hostname = func() string {
h, err := os.Hostname()
if err != nil {
// Cannot use logger.Errorf, since it isn't initialized yet.
// So use log.Printf instead.
log.Printf("ERROR: cannot determine hostname: %s", err)
return "unknown"
}
return h
}()
func handlerWrapper(s *server, w http.ResponseWriter, r *http.Request, rh RequestHandler) {
// All the VictoriaMetrics code assumes that panic stops the process.
// Unfortunately, the standard net/http.Server recovers from panics in request handlers,
// so VictoriaMetrics state can become inconsistent after the recovered panic.
// The following recover() code works around this by explicitly stopping the process after logging the panic.
// See https://github.com/golang/go/issues/16542#issuecomment-246549902 for details.
defer func() {
// need to check for abortHandler
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1353
if err := recover(); err != nil && err != http.ErrAbortHandler {
buf := make([]byte, 1<<20)
n := runtime.Stack(buf, false)
fmt.Fprintf(os.Stderr, "panic: %v\n\n%s", err, buf[:n])
os.Exit(1)
}
}()
w.Header().Add("X-Server-Hostname", hostname)
2019-05-22 21:16:55 +00:00
requestsTotal.Inc()
if whetherToCloseConn(r) {
connTimeoutClosedConns.Inc()
w.Header().Set("Connection", "close")
}
path, err := getCanonicalPath(r.URL.Path)
if err != nil {
Errorf(w, r, "cannot get canonical path: %s", err)
unsupportedRequestErrors.Inc()
return
}
r.URL.Path = path
2019-05-22 21:16:55 +00:00
switch r.URL.Path {
case "/health":
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
deadline := atomic.LoadInt64(&s.shutdownDelayDeadline)
if deadline <= 0 {
w.Write([]byte("OK"))
return
}
// Return non-OK response during grace period before shutting down the server.
// Load balancers must notify these responses and re-route new requests to other servers.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/463 .
d := time.Until(time.Unix(0, deadline))
if d < 0 {
d = 0
}
errMsg := fmt.Sprintf("The server is in delayed shutdown mode, which will end in %.3fs", d.Seconds())
http.Error(w, errMsg, http.StatusServiceUnavailable)
2019-05-22 21:16:55 +00:00
return
case "/ping":
// This is needed for compatibility with Influx agents.
// See https://docs.influxdata.com/influxdb/v1.7/tools/api/#ping-http-endpoint
status := http.StatusNoContent
if verbose := r.FormValue("verbose"); verbose == "true" {
status = http.StatusOK
}
w.WriteHeader(status)
return
case "/favicon.ico":
faviconRequests.Inc()
w.WriteHeader(http.StatusNoContent)
return
2019-05-22 21:16:55 +00:00
case "/metrics":
metricsRequests.Inc()
startTime := time.Now()
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
WritePrometheusMetrics(w)
2019-05-22 21:16:55 +00:00
metricsHandlerDuration.UpdateDuration(startTime)
return
default:
if strings.HasPrefix(r.URL.Path, "/debug/pprof/") {
pprofRequests.Inc()
DisableResponseCompression(w)
pprofHandler(r.URL.Path[len("/debug/pprof/"):], w, r)
return
}
if rh(w, r) {
return
}
Errorf(w, r, "unsupported path requested: %q", r.URL.Path)
2019-05-22 21:16:55 +00:00
unsupportedRequestErrors.Inc()
return
}
}
func getCanonicalPath(path string) (string, error) {
if len(*pathPrefix) == 0 || path == "/" {
return path, nil
}
if *pathPrefix == path {
return "/", nil
}
prefix := *pathPrefix
if !strings.HasSuffix(prefix, "/") {
prefix = prefix + "/"
}
if !strings.HasPrefix(path, prefix) {
return "", fmt.Errorf("missing `-pathPrefix=%q` in the requested path: %q", *pathPrefix, path)
}
path = path[len(prefix)-1:]
return path, nil
}
2019-05-22 21:16:55 +00:00
func maybeGzipResponseWriter(w http.ResponseWriter, r *http.Request) http.ResponseWriter {
if *disableResponseCompression {
return w
}
if r.Header.Get("Connection") == "Upgrade" {
return w
}
2019-05-22 21:16:55 +00:00
ae := r.Header.Get("Accept-Encoding")
if ae == "" {
return w
}
ae = strings.ToLower(ae)
n := strings.Index(ae, "gzip")
if n < 0 {
// Do not apply gzip encoding to the response.
2019-05-22 21:16:55 +00:00
return w
}
// Apply gzip encoding to the response.
2019-05-22 21:16:55 +00:00
zw := getGzipWriter(w)
bw := getBufioWriter(zw)
zrw := &gzipResponseWriter{
rw: w,
zw: zw,
bw: bw,
2019-05-22 21:16:55 +00:00
}
return zrw
}
// DisableResponseCompression disables response compression on w.
//
// The function must be called before the first w.Write* call.
func DisableResponseCompression(w http.ResponseWriter) {
zrw, ok := w.(*gzipResponseWriter)
if !ok {
return
}
if zrw.firstWriteDone {
logger.Panicf("BUG: DisableResponseCompression must be called before sending the response")
}
zrw.disableCompression = true
2019-05-22 21:16:55 +00:00
}
// EnableCORS enables https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS
// on the response.
func EnableCORS(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
}
func getGzipWriter(w io.Writer) *gzip.Writer {
v := gzipWriterPool.Get()
if v == nil {
zw, err := gzip.NewWriterLevel(w, 1)
if err != nil {
logger.Panicf("BUG: cannot create gzip writer: %s", err)
}
return zw
}
zw := v.(*gzip.Writer)
zw.Reset(w)
return zw
}
func putGzipWriter(zw *gzip.Writer) {
gzipWriterPool.Put(zw)
}
var gzipWriterPool sync.Pool
type gzipResponseWriter struct {
rw http.ResponseWriter
2019-05-22 21:16:55 +00:00
zw *gzip.Writer
bw *bufio.Writer
statusCode int
firstWriteDone bool
disableCompression bool
}
// Implements http.ResponseWriter.Header method.
func (zrw *gzipResponseWriter) Header() http.Header {
return zrw.rw.Header()
}
// Implements http.ResponseWriter.Write method.
2019-05-22 21:16:55 +00:00
func (zrw *gzipResponseWriter) Write(p []byte) (int, error) {
if !zrw.firstWriteDone {
h := zrw.Header()
if zrw.statusCode == http.StatusNoContent {
zrw.disableCompression = true
}
if h.Get("Content-Encoding") != "" {
2019-05-22 21:16:55 +00:00
zrw.disableCompression = true
}
if !zrw.disableCompression {
h.Set("Content-Encoding", "gzip")
h.Del("Content-Length")
if h.Get("Content-Type") == "" {
// Disable auto-detection of content-type, since it
// is incorrectly detected after the compression.
h.Set("Content-Type", "text/html; charset=utf-8")
}
2019-05-22 21:16:55 +00:00
}
zrw.writeHeader()
2019-05-22 21:16:55 +00:00
zrw.firstWriteDone = true
}
if zrw.disableCompression {
return zrw.rw.Write(p)
2019-05-22 21:16:55 +00:00
}
return zrw.bw.Write(p)
}
// Implements http.ResponseWriter.WriteHeader method.
2019-05-22 21:16:55 +00:00
func (zrw *gzipResponseWriter) WriteHeader(statusCode int) {
zrw.statusCode = statusCode
}
func (zrw *gzipResponseWriter) writeHeader() {
if zrw.statusCode == 0 {
zrw.statusCode = http.StatusOK
}
zrw.rw.WriteHeader(zrw.statusCode)
}
2019-05-22 21:16:55 +00:00
// Implements http.Flusher
func (zrw *gzipResponseWriter) Flush() {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
}
if !zrw.disableCompression {
if err := zrw.bw.Flush(); err != nil && !isTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (buffer): %s", err)
}
if err := zrw.zw.Flush(); err != nil && !isTrivialNetworkError(err) {
logger.Warnf("gzipResponseWriter.Flush (gzip): %s", err)
}
2019-05-22 21:16:55 +00:00
}
if fw, ok := zrw.rw.(http.Flusher); ok {
2019-05-22 21:16:55 +00:00
fw.Flush()
}
}
func (zrw *gzipResponseWriter) Close() error {
if !zrw.firstWriteDone {
_, _ = zrw.Write(nil)
2019-05-22 21:16:55 +00:00
}
zrw.Flush()
var err error
if !zrw.disableCompression {
err = zrw.zw.Close()
}
2019-05-22 21:16:55 +00:00
putGzipWriter(zrw.zw)
zrw.zw = nil
putBufioWriter(zrw.bw)
zrw.bw = nil
return err
}
func getBufioWriter(w io.Writer) *bufio.Writer {
v := bufioWriterPool.Get()
if v == nil {
return bufio.NewWriterSize(w, 16*1024)
}
bw := v.(*bufio.Writer)
bw.Reset(w)
return bw
}
func putBufioWriter(bw *bufio.Writer) {
bufioWriterPool.Put(bw)
}
var bufioWriterPool sync.Pool
func pprofHandler(profileName string, w http.ResponseWriter, r *http.Request) {
// This switch has been stolen from init func at https://golang.org/src/net/http/pprof/pprof.go
switch profileName {
case "cmdline":
pprofCmdlineRequests.Inc()
pprof.Cmdline(w, r)
case "profile":
pprofProfileRequests.Inc()
pprof.Profile(w, r)
case "symbol":
pprofSymbolRequests.Inc()
pprof.Symbol(w, r)
case "trace":
pprofTraceRequests.Inc()
pprof.Trace(w, r)
case "mutex":
pprofMutexRequests.Inc()
seconds, _ := strconv.Atoi(r.FormValue("seconds"))
if seconds <= 0 {
seconds = 10
}
prev := runtime.SetMutexProfileFraction(10)
time.Sleep(time.Duration(seconds) * time.Second)
pprof.Index(w, r)
runtime.SetMutexProfileFraction(prev)
default:
pprofDefaultRequests.Inc()
pprof.Index(w, r)
}
}
var (
metricsRequests = metrics.NewCounter(`vm_http_requests_total{path="/metrics"}`)
pprofRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/"}`)
pprofCmdlineRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/cmdline"}`)
pprofProfileRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/profile"}`)
pprofSymbolRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/symbol"}`)
pprofTraceRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/trace"}`)
pprofMutexRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/mutex"}`)
pprofDefaultRequests = metrics.NewCounter(`vm_http_requests_total{path="/debug/pprof/default"}`)
faviconRequests = metrics.NewCounter(`vm_http_requests_total{path="/favicon.ico"}`)
unsupportedRequestErrors = metrics.NewCounter(`vm_http_request_errors_total{path="*", reason="unsupported"}`)
requestsTotal = metrics.NewCounter(`vm_http_requests_all_total`)
)
// GetQuotedRemoteAddr returns quoted remote address.
func GetQuotedRemoteAddr(r *http.Request) string {
remoteAddr := strconv.Quote(r.RemoteAddr) // quote remoteAddr and X-Forwarded-For, since they may contain untrusted input
if addr := r.Header.Get("X-Forwarded-For"); addr != "" {
remoteAddr += ", X-Forwarded-For: " + strconv.Quote(addr)
}
return remoteAddr
}
// Errorf writes formatted error message to w and to logger.
func Errorf(w http.ResponseWriter, r *http.Request, format string, args ...interface{}) {
errStr := fmt.Sprintf(format, args...)
remoteAddr := GetQuotedRemoteAddr(r)
errStr = fmt.Sprintf("remoteAddr: %s; %s", remoteAddr, errStr)
logger.WarnfSkipframes(1, "%s", errStr)
// Extract statusCode from args
statusCode := http.StatusBadRequest
var esc *ErrorWithStatusCode
for _, arg := range args {
if err, ok := arg.(error); ok && errors.As(err, &esc) {
statusCode = esc.StatusCode
break
}
}
http.Error(w, errStr, statusCode)
}
// ErrorWithStatusCode is error with HTTP status code.
//
// The given StatusCode is sent to client when the error is passed to Errorf.
type ErrorWithStatusCode struct {
Err error
StatusCode int
}
// Unwrap returns e.Err.
//
// This is used by standard errors package. See https://golang.org/pkg/errors
func (e *ErrorWithStatusCode) Unwrap() error {
return e.Err
}
// Error implements error interface.
func (e *ErrorWithStatusCode) Error() string {
return e.Err.Error()
2019-05-22 21:16:55 +00:00
}
func isTrivialNetworkError(err error) bool {
s := err.Error()
if strings.Contains(s, "broken pipe") || strings.Contains(s, "reset by peer") {
return true
}
return false
}
// IsTLS indicates is tls enabled or not
func IsTLS() bool {
return *tlsEnable
}
// GetPathPrefix - returns http server path prefix.
func GetPathPrefix() string {
return *pathPrefix
}
// WriteAPIHelp writes pathList to w in HTML format.
func WriteAPIHelp(w io.Writer, pathList [][2]string) {
for _, p := range pathList {
p, doc := p[0], p[1]
p = path.Join(*pathPrefix, p)
fmt.Fprintf(w, "<a href=%q>%s</a> - %s<br/>", p, p, doc)
}
}