From eefae854500e742b4788c16cedd883e20f335a3d Mon Sep 17 00:00:00 2001 From: Zakhar Bessarab Date: Tue, 8 Oct 2024 12:36:31 +0400 Subject: [PATCH] vmagent: add support of HTTP2 client for Kubernetes SD (#7114) ### Describe Your Changes Currently, vmagent always uses a separate `http.Client` for every group watcher in Kubernetes SD. With a high number of group watchers this leads to large amount of opened connections. This PR adds 2 changes to address this: - re-use of existing `http.Client` - in case `http.Client` is connecting to the same API server and uses the same parameters it will be re-used between group watchers - HTTP2 support - this allows to reuse connections more efficiently due to ability of using streaming via existing connections. See this issue for the details and test results - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5971 ### Checklist The following checks are **mandatory**: - [ ] My change adheres [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). --------- Signed-off-by: Zakhar Bessarab Co-authored-by: Roman Khavronenko --- deployment/docker/Makefile | 4 +- docs/README.md | 2 + docs/changelog/CHANGELOG.md | 1 + docs/vmagent.md | 2 + lib/promauth/config.go | 51 ++++++++++++++++--- .../discovery/kubernetes/api_watcher.go | 48 +++++++++++++++-- lib/promscrape/scrapework.go | 2 - 7 files changed, 93 insertions(+), 17 deletions(-) diff --git a/deployment/docker/Makefile b/deployment/docker/Makefile index e762dc484..c89a559cc 100644 --- a/deployment/docker/Makefile +++ b/deployment/docker/Makefile @@ -42,7 +42,7 @@ app-via-docker: package-builder $(BUILDER_IMAGE) \ go build $(RACE) -trimpath -buildvcs=false \ -ldflags "-extldflags '-static' $(GO_BUILDINFO)" \ - -tags 'netgo osusergo nethttpomithttp2 musl' \ + -tags 'netgo osusergo musl' \ -o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME) app-via-docker-windows: package-builder @@ -57,7 +57,7 @@ app-via-docker-windows: package-builder $(BUILDER_IMAGE) \ go build $(RACE) -trimpath -buildvcs=false \ -ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" \ - -tags 'netgo osusergo nethttpomithttp2' \ + -tags 'netgo osusergo' \ -o bin/$(APP_NAME)-windows$(APP_SUFFIX)-prod.exe $(PKG_PREFIX)/app/$(APP_NAME) package-via-docker: package-base diff --git a/docs/README.md b/docs/README.md index 170010ecb..4f91e2dce 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3038,6 +3038,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li How frequently to reload the full state from Kubernetes API server (default 30m0s) -promscrape.kubernetes.attachNodeMetadataAll Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs + -promscrape.kubernetes.useHTTP2Client + Whether to use HTTP/2 client for connection to Kubernetes API server. This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects. -promscrape.kubernetesSDCheckInterval duration Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs for details (default 30s) -promscrape.kumaSDCheckInterval duration diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 588ae7fb1..05ae453ad 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -19,6 +19,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). ## tip * FEATURE: add Darwin binaries for [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/) to the release flow. The binaries will be available in the new release. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow using HTTP/2 client for Kubernetes service discovery if `-promscrape.kubernetes.useHTTP2Client` cmd-line flag is set. This could help to reduce the amount of opened connections to the Kubernetes API server. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5971) for the details. ## [v1.104.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.104.0) diff --git a/docs/vmagent.md b/docs/vmagent.md index 796475774..1ad5f321d 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -1983,6 +1983,8 @@ See the docs at https://docs.victoriametrics.com/vmagent/ . How frequently to reload the full state from Kubernetes API server (default 30m0s) -promscrape.kubernetes.attachNodeMetadataAll Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs + -promscrape.kubernetes.useHTTP2Client + Whether to use HTTP/2 client for connection to Kubernetes API server. This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects. -promscrape.kubernetesSDCheckInterval duration Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs for details (default 30s) -promscrape.kumaSDCheckInterval duration diff --git a/lib/promauth/config.go b/lib/promauth/config.go index 669257749..38a557096 100644 --- a/lib/promauth/config.go +++ b/lib/promauth/config.go @@ -120,8 +120,6 @@ type HTTPClientConfig struct { // - http2 is much harder to debug than http // - http2 has very bad security record because of its complexity - see https://portswigger.net/research/http2 // - // VictoriaMetrics components are compiled with nethttpomithttp2 tag because of these issues. - // // EnableHTTP2 bool } @@ -462,12 +460,34 @@ func (ac *Config) GetTLSConfig() (*tls.Config, error) { return tlsC, nil } +type idleConnectionsCloser interface { + CloseIdleConnections() +} + // NewRoundTripper returns new http.RoundTripper for the given ac, which uses the given trBase as base transport. // // The caller shouldn't change the trBase, since the returned RoundTripper owns it. func (ac *Config) NewRoundTripper(trBase *http.Transport) http.RoundTripper { rt := &roundTripper{ trBase: trBase, + trGetter: func(tls *tls.Config) http.RoundTripper { + tr := trBase.Clone() + if tls != nil { + tr.TLSClientConfig = tls + } + return tr + }, + } + if ac != nil { + rt.getTLSConfigCached = ac.getTLSConfigCached + } + return rt +} + +// NewRoundTripperFromGetter returns new http.RoundTripper for the given ac, which uses the given get as transport getter. +func (ac *Config) NewRoundTripperFromGetter(get func(tls *tls.Config) http.RoundTripper) http.RoundTripper { + rt := &roundTripper{ + trGetter: get, } if ac != nil { rt.getTLSConfigCached = ac.getTLSConfigCached @@ -476,11 +496,12 @@ func (ac *Config) NewRoundTripper(trBase *http.Transport) http.RoundTripper { } type roundTripper struct { - trBase *http.Transport + trBase http.RoundTripper getTLSConfigCached getTLSConfigFunc + trGetter func(tls *tls.Config) http.RoundTripper rootCAPrev *x509.CertPool - trPrev *http.Transport + trPrev http.RoundTripper mu sync.Mutex } @@ -493,7 +514,20 @@ func (rt *roundTripper) RoundTrip(req *http.Request) (*http.Response, error) { return tr.RoundTrip(req) } -func (rt *roundTripper) getTransport() (*http.Transport, error) { +func (rt *roundTripper) getTransport() (http.RoundTripper, error) { + if rt.trBase == nil { + if rt.getTLSConfigCached != nil { + tlsCfg, err := rt.getTLSConfigCached() + if err != nil { + return nil, fmt.Errorf("cannot initialize TLS config: %w", err) + } + + rt.trBase = rt.trGetter(tlsCfg) + } else { + rt.trBase = rt.trGetter(nil) + } + } + if rt.getTLSConfigCached == nil { return rt.trBase, nil } @@ -514,11 +548,12 @@ func (rt *roundTripper) getTransport() (*http.Transport, error) { // Slow path - tlsCfg has been changed. // Close connections for the previous transport and create new transport for the updated tlsCfg. if rt.trPrev != nil { - rt.trPrev.CloseIdleConnections() + if ic, ok := rt.trPrev.(idleConnectionsCloser); ok { + ic.CloseIdleConnections() + } } - tr := rt.trBase.Clone() - tr.TLSClientConfig = tlsCfg + tr := rt.trGetter(tlsCfg) rt.trPrev = tr rt.rootCAPrev = tlsCfg.RootCAs diff --git a/lib/promscrape/discovery/kubernetes/api_watcher.go b/lib/promscrape/discovery/kubernetes/api_watcher.go index 67b858058..58f16b735 100644 --- a/lib/promscrape/discovery/kubernetes/api_watcher.go +++ b/lib/promscrape/discovery/kubernetes/api_watcher.go @@ -2,6 +2,7 @@ package kubernetes import ( "context" + "crypto/tls" "encoding/json" "errors" "flag" @@ -18,6 +19,7 @@ import ( "time" "github.com/VictoriaMetrics/metrics" + "golang.org/x/net/http2" "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -31,6 +33,8 @@ var ( apiServerTimeout = flag.Duration("promscrape.kubernetes.apiServerTimeout", 30*time.Minute, "How frequently to reload the full state from Kubernetes API server") attachNodeMetadataAll = flag.Bool("promscrape.kubernetes.attachNodeMetadataAll", false, "Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . "+ "It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs") + useHTTP2Client = flag.Bool("promscrape.kubernetes.useHTTP2Client", false, "Whether to use HTTP/2 client for connection to Kubernetes API server."+ + " This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects.") ) // WatchEvent is a watch event returned from API server endpoints if `watch=1` query arg is set. @@ -243,20 +247,54 @@ type groupWatcher struct { noAPIWatchers bool } -func newGroupWatcher(apiServer string, ac *promauth.Config, namespaces []string, selectors []Selector, attachNodeMetadata bool, proxyURL *url.URL) *groupWatcher { +var ( + httpClientsCache = make(map[string]*http.Client) + httpClientsLock sync.Mutex +) + +func getHTTPClient(ac *promauth.Config, proxyURL *url.URL) *http.Client { + key := fmt.Sprintf("authConfig=%s, proxyURL=%s", ac.String(), proxyURL) + httpClientsLock.Lock() + if c, ok := httpClientsCache[key]; ok { + httpClientsLock.Unlock() + return c + } + var proxy func(*http.Request) (*url.URL, error) if proxyURL != nil { proxy = http.ProxyURL(proxyURL) } - client := &http.Client{ - Transport: ac.NewRoundTripper(&http.Transport{ + getTransport := func(cfg *tls.Config) http.RoundTripper { + return &http.Transport{ Proxy: proxy, TLSHandshakeTimeout: 10 * time.Second, IdleConnTimeout: *apiServerTimeout, MaxIdleConnsPerHost: 100, - }), - Timeout: *apiServerTimeout, + TLSClientConfig: cfg, + } } + if *useHTTP2Client { + // proxy is not supported for http2 client + // see: https://github.com/golang/go/issues/26479 + getTransport = func(cfg *tls.Config) http.RoundTripper { + return &http2.Transport{ + IdleConnTimeout: *apiServerTimeout, + TLSClientConfig: cfg, + PingTimeout: 10 * time.Second, + } + } + } + c := &http.Client{ + Transport: ac.NewRoundTripperFromGetter(getTransport), + Timeout: *apiServerTimeout, + } + httpClientsCache[key] = c + httpClientsLock.Unlock() + return c +} + +func newGroupWatcher(apiServer string, ac *promauth.Config, namespaces []string, selectors []Selector, attachNodeMetadata bool, proxyURL *url.URL) *groupWatcher { + client := getHTTPClient(ac, proxyURL) ctx, cancel := context.WithCancel(context.Background()) gw := &groupWatcher{ apiServer: apiServer, diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 32f365334..8e4fb799e 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -73,8 +73,6 @@ type ScrapeWork struct { // - http2 is much harder to debug than http // - http2 has very bad security record because of its complexity - see https://portswigger.net/research/http2 // - // VictoriaMetrics components are compiled with nethttpomithttp2 tag because of these issues. - // // EnableHTTP2 bool // OriginalLabels contains original labels before relabeling.