mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
vmagent: add support of HTTP2 client for Kubernetes SD (#7114)
### Describe Your Changes Currently, vmagent always uses a separate `http.Client` for every group watcher in Kubernetes SD. With a high number of group watchers this leads to large amount of opened connections. This PR adds 2 changes to address this: - re-use of existing `http.Client` - in case `http.Client` is connecting to the same API server and uses the same parameters it will be re-used between group watchers - HTTP2 support - this allows to reuse connections more efficiently due to ability of using streaming via existing connections. See this issue for the details and test results - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5971 ### Checklist The following checks are **mandatory**: - [ ] My change adheres [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). --------- Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
This commit is contained in:
parent
9b6efb5e81
commit
eefae85450
7 changed files with 93 additions and 17 deletions
|
@ -42,7 +42,7 @@ app-via-docker: package-builder
|
|||
$(BUILDER_IMAGE) \
|
||||
go build $(RACE) -trimpath -buildvcs=false \
|
||||
-ldflags "-extldflags '-static' $(GO_BUILDINFO)" \
|
||||
-tags 'netgo osusergo nethttpomithttp2 musl' \
|
||||
-tags 'netgo osusergo musl' \
|
||||
-o bin/$(APP_NAME)$(APP_SUFFIX)-prod $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
app-via-docker-windows: package-builder
|
||||
|
@ -57,7 +57,7 @@ app-via-docker-windows: package-builder
|
|||
$(BUILDER_IMAGE) \
|
||||
go build $(RACE) -trimpath -buildvcs=false \
|
||||
-ldflags "-s -w -extldflags '-static' $(GO_BUILDINFO)" \
|
||||
-tags 'netgo osusergo nethttpomithttp2' \
|
||||
-tags 'netgo osusergo' \
|
||||
-o bin/$(APP_NAME)-windows$(APP_SUFFIX)-prod.exe $(PKG_PREFIX)/app/$(APP_NAME)
|
||||
|
||||
package-via-docker: package-base
|
||||
|
|
|
@ -3038,6 +3038,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetes.attachNodeMetadataAll
|
||||
Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs
|
||||
-promscrape.kubernetes.useHTTP2Client
|
||||
Whether to use HTTP/2 client for connection to Kubernetes API server. This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects.
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
|
|
|
@ -19,6 +19,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
## tip
|
||||
|
||||
* FEATURE: add Darwin binaries for [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/) to the release flow. The binaries will be available in the new release.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow using HTTP/2 client for Kubernetes service discovery if `-promscrape.kubernetes.useHTTP2Client` cmd-line flag is set. This could help to reduce the amount of opened connections to the Kubernetes API server. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5971) for the details.
|
||||
|
||||
## [v1.104.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.104.0)
|
||||
|
||||
|
|
|
@ -1983,6 +1983,8 @@ See the docs at https://docs.victoriametrics.com/vmagent/ .
|
|||
How frequently to reload the full state from Kubernetes API server (default 30m0s)
|
||||
-promscrape.kubernetes.attachNodeMetadataAll
|
||||
Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs
|
||||
-promscrape.kubernetes.useHTTP2Client
|
||||
Whether to use HTTP/2 client for connection to Kubernetes API server. This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects.
|
||||
-promscrape.kubernetesSDCheckInterval duration
|
||||
Interval for checking for changes in Kubernetes API server. This works only if kubernetes_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs for details (default 30s)
|
||||
-promscrape.kumaSDCheckInterval duration
|
||||
|
|
|
@ -120,8 +120,6 @@ type HTTPClientConfig struct {
|
|||
// - http2 is much harder to debug than http
|
||||
// - http2 has very bad security record because of its complexity - see https://portswigger.net/research/http2
|
||||
//
|
||||
// VictoriaMetrics components are compiled with nethttpomithttp2 tag because of these issues.
|
||||
//
|
||||
// EnableHTTP2 bool
|
||||
}
|
||||
|
||||
|
@ -462,12 +460,34 @@ func (ac *Config) GetTLSConfig() (*tls.Config, error) {
|
|||
return tlsC, nil
|
||||
}
|
||||
|
||||
type idleConnectionsCloser interface {
|
||||
CloseIdleConnections()
|
||||
}
|
||||
|
||||
// NewRoundTripper returns new http.RoundTripper for the given ac, which uses the given trBase as base transport.
|
||||
//
|
||||
// The caller shouldn't change the trBase, since the returned RoundTripper owns it.
|
||||
func (ac *Config) NewRoundTripper(trBase *http.Transport) http.RoundTripper {
|
||||
rt := &roundTripper{
|
||||
trBase: trBase,
|
||||
trGetter: func(tls *tls.Config) http.RoundTripper {
|
||||
tr := trBase.Clone()
|
||||
if tls != nil {
|
||||
tr.TLSClientConfig = tls
|
||||
}
|
||||
return tr
|
||||
},
|
||||
}
|
||||
if ac != nil {
|
||||
rt.getTLSConfigCached = ac.getTLSConfigCached
|
||||
}
|
||||
return rt
|
||||
}
|
||||
|
||||
// NewRoundTripperFromGetter returns new http.RoundTripper for the given ac, which uses the given get as transport getter.
|
||||
func (ac *Config) NewRoundTripperFromGetter(get func(tls *tls.Config) http.RoundTripper) http.RoundTripper {
|
||||
rt := &roundTripper{
|
||||
trGetter: get,
|
||||
}
|
||||
if ac != nil {
|
||||
rt.getTLSConfigCached = ac.getTLSConfigCached
|
||||
|
@ -476,11 +496,12 @@ func (ac *Config) NewRoundTripper(trBase *http.Transport) http.RoundTripper {
|
|||
}
|
||||
|
||||
type roundTripper struct {
|
||||
trBase *http.Transport
|
||||
trBase http.RoundTripper
|
||||
getTLSConfigCached getTLSConfigFunc
|
||||
trGetter func(tls *tls.Config) http.RoundTripper
|
||||
|
||||
rootCAPrev *x509.CertPool
|
||||
trPrev *http.Transport
|
||||
trPrev http.RoundTripper
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
|
@ -493,7 +514,20 @@ func (rt *roundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
|||
return tr.RoundTrip(req)
|
||||
}
|
||||
|
||||
func (rt *roundTripper) getTransport() (*http.Transport, error) {
|
||||
func (rt *roundTripper) getTransport() (http.RoundTripper, error) {
|
||||
if rt.trBase == nil {
|
||||
if rt.getTLSConfigCached != nil {
|
||||
tlsCfg, err := rt.getTLSConfigCached()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize TLS config: %w", err)
|
||||
}
|
||||
|
||||
rt.trBase = rt.trGetter(tlsCfg)
|
||||
} else {
|
||||
rt.trBase = rt.trGetter(nil)
|
||||
}
|
||||
}
|
||||
|
||||
if rt.getTLSConfigCached == nil {
|
||||
return rt.trBase, nil
|
||||
}
|
||||
|
@ -514,11 +548,12 @@ func (rt *roundTripper) getTransport() (*http.Transport, error) {
|
|||
// Slow path - tlsCfg has been changed.
|
||||
// Close connections for the previous transport and create new transport for the updated tlsCfg.
|
||||
if rt.trPrev != nil {
|
||||
rt.trPrev.CloseIdleConnections()
|
||||
if ic, ok := rt.trPrev.(idleConnectionsCloser); ok {
|
||||
ic.CloseIdleConnections()
|
||||
}
|
||||
}
|
||||
|
||||
tr := rt.trBase.Clone()
|
||||
tr.TLSClientConfig = tlsCfg
|
||||
tr := rt.trGetter(tlsCfg)
|
||||
rt.trPrev = tr
|
||||
rt.rootCAPrev = tlsCfg.RootCAs
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package kubernetes
|
|||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
|
@ -18,6 +19,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"golang.org/x/net/http2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
|
@ -31,6 +33,8 @@ var (
|
|||
apiServerTimeout = flag.Duration("promscrape.kubernetes.apiServerTimeout", 30*time.Minute, "How frequently to reload the full state from Kubernetes API server")
|
||||
attachNodeMetadataAll = flag.Bool("promscrape.kubernetes.attachNodeMetadataAll", false, "Whether to set attach_metadata.node=true for all the kubernetes_sd_configs at -promscrape.config . "+
|
||||
"It is possible to set attach_metadata.node=false individually per each kubernetes_sd_configs . See https://docs.victoriametrics.com/sd_configs/#kubernetes_sd_configs")
|
||||
useHTTP2Client = flag.Bool("promscrape.kubernetes.useHTTP2Client", false, "Whether to use HTTP/2 client for connection to Kubernetes API server."+
|
||||
" This may reduce amount of concurrent connections to API server when watching for a big number of Kubernetes objects.")
|
||||
)
|
||||
|
||||
// WatchEvent is a watch event returned from API server endpoints if `watch=1` query arg is set.
|
||||
|
@ -243,20 +247,54 @@ type groupWatcher struct {
|
|||
noAPIWatchers bool
|
||||
}
|
||||
|
||||
func newGroupWatcher(apiServer string, ac *promauth.Config, namespaces []string, selectors []Selector, attachNodeMetadata bool, proxyURL *url.URL) *groupWatcher {
|
||||
var (
|
||||
httpClientsCache = make(map[string]*http.Client)
|
||||
httpClientsLock sync.Mutex
|
||||
)
|
||||
|
||||
func getHTTPClient(ac *promauth.Config, proxyURL *url.URL) *http.Client {
|
||||
key := fmt.Sprintf("authConfig=%s, proxyURL=%s", ac.String(), proxyURL)
|
||||
httpClientsLock.Lock()
|
||||
if c, ok := httpClientsCache[key]; ok {
|
||||
httpClientsLock.Unlock()
|
||||
return c
|
||||
}
|
||||
|
||||
var proxy func(*http.Request) (*url.URL, error)
|
||||
if proxyURL != nil {
|
||||
proxy = http.ProxyURL(proxyURL)
|
||||
}
|
||||
client := &http.Client{
|
||||
Transport: ac.NewRoundTripper(&http.Transport{
|
||||
getTransport := func(cfg *tls.Config) http.RoundTripper {
|
||||
return &http.Transport{
|
||||
Proxy: proxy,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
IdleConnTimeout: *apiServerTimeout,
|
||||
MaxIdleConnsPerHost: 100,
|
||||
}),
|
||||
TLSClientConfig: cfg,
|
||||
}
|
||||
}
|
||||
if *useHTTP2Client {
|
||||
// proxy is not supported for http2 client
|
||||
// see: https://github.com/golang/go/issues/26479
|
||||
getTransport = func(cfg *tls.Config) http.RoundTripper {
|
||||
return &http2.Transport{
|
||||
IdleConnTimeout: *apiServerTimeout,
|
||||
TLSClientConfig: cfg,
|
||||
PingTimeout: 10 * time.Second,
|
||||
}
|
||||
}
|
||||
}
|
||||
c := &http.Client{
|
||||
Transport: ac.NewRoundTripperFromGetter(getTransport),
|
||||
Timeout: *apiServerTimeout,
|
||||
}
|
||||
httpClientsCache[key] = c
|
||||
httpClientsLock.Unlock()
|
||||
return c
|
||||
}
|
||||
|
||||
func newGroupWatcher(apiServer string, ac *promauth.Config, namespaces []string, selectors []Selector, attachNodeMetadata bool, proxyURL *url.URL) *groupWatcher {
|
||||
client := getHTTPClient(ac, proxyURL)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
gw := &groupWatcher{
|
||||
apiServer: apiServer,
|
||||
|
|
|
@ -73,8 +73,6 @@ type ScrapeWork struct {
|
|||
// - http2 is much harder to debug than http
|
||||
// - http2 has very bad security record because of its complexity - see https://portswigger.net/research/http2
|
||||
//
|
||||
// VictoriaMetrics components are compiled with nethttpomithttp2 tag because of these issues.
|
||||
//
|
||||
// EnableHTTP2 bool
|
||||
|
||||
// OriginalLabels contains original labels before relabeling.
|
||||
|
|
Loading…
Reference in a new issue