mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/promscrape/discovery/kubernetes: reduce CPU time spent on registering big number of Kubernetes objects shared among big number of scrape jobs
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1182
This commit is contained in:
parent
a51d0ec6ec
commit
b46194472f
5 changed files with 114 additions and 52 deletions
|
@ -60,6 +60,15 @@ type Config struct {
|
||||||
baseDir string
|
baseDir string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cfg *Config) mustStart() {
|
||||||
|
startTime := time.Now()
|
||||||
|
logger.Infof("starting service discovery routines...")
|
||||||
|
for i := range cfg.ScrapeConfigs {
|
||||||
|
cfg.ScrapeConfigs[i].mustStart(cfg.baseDir)
|
||||||
|
}
|
||||||
|
logger.Infof("started service discovery routines in %.3f seconds", time.Since(startTime).Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
func (cfg *Config) mustStop() {
|
func (cfg *Config) mustStop() {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
logger.Infof("stopping service discovery routines...")
|
logger.Infof("stopping service discovery routines...")
|
||||||
|
@ -120,6 +129,21 @@ type ScrapeConfig struct {
|
||||||
swc *scrapeWorkConfig
|
swc *scrapeWorkConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (sc *ScrapeConfig) mustStart(baseDir string) {
|
||||||
|
for i := range sc.KubernetesSDConfigs {
|
||||||
|
swosFunc := func(metaLabels map[string]string) interface{} {
|
||||||
|
target := metaLabels["__address__"]
|
||||||
|
sw, err := sc.swc.getScrapeWork(target, nil, metaLabels)
|
||||||
|
if err != nil {
|
||||||
|
logger.Errorf("cannot create kubernetes_sd_config target %q for job_name %q: %s", target, sc.swc.jobName, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return sw
|
||||||
|
}
|
||||||
|
sc.KubernetesSDConfigs[i].MustStart(baseDir, swosFunc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (sc *ScrapeConfig) mustStop() {
|
func (sc *ScrapeConfig) mustStop() {
|
||||||
for i := range sc.KubernetesSDConfigs {
|
for i := range sc.KubernetesSDConfigs {
|
||||||
sc.KubernetesSDConfigs[i].MustStop()
|
sc.KubernetesSDConfigs[i].MustStop()
|
||||||
|
@ -243,15 +267,7 @@ func (cfg *Config) getKubernetesSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
||||||
ok := true
|
ok := true
|
||||||
for j := range sc.KubernetesSDConfigs {
|
for j := range sc.KubernetesSDConfigs {
|
||||||
sdc := &sc.KubernetesSDConfigs[j]
|
sdc := &sc.KubernetesSDConfigs[j]
|
||||||
swos, err := sdc.GetScrapeWorkObjects(cfg.baseDir, func(metaLabels map[string]string) interface{} {
|
swos, err := sdc.GetScrapeWorkObjects()
|
||||||
target := metaLabels["__address__"]
|
|
||||||
sw, err := sc.swc.getScrapeWork(target, nil, metaLabels)
|
|
||||||
if err != nil {
|
|
||||||
logger.Errorf("cannot create kubernetes_sd_config target %q for job_name %q: %s", target, sc.swc.jobName, err)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return sw
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("skipping kubernetes_sd_config targets for job_name %q because of error: %s", sc.swc.jobName, err)
|
logger.Errorf("skipping kubernetes_sd_config targets for job_name %q because of error: %s", sc.swc.jobName, err)
|
||||||
ok = false
|
ok = false
|
||||||
|
|
|
@ -7,7 +7,6 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// apiConfig contains config for API server
|
// apiConfig contains config for API server
|
||||||
|
@ -15,20 +14,14 @@ type apiConfig struct {
|
||||||
aw *apiWatcher
|
aw *apiWatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ac *apiConfig) mustStart() {
|
||||||
|
ac.aw.mustStart()
|
||||||
|
}
|
||||||
|
|
||||||
func (ac *apiConfig) mustStop() {
|
func (ac *apiConfig) mustStop() {
|
||||||
ac.aw.mustStop()
|
ac.aw.mustStop()
|
||||||
}
|
}
|
||||||
|
|
||||||
var configMap = discoveryutils.NewConfigMap()
|
|
||||||
|
|
||||||
func getAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFunc) (*apiConfig, error) {
|
|
||||||
v, err := configMap.Get(sdc, func() (interface{}, error) { return newAPIConfig(sdc, baseDir, swcFunc) })
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return v.(*apiConfig), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFunc) (*apiConfig, error) {
|
func newAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFunc) (*apiConfig, error) {
|
||||||
switch sdc.Role {
|
switch sdc.Role {
|
||||||
case "node", "pod", "service", "endpoints", "endpointslices", "ingress":
|
case "node", "pod", "service", "endpoints", "endpointslices", "ingress":
|
||||||
|
|
|
@ -72,6 +72,10 @@ func newAPIWatcher(apiServer string, ac *promauth.Config, sdc *SDConfig, swcFunc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (aw *apiWatcher) mustStart() {
|
||||||
|
aw.gw.startWatchersForRole(aw.role, aw)
|
||||||
|
}
|
||||||
|
|
||||||
func (aw *apiWatcher) mustStop() {
|
func (aw *apiWatcher) mustStop() {
|
||||||
aw.gw.unsubscribeAPIWatcher(aw)
|
aw.gw.unsubscribeAPIWatcher(aw)
|
||||||
aw.swosByNamespaceLock.Lock()
|
aw.swosByNamespaceLock.Lock()
|
||||||
|
@ -128,7 +132,8 @@ func (aw *apiWatcher) getScrapeWorkObjectsForLabels(labelss []map[string]string)
|
||||||
|
|
||||||
// getScrapeWorkObjects returns all the ScrapeWork objects for the given aw.
|
// getScrapeWorkObjects returns all the ScrapeWork objects for the given aw.
|
||||||
func (aw *apiWatcher) getScrapeWorkObjects() []interface{} {
|
func (aw *apiWatcher) getScrapeWorkObjects() []interface{} {
|
||||||
aw.gw.startWatchersForRole(aw.role, aw)
|
aw.gw.registerPendingAPIWatchers()
|
||||||
|
|
||||||
aw.swosByNamespaceLock.Lock()
|
aw.swosByNamespaceLock.Lock()
|
||||||
defer aw.swosByNamespaceLock.Unlock()
|
defer aw.swosByNamespaceLock.Unlock()
|
||||||
|
|
||||||
|
@ -272,10 +277,8 @@ func (gw *groupWatcher) getObjectByRole(role, namespace, name string) object {
|
||||||
func (gw *groupWatcher) getCachedObjectByRole(role, namespace, name string) object {
|
func (gw *groupWatcher) getCachedObjectByRole(role, namespace, name string) object {
|
||||||
key := namespace + "/" + name
|
key := namespace + "/" + name
|
||||||
gw.startWatchersForRole(role, nil)
|
gw.startWatchersForRole(role, nil)
|
||||||
gw.mu.Lock()
|
uws := gw.getURLWatchers()
|
||||||
defer gw.mu.Unlock()
|
for _, uw := range uws {
|
||||||
|
|
||||||
for _, uw := range gw.m {
|
|
||||||
if uw.role != role {
|
if uw.role != role {
|
||||||
// Role mismatch
|
// Role mismatch
|
||||||
continue
|
continue
|
||||||
|
@ -310,9 +313,11 @@ func (gw *groupWatcher) startWatchersForRole(role string, aw *apiWatcher) {
|
||||||
uw.reloadObjects()
|
uw.reloadObjects()
|
||||||
go uw.watchForUpdates()
|
go uw.watchForUpdates()
|
||||||
}
|
}
|
||||||
|
if aw != nil {
|
||||||
uw.subscribeAPIWatcher(aw)
|
uw.subscribeAPIWatcher(aw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// doRequest performs http request to the given requestURL.
|
// doRequest performs http request to the given requestURL.
|
||||||
func (gw *groupWatcher) doRequest(requestURL string) (*http.Response, error) {
|
func (gw *groupWatcher) doRequest(requestURL string) (*http.Response, error) {
|
||||||
|
@ -326,12 +331,28 @@ func (gw *groupWatcher) doRequest(requestURL string) (*http.Response, error) {
|
||||||
return gw.client.Do(req)
|
return gw.client.Do(req)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (gw *groupWatcher) unsubscribeAPIWatcher(aw *apiWatcher) {
|
func (gw *groupWatcher) registerPendingAPIWatchers() {
|
||||||
|
uws := gw.getURLWatchers()
|
||||||
|
for _, uw := range uws {
|
||||||
|
uw.registerPendingAPIWatchers()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (gw *groupWatcher) getURLWatchers() []*urlWatcher {
|
||||||
gw.mu.Lock()
|
gw.mu.Lock()
|
||||||
|
uws := make([]*urlWatcher, 0, len(gw.m))
|
||||||
for _, uw := range gw.m {
|
for _, uw := range gw.m {
|
||||||
uw.unsubscribeAPIWatcher(aw)
|
uws = append(uws, uw)
|
||||||
}
|
}
|
||||||
gw.mu.Unlock()
|
gw.mu.Unlock()
|
||||||
|
return uws
|
||||||
|
}
|
||||||
|
|
||||||
|
func (gw *groupWatcher) unsubscribeAPIWatcher(aw *apiWatcher) {
|
||||||
|
uws := gw.getURLWatchers()
|
||||||
|
for _, uw := range uws {
|
||||||
|
uw.unsubscribeAPIWatcher(aw)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// urlWatcher watches for an apiURL and updates object states in objectsByKey.
|
// urlWatcher watches for an apiURL and updates object states in objectsByKey.
|
||||||
|
@ -344,9 +365,16 @@ type urlWatcher struct {
|
||||||
parseObject parseObjectFunc
|
parseObject parseObjectFunc
|
||||||
parseObjectList parseObjectListFunc
|
parseObjectList parseObjectListFunc
|
||||||
|
|
||||||
// mu protects aws, objectsByKey and resourceVersion
|
// mu protects aws, awsPending, objectsByKey and resourceVersion
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
|
||||||
|
// awsPending contains pending apiWatcher objects, which are registered in a batch.
|
||||||
|
// Batch registering saves CPU time needed for registering big number of Kubernetes objects
|
||||||
|
// shared among big number of scrape jobs, since per-object labels are generated only once
|
||||||
|
// for all the scrape jobs (each scrape job is associated with a single apiWatcher).
|
||||||
|
// See reloadScrapeWorksForAPIWatchers for details.
|
||||||
|
awsPending map[*apiWatcher]struct{}
|
||||||
|
|
||||||
// aws contains registered apiWatcher objects
|
// aws contains registered apiWatcher objects
|
||||||
aws map[*apiWatcher]struct{}
|
aws map[*apiWatcher]struct{}
|
||||||
|
|
||||||
|
@ -374,6 +402,7 @@ func newURLWatcher(role, namespace, apiURL string, gw *groupWatcher) *urlWatcher
|
||||||
parseObject: parseObject,
|
parseObject: parseObject,
|
||||||
parseObjectList: parseObjectList,
|
parseObjectList: parseObjectList,
|
||||||
|
|
||||||
|
awsPending: make(map[*apiWatcher]struct{}),
|
||||||
aws: make(map[*apiWatcher]struct{}),
|
aws: make(map[*apiWatcher]struct{}),
|
||||||
objectsByKey: make(map[string]object),
|
objectsByKey: make(map[string]object),
|
||||||
|
|
||||||
|
@ -388,30 +417,38 @@ func newURLWatcher(role, namespace, apiURL string, gw *groupWatcher) *urlWatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
func (uw *urlWatcher) subscribeAPIWatcher(aw *apiWatcher) {
|
func (uw *urlWatcher) subscribeAPIWatcher(aw *apiWatcher) {
|
||||||
if aw == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
uw.mu.Lock()
|
uw.mu.Lock()
|
||||||
if _, ok := uw.aws[aw]; !ok {
|
if _, ok := uw.aws[aw]; !ok {
|
||||||
swosByKey := make(map[string][]interface{})
|
if _, ok := uw.awsPending[aw]; !ok {
|
||||||
for key, o := range uw.objectsByKey {
|
uw.awsPending[aw] = struct{}{}
|
||||||
labels := o.getTargetLabels(uw.gw)
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="pending"}`, uw.role)).Inc()
|
||||||
swos := aw.getScrapeWorkObjectsForLabels(labels)
|
|
||||||
if len(swos) > 0 {
|
|
||||||
swosByKey[key] = swos
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
aw.reloadScrapeWorks(uw.namespace, swosByKey)
|
|
||||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscibers{role=%q}`, uw.role)).Inc()
|
|
||||||
}
|
|
||||||
uw.mu.Unlock()
|
uw.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (uw *urlWatcher) registerPendingAPIWatchers() {
|
||||||
|
uw.mu.Lock()
|
||||||
|
awsPending := make([]*apiWatcher, 0, len(uw.awsPending))
|
||||||
|
for aw := range uw.awsPending {
|
||||||
|
awsPending = append(awsPending, aw)
|
||||||
|
delete(uw.awsPending, aw)
|
||||||
|
}
|
||||||
|
uw.reloadScrapeWorksForAPIWatchers(awsPending, uw.objectsByKey)
|
||||||
|
uw.mu.Unlock()
|
||||||
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="working"}`, uw.role)).Add(len(awsPending))
|
||||||
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="pending"}`, uw.role)).Add(-len(awsPending))
|
||||||
|
}
|
||||||
|
|
||||||
func (uw *urlWatcher) unsubscribeAPIWatcher(aw *apiWatcher) {
|
func (uw *urlWatcher) unsubscribeAPIWatcher(aw *apiWatcher) {
|
||||||
uw.mu.Lock()
|
uw.mu.Lock()
|
||||||
|
if _, ok := uw.awsPending[aw]; ok {
|
||||||
|
delete(uw.awsPending, aw)
|
||||||
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="pending"}`, uw.role)).Dec()
|
||||||
|
}
|
||||||
if _, ok := uw.aws[aw]; ok {
|
if _, ok := uw.aws[aw]; ok {
|
||||||
delete(uw.aws, aw)
|
delete(uw.aws, aw)
|
||||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscibers{role=%q}`, uw.role)).Dec()
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="working"}`, uw.role)).Dec()
|
||||||
}
|
}
|
||||||
uw.mu.Unlock()
|
uw.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,9 @@ type SDConfig struct {
|
||||||
ProxyURL proxy.URL `yaml:"proxy_url,omitempty"`
|
ProxyURL proxy.URL `yaml:"proxy_url,omitempty"`
|
||||||
Namespaces Namespaces `yaml:"namespaces,omitempty"`
|
Namespaces Namespaces `yaml:"namespaces,omitempty"`
|
||||||
Selectors []Selector `yaml:"selectors,omitempty"`
|
Selectors []Selector `yaml:"selectors,omitempty"`
|
||||||
|
|
||||||
|
cfg *apiConfig
|
||||||
|
startErr error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Namespaces represents namespaces for SDConfig
|
// Namespaces represents namespaces for SDConfig
|
||||||
|
@ -37,23 +40,33 @@ type Selector struct {
|
||||||
// ScrapeWorkConstructorFunc must construct ScrapeWork object for the given metaLabels.
|
// ScrapeWorkConstructorFunc must construct ScrapeWork object for the given metaLabels.
|
||||||
type ScrapeWorkConstructorFunc func(metaLabels map[string]string) interface{}
|
type ScrapeWorkConstructorFunc func(metaLabels map[string]string) interface{}
|
||||||
|
|
||||||
// GetScrapeWorkObjects returns ScrapeWork objects for the given sdc and baseDir.
|
// GetScrapeWorkObjects returns ScrapeWork objects for the given sdc.
|
||||||
|
//
|
||||||
|
// This function must be called after MustStart call.
|
||||||
|
func (sdc *SDConfig) GetScrapeWorkObjects() ([]interface{}, error) {
|
||||||
|
if sdc.cfg == nil {
|
||||||
|
return nil, sdc.startErr
|
||||||
|
}
|
||||||
|
return sdc.cfg.aw.getScrapeWorkObjects(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustStart initializes sdc before its usage.
|
||||||
//
|
//
|
||||||
// swcFunc is used for constructing such objects.
|
// swcFunc is used for constructing such objects.
|
||||||
func (sdc *SDConfig) GetScrapeWorkObjects(baseDir string, swcFunc ScrapeWorkConstructorFunc) ([]interface{}, error) {
|
func (sdc *SDConfig) MustStart(baseDir string, swcFunc ScrapeWorkConstructorFunc) {
|
||||||
cfg, err := getAPIConfig(sdc, baseDir, swcFunc)
|
cfg, err := newAPIConfig(sdc, baseDir, swcFunc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("cannot create API config: %w", err)
|
sdc.startErr = fmt.Errorf("cannot create API config for kubernetes: %w", err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
return cfg.aw.getScrapeWorkObjects(), nil
|
cfg.aw.mustStart()
|
||||||
|
sdc.cfg = cfg
|
||||||
}
|
}
|
||||||
|
|
||||||
// MustStop stops further usage for sdc.
|
// MustStop stops further usage for sdc.
|
||||||
func (sdc *SDConfig) MustStop() {
|
func (sdc *SDConfig) MustStop() {
|
||||||
v := configMap.Delete(sdc)
|
if sdc.cfg != nil {
|
||||||
if v != nil {
|
// sdc.cfg can be nil on MustStart error.
|
||||||
// v can be nil if GetLabels wasn't called yet.
|
sdc.cfg.mustStop()
|
||||||
cfg := v.(*apiConfig)
|
|
||||||
cfg.mustStop()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Fatalf("cannot read %q: %s", configFile, err)
|
logger.Fatalf("cannot read %q: %s", configFile, err)
|
||||||
}
|
}
|
||||||
|
cfg.mustStart()
|
||||||
|
|
||||||
scs := newScrapeConfigs(pushData)
|
scs := newScrapeConfigs(pushData)
|
||||||
scs.add("static_configs", 0, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getStaticScrapeWork() })
|
scs.add("static_configs", 0, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getStaticScrapeWork() })
|
||||||
|
@ -130,6 +131,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
goto waitForChans
|
goto waitForChans
|
||||||
}
|
}
|
||||||
cfg.mustStop()
|
cfg.mustStop()
|
||||||
|
cfgNew.mustStart()
|
||||||
cfg = cfgNew
|
cfg = cfgNew
|
||||||
data = dataNew
|
data = dataNew
|
||||||
case <-tickerCh:
|
case <-tickerCh:
|
||||||
|
@ -143,6 +145,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
goto waitForChans
|
goto waitForChans
|
||||||
}
|
}
|
||||||
cfg.mustStop()
|
cfg.mustStop()
|
||||||
|
cfgNew.mustStart()
|
||||||
cfg = cfgNew
|
cfg = cfgNew
|
||||||
data = dataNew
|
data = dataNew
|
||||||
case <-globalStopCh:
|
case <-globalStopCh:
|
||||||
|
|
Loading…
Reference in a new issue