lib/promscrape/discovery/kubernetes: fix watcher start order for roles endpoints and endpointslice (#5557)

* lib/promscrape/discovery/kubernetes: fix watcher start order for roles endpoints and endpointslice

Previously the groupWatcher could be mistakenly stopped when requests for pod or services resources take too long.

* remove mislead comment

* docs/sd_configs.md: mention -promscrape.kubernetes.attachNodeMetadataAll flag in the description for attach_metadata section

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4640

* wip

* lib/promscrape/kubernetes: prevent from stopping groupWatcher when there are in-flight apiWatcher.mustStart() calls

groupWatcher is stopped if it has zero registered apiWatchers during 14 seconds.
But such a groupWatcher can be still in use if apiWatcher for `role: endpoints` or `role: endpointslice`
is being registered and the discovery of the associated `pod` and/or `service` objects takes longer
than 14 seconds - see the beginning of groupWatcher.startWatchersForRole() function for details.

Track the number of in-flight calls to apiWatcher.mustStart() and prevent from stopping the associated groupWatcher
if the number of in-flight calls is non-zero.

P.S. postponing the discovery of `pod` and/or `service` objects associated with `endpoints` or `endpointslice` roles
isn't the best solution, since it slows down initial discovery of `endpoints` and `endpointslice` targets.

* typo fix

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Hui Wang 2024-01-22 05:13:15 +08:00 committed by Aliaksandr Valialkin
parent 885ee160c2
commit 49fa92c1d0
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
2 changed files with 20 additions and 11 deletions

View file

@ -67,6 +67,7 @@ The sandbox cluster installation is running under the constant load generated by
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): exit if there is config syntax error in [`scrape_config_files`](https://docs.victoriametrics.com/vmagent.html#loading-scrape-configs-from-multiple-files) when `-promscrape.config.strictParse=true`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5508).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover targets for `role: endpoints` and `role: endpointslice` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some `endpoints` and `endpointslice` targets could be left undiscovered or some targets could have missing `__meta_*` labels when performing service discovery in busy Kubernetes clusters with large number of pods. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5557).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not store scrape response for target in memory when staleness markers are disabled. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5577) for details.
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a link for the statistic inaccuracy explanation in the cardinality explorer tool. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5460).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401).

View file

@ -103,7 +103,9 @@ func newAPIWatcher(apiServer string, ac *promauth.Config, sdc *SDConfig, swcFunc
}
func (aw *apiWatcher) mustStart() {
atomic.AddInt32(&aw.gw.apiWatcherInflightStartCalls, 1)
aw.gw.startWatchersForRole(aw.role, aw)
atomic.AddInt32(&aw.gw.apiWatcherInflightStartCalls, -1)
}
func (aw *apiWatcher) updateSwosCount(multiplier int, swosByKey map[string][]interface{}) {
@ -209,6 +211,10 @@ func (aw *apiWatcher) getScrapeWorkObjects() []interface{} {
// groupWatcher watches for Kubernetes objects on the given apiServer with the given namespaces,
// selectors and attachNodeMetadata using the given client.
type groupWatcher struct {
// The number of in-flight apiWatcher.mustStart() calls for the given groupWatcher.
// This field is used by groupWatchersCleaner() in order to determine when the given groupWatcher can be stopped.
apiWatcherInflightStartCalls int32
// Old Kubernetes doesn't support /apis/networking.k8s.io/v1/, so /apis/networking.k8s.io/v1beta1/ must be used instead.
// This flag is used for automatic substitution of v1 API path with v1beta1 API path during requests to apiServer.
useNetworkingV1Beta1 uint32
@ -309,11 +315,7 @@ func selectorsKey(selectors []Selector) string {
var (
groupWatchersLock sync.Mutex
groupWatchers = func() map[string]*groupWatcher {
gws := make(map[string]*groupWatcher)
go groupWatchersCleaner(gws)
return gws
}()
groupWatchers map[string]*groupWatcher
_ = metrics.NewGauge(`vm_promscrape_discovery_kubernetes_group_watchers`, func() float64 {
groupWatchersLock.Lock()
@ -323,11 +325,16 @@ var (
})
)
func groupWatchersCleaner(gws map[string]*groupWatcher) {
func init() {
groupWatchers = make(map[string]*groupWatcher)
go groupWatchersCleaner()
}
func groupWatchersCleaner() {
for {
time.Sleep(7 * time.Second)
groupWatchersLock.Lock()
for key, gw := range gws {
for key, gw := range groupWatchers {
gw.mu.Lock()
// Calculate the number of apiWatcher instances subscribed to gw.
awsTotal := 0
@ -335,14 +342,14 @@ func groupWatchersCleaner(gws map[string]*groupWatcher) {
awsTotal += len(uw.aws) + len(uw.awsPending)
}
if awsTotal == 0 {
// There are no API watchers subscribed to gw.
// Stop all the urlWatcher instances at gw and drop gw from gws in this case,
if awsTotal == 0 && atomic.LoadInt32(&gw.apiWatcherInflightStartCalls) == 0 {
// There are no API watchers subscribed to gw and there are no in-flight apiWatcher.mustStart() calls.
// Stop all the urlWatcher instances at gw and drop gw from groupWatchers in this case,
// but do it only on the second iteration in order to reduce urlWatcher churn
// during scrape config reloads.
if gw.noAPIWatchers {
gw.cancel()
delete(gws, key)
delete(groupWatchers, key)
} else {
gw.noAPIWatchers = true
}
@ -432,6 +439,7 @@ func (gw *groupWatcher) startWatchersForRole(role string, aw *apiWatcher) {
if gw.attachNodeMetadata && (role == "pod" || role == "endpoints" || role == "endpointslice") {
gw.startWatchersForRole("node", nil)
}
paths := getAPIPathsWithNamespaces(role, gw.namespaces, gw.selectors)
for _, path := range paths {
apiURL := gw.apiServer + path