Merge branch 'public-single-node' into pmm-6401-read-prometheus-data-files

This commit is contained in:
Aliaksandr Valialkin 2022-05-21 02:27:04 +03:00
commit 993ecbb141
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
11 changed files with 155 additions and 73 deletions

View file

@ -1918,8 +1918,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
-snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages
-snapshotsMaxAge duration
-snapshotsMaxAge value
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size

View file

@ -926,6 +926,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-remoteWrite.aws.secretKey array
Optional AWS SecretKey to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.aws.serice array
Optional AWS Service to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url. Defaults to "aps".
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.aws.useSigv4 array
Enables SigV4 request signing for -remoteWrite.url. It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
Supports array of values separated by comma or specified via multiple flags.

View file

@ -605,7 +605,7 @@ The shortlist of configuration flags is the following:
-datasource.tlsServerName string
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
-datasource.url string
VictoriaMetrics or vmselect url. Required parameter. E.g. http://127.0.0.1:8428
VictoriaMetrics or vmselect url. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend
-defaultTenant.graphite string
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy
-defaultTenant.prometheus string
@ -748,7 +748,7 @@ The shortlist of configuration flags is the following:
-remoteRead.bearerTokenFile string
Optional path to bearer token file to use for -remoteRead.url.
-remoteRead.disablePathAppend
Whether to disable automatic appending of '/api/v1/query' path to the configured -remoteRead.url.
Whether to disable automatic appending of '/api/v1/query' path to the configured -datasource.url and -remoteRead.url
-remoteRead.ignoreRestoreErrors
Whether to ignore errors from remote storage when restoring alerts state on startup. (default true)
-remoteRead.lookback duration
@ -817,6 +817,8 @@ The shortlist of configuration flags is the following:
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
-remoteWrite.url string
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend
-replay.disableProgressBar
Whether to disable rendering progress bars during the replay. Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.
-replay.maxDatapointsPerQuery int
Max number of data points expected in one request. The higher the value, the less requests will be made during replay. (default 1000)
-replay.ruleRetryAttempts int
@ -836,17 +838,20 @@ The shortlist of configuration flags is the following:
absolute path to all .yaml files in root.
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
Supports an array of values separated by comma or specified via multiple flags.
-rule.templates
Path or glob pattern to location with go template definitions for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder, absolute path to all .tpl files in root.
-rule.configCheckInterval duration
Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead
-rule.maxResolveDuration duration
Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group.
-rule.resendDelay duration
Minimum amount of time to wait before resending an alert to notifier
-rule.templates array
Path or glob pattern to location with go template definitions
for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
absolute path to all .tpl files in root.
Supports an array of values separated by comma or specified via multiple flags.
-rule.validateExpressions
Whether to validate rules expressions via MetricsQL engine (default true)
-rule.validateTemplates

View file

@ -49,14 +49,21 @@ type groupMetrics struct {
iterationTotal *utils.Counter
iterationDuration *utils.Summary
iterationMissed *utils.Counter
iterationInterval *utils.Gauge
}
func newGroupMetrics(name, file string) *groupMetrics {
func newGroupMetrics(g *Group) *groupMetrics {
m := &groupMetrics{}
labels := fmt.Sprintf(`group=%q, file=%q`, name, file)
labels := fmt.Sprintf(`group=%q, file=%q`, g.Name, g.File)
m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels))
m.iterationInterval = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 {
g.mu.RLock()
i := g.Interval.Seconds()
g.mu.RUnlock()
return i
})
return m
}
@ -92,13 +99,13 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
finishedCh: make(chan struct{}),
updateCh: make(chan *Group),
}
g.metrics = newGroupMetrics(g.Name, g.File)
if g.Interval == 0 {
g.Interval = defaultInterval
}
if g.Concurrency < 1 {
g.Concurrency = 1
}
g.metrics = newGroupMetrics(g)
rules := make([]Rule, len(cfg.Rules))
for i, r := range cfg.Rules {
var extraLabels map[string]string
@ -222,6 +229,8 @@ func (g *Group) close() {
g.metrics.iterationDuration.Unregister()
g.metrics.iterationTotal.Unregister()
g.metrics.iterationMissed.Unregister()
g.metrics.iterationInterval.Unregister()
for _, rule := range g.Rules {
rule.Close()
}

View file

@ -15,18 +15,24 @@ The following tip changes can be tested by building VictoriaMetrics components f
## tip
## [v1.77.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.77.2)
Released at 21-05-2022
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support [reusable templates](https://prometheus.io/docs/prometheus/latest/configuration/template_examples/#defining-reusable-templates) for rules annotations. The path to the template files can be specified via `-rule.templates` flag. See more about this feature [here](https://docs.victoriametrics.com/vmalert.html#reusable-templates). Thanks to @AndrewChubatiuk for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2532). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2510).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): expose `vmalert_iteration_interval_seconds` metric at `http://vmalert:8880/metrics`. This metric shows the configured per-group evaluation interval. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2618).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add `influx-prometheus-mode` command-line flag, which allows to restore the original time series written from Prometheus into InfluxDB during data migration from InfluxDB to VictoriaMetrics. See [this feature request](https://github.com/VictoriaMetrics/vmctl/issues/8). Thanks to @mback2k for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2545).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to specify AWS service name when issuing requests to AWS api. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2605). Thanks to @transacid for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2604).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix a bug, which could lead to incomplete discovery of scrape targets in Kubernetes (aka `kubernetes_sd_config`). the bug has been introduced in [v1.77.0](https://docs.victoriametrics.com/CHANGELOG.html#v1770).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): support `scalar` result type in response. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2607).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): support strings in `humanize.*` template function in the same way as Prometheus does. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2569).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): proxy `/rules` requests to vmalert from Grafana's alerting UI. This removes errors in Grafana's UI for Grafana versions older than `8.5.*`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2583)
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not add `/api/v1/query` suffix to `-datasource.url` if `-remoteRead.disablePathAppend` command-line flag is set. Previously this flag was applied only to `-remoteRead.url`, which could confuse users.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): prevent from possible resource leak on config update, which could lead to the slowdown of `vmalert` over time. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2577).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): do not return values from [label_value()](https://docs.victoriametrics.com/MetricsQL.html#label_value) function if the original time series has no values at the selected timestamps.
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): limit the number of concurrently established connections from vmselect to vmstorage. This should prevent from potentially high spikes in the number of established connections after temporary slowdown in connection handshake procedure between vmselect and vmstorage because of spikes in workload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2552).
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): fix build for Solaris / SmartOS. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1322#issuecomment-1120276146).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not add `/api/v1/query` suffix to `-datasource.url` if `-remoteRead.disablePathAppend` command-line flag is set. Previously this flag was applied only to `-remoteRead.url`, which could confuse users.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): prevent from possible resource leak on config update, which could lead to the slowdown of `vmalert` over time. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2577).
## [v1.77.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.77.1)

View file

@ -1918,8 +1918,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
-snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages
-snapshotsMaxAge duration
-snapshotsMaxAge value
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size

View file

@ -1922,8 +1922,9 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
-snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages
-snapshotsMaxAge duration
-snapshotsMaxAge value
Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted
The following optional suffixes are supported: h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 0)
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size

View file

@ -9,3 +9,5 @@ sort: 22
3. [HA monitoring setup in K8s via VM Cluster](https://docs.victoriametrics.com/guides/k8s-ha-monitoring-via-vm-cluster.html)
4. [Getting started with VM Operator](https://docs.victoriametrics.com/guides/getting-started-with-vm-operator.html)
5. [Multi Retention Setup within VictoriaMetrics Cluster](https://docs.victoriametrics.com/guides/guide-vmcluster-multiple-retention-setup.html)
6. [Migrate from InfluxDB to VictoriaMetrics](https://docs.victoriametrics.com/guides/migrate-from-influx.html)
7. [Multi-regional setup with VictoriaMetrics: Dedicated regions for monitoring](https://docs.victoriametrics.com/guides/multi-regional-setup-dedicated-regions.html)

View file

@ -930,6 +930,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-remoteWrite.aws.secretKey array
Optional AWS SecretKey to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.aws.serice array
Optional AWS Service to use for -remoteWrite.url if -remoteWrite.aws.useSigv4 is set. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url. Defaults to "aps".
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.aws.useSigv4 array
Enables SigV4 request signing for -remoteWrite.url. It is expected that other -remoteWrite.aws.* command-line flags are set if sigv4 request signing is enabled. If multiple args are set, then they are applied independently for the corresponding -remoteWrite.url
Supports array of values separated by comma or specified via multiple flags.

View file

@ -609,7 +609,7 @@ The shortlist of configuration flags is the following:
-datasource.tlsServerName string
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
-datasource.url string
VictoriaMetrics or vmselect url. Required parameter. E.g. http://127.0.0.1:8428
VictoriaMetrics or vmselect url. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend
-defaultTenant.graphite string
Default tenant for Graphite alerting groups. See https://docs.victoriametrics.com/vmalert.html#multitenancy
-defaultTenant.prometheus string
@ -752,7 +752,7 @@ The shortlist of configuration flags is the following:
-remoteRead.bearerTokenFile string
Optional path to bearer token file to use for -remoteRead.url.
-remoteRead.disablePathAppend
Whether to disable automatic appending of '/api/v1/query' path to the configured -remoteRead.url.
Whether to disable automatic appending of '/api/v1/query' path to the configured -datasource.url and -remoteRead.url
-remoteRead.ignoreRestoreErrors
Whether to ignore errors from remote storage when restoring alerts state on startup. (default true)
-remoteRead.lookback duration
@ -821,6 +821,8 @@ The shortlist of configuration flags is the following:
Optional TLS server name to use for connections to -remoteWrite.url. By default the server name from -remoteWrite.url is used
-remoteWrite.url string
Optional URL to VictoriaMetrics or vminsert where to persist alerts state and recording rules results in form of timeseries. For example, if -remoteWrite.url=http://127.0.0.1:8428 is specified, then the alerts state will be written to http://127.0.0.1:8428/api/v1/write . See also -remoteWrite.disablePathAppend
-replay.disableProgressBar
Whether to disable rendering progress bars during the replay. Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.
-replay.maxDatapointsPerQuery int
Max number of data points expected in one request. The higher the value, the less requests will be made during replay. (default 1000)
-replay.ruleRetryAttempts int
@ -840,17 +842,20 @@ The shortlist of configuration flags is the following:
absolute path to all .yaml files in root.
Rule files may contain %{ENV_VAR} placeholders, which are substituted by the corresponding env vars.
Supports an array of values separated by comma or specified via multiple flags.
-rule.templates
Path or glob pattern to location with go template definitions for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder, absolute path to all .tpl files in root.
-rule.configCheckInterval duration
Interval for checking for changes in '-rule' files. By default the checking is disabled. Send SIGHUP signal in order to force config check for changes. DEPRECATED - see '-configCheckInterval' instead
-rule.maxResolveDuration duration
Limits the maximum duration for automatic alert expiration, which is by default equal to 3 evaluation intervals of the parent group.
-rule.resendDelay duration
Minimum amount of time to wait before resending an alert to notifier
-rule.templates array
Path or glob pattern to location with go template definitions
for rules annotations templating. Flag can be specified multiple times.
Examples:
-rule.templates="/path/to/file". Path to a single file with go templates
-rule.templates="dir/*.tpl" -rule.templates="/*.tpl". Relative path to all .tpl files in "dir" folder,
absolute path to all .tpl files in root.
Supports an array of values separated by comma or specified via multiple flags.
-rule.validateExpressions
Whether to validate rules expressions via MetricsQL engine (default true)
-rule.validateTemplates

View file

@ -55,7 +55,7 @@ type apiWatcher struct {
gw *groupWatcher
// swos contains per-urlWatcher maps of ScrapeWork objects for the given apiWatcher
// swosByURLWatcher contains per-urlWatcher maps of ScrapeWork objects for the given apiWatcher
swosByURLWatcher map[*urlWatcher]map[string][]interface{}
swosByURLWatcherLock sync.Mutex
@ -91,23 +91,51 @@ func (aw *apiWatcher) mustStart() {
aw.gw.startWatchersForRole(aw.role, aw)
}
func (aw *apiWatcher) updateSwosCount(multiplier int, swosByKey map[string][]interface{}) {
n := 0
for _, swos := range swosByKey {
n += len(swos)
}
n *= multiplier
aw.swosCount.Add(n)
}
func (aw *apiWatcher) mustStop() {
aw.gw.unsubscribeAPIWatcher(aw)
aw.swosByURLWatcherLock.Lock()
for _, swosByKey := range aw.swosByURLWatcher {
aw.swosCount.Add(-len(swosByKey))
aw.updateSwosCount(-1, swosByKey)
}
aw.swosByURLWatcher = make(map[*urlWatcher]map[string][]interface{})
aw.swosByURLWatcherLock.Unlock()
}
func (aw *apiWatcher) reloadScrapeWorks(uw *urlWatcher, swosByKey map[string][]interface{}) {
func (aw *apiWatcher) replaceScrapeWorks(uw *urlWatcher, swosByKey map[string][]interface{}) {
aw.swosByURLWatcherLock.Lock()
aw.swosCount.Add(len(swosByKey) - len(aw.swosByURLWatcher[uw]))
aw.updateSwosCount(-1, aw.swosByURLWatcher[uw])
aw.updateSwosCount(1, swosByKey)
aw.swosByURLWatcher[uw] = swosByKey
aw.swosByURLWatcherLock.Unlock()
}
func (aw *apiWatcher) updateScrapeWorks(uw *urlWatcher, swosByKey map[string][]interface{}) {
aw.swosByURLWatcherLock.Lock()
dst := aw.swosByURLWatcher[uw]
if dst == nil {
dst = make(map[string][]interface{})
aw.swosByURLWatcher[uw] = dst
}
for key, swos := range swosByKey {
aw.swosCount.Add(len(swos) - len(dst[key]))
if len(swos) == 0 {
delete(dst, key)
} else {
dst[key] = swos
}
}
aw.swosByURLWatcherLock.Unlock()
}
func (aw *apiWatcher) setScrapeWorks(uw *urlWatcher, key string, labels []map[string]string) {
swos := getScrapeWorkObjectsForLabels(aw.swcFunc, labels)
aw.swosByURLWatcherLock.Lock()
@ -117,10 +145,10 @@ func (aw *apiWatcher) setScrapeWorks(uw *urlWatcher, key string, labels []map[st
aw.swosByURLWatcher[uw] = swosByKey
}
aw.swosCount.Add(len(swos) - len(swosByKey[key]))
if len(swos) > 0 {
swosByKey[key] = swos
} else {
if len(swos) == 0 {
delete(swosByKey, key)
} else {
swosByKey[key] = swos
}
aw.swosByURLWatcherLock.Unlock()
}
@ -250,6 +278,45 @@ var (
})
)
type swosByKeyWithLock struct {
mu sync.Mutex
swosByKey map[string][]interface{}
}
func (gw *groupWatcher) getScrapeWorkObjectsByAPIWatcherLocked(objectsByKey map[string]object, awsMap map[*apiWatcher]struct{}) map[*apiWatcher]*swosByKeyWithLock {
if len(awsMap) == 0 {
return nil
}
swosByAPIWatcher := make(map[*apiWatcher]*swosByKeyWithLock, len(awsMap))
for aw := range awsMap {
swosByAPIWatcher[aw] = &swosByKeyWithLock{
swosByKey: make(map[string][]interface{}),
}
}
// Generate ScrapeWork objects in parallel on available CPU cores.
// This should reduce the time needed for their generation on systems with many CPU cores.
var wg sync.WaitGroup
limiterCh := make(chan struct{}, cgroup.AvailableCPUs())
for key, o := range objectsByKey {
labels := o.getTargetLabels(gw)
wg.Add(1)
limiterCh <- struct{}{}
go func(key string, labels []map[string]string) {
for aw, e := range swosByAPIWatcher {
swos := getScrapeWorkObjectsForLabels(aw.swcFunc, labels)
e.mu.Lock()
e.swosByKey[key] = swos
e.mu.Unlock()
}
wg.Done()
<-limiterCh
}(key, labels)
}
wg.Wait()
return swosByAPIWatcher
}
func (gw *groupWatcher) getObjectByRoleLocked(role, namespace, name string) object {
if role == "node" {
// Node objects have no namespace
@ -310,9 +377,9 @@ func (gw *groupWatcher) startWatchersForRole(role string, aw *apiWatcher) {
time.Sleep(sleepTime)
startTime := time.Now()
gw.mu.Lock()
if uw.needUpdateScrapeWorks {
uw.needUpdateScrapeWorks = false
uw.updateScrapeWorksLocked(uw.objectsByKey, uw.aws)
if uw.needRecreateScrapeWorks {
uw.needRecreateScrapeWorks = false
uw.recreateScrapeWorksLocked(uw.objectsByKey, uw.aws)
sleepTime = time.Since(startTime)
if sleepTime < minSleepTime {
sleepTime = minSleepTime
@ -401,7 +468,7 @@ type urlWatcher struct {
// objectsByKey contains the latest state for objects obtained from apiURL
objectsByKey map[string]object
needUpdateScrapeWorks bool
needRecreateScrapeWorks bool
resourceVersion string
@ -450,7 +517,7 @@ func (uw *urlWatcher) registerPendingAPIWatchersLocked() {
if len(uw.awsPending) == 0 {
return
}
uw.updateScrapeWorksLocked(uw.objectsByKey, uw.awsPending)
uw.recreateScrapeWorksLocked(uw.objectsByKey, uw.awsPending)
for aw := range uw.awsPending {
uw.aws[aw] = struct{}{}
}
@ -460,44 +527,23 @@ func (uw *urlWatcher) registerPendingAPIWatchersLocked() {
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_discovery_kubernetes_subscribers{role=%q,status="pending"}`, uw.role)).Add(-awsPendingLen)
}
func (uw *urlWatcher) updateScrapeWorksLocked(objectsByKey map[string]object, awsMap map[*apiWatcher]struct{}) {
if len(objectsByKey) == 0 || len(awsMap) == 0 {
return
}
aws := make([]*apiWatcher, 0, len(awsMap))
for aw := range awsMap {
aws = append(aws, aw)
}
swosByKey := make([]map[string][]interface{}, len(aws))
for i := range aws {
swosByKey[i] = make(map[string][]interface{})
}
// Generate ScrapeWork objects in parallel on available CPU cores.
// This should reduce the time needed for their generation on systems with many CPU cores.
var swosByKeyLock sync.Mutex
var wg sync.WaitGroup
limiterCh := make(chan struct{}, cgroup.AvailableCPUs())
for key, o := range objectsByKey {
labels := o.getTargetLabels(uw.gw)
wg.Add(1)
limiterCh <- struct{}{}
go func(key string, labels []map[string]string) {
for i, aw := range aws {
swos := getScrapeWorkObjectsForLabels(aw.swcFunc, labels)
if len(swos) > 0 {
swosByKeyLock.Lock()
swosByKey[i][key] = swos
swosByKeyLock.Unlock()
}
func (uw *urlWatcher) recreateScrapeWorksLocked(objectsByKey map[string]object, awsMap map[*apiWatcher]struct{}) {
es := uw.gw.getScrapeWorkObjectsByAPIWatcherLocked(objectsByKey, awsMap)
for aw, e := range es {
swosByKey := e.swosByKey
for key, swos := range swosByKey {
if len(swos) == 0 {
delete(swosByKey, key)
}
wg.Done()
<-limiterCh
}(key, labels)
}
aw.replaceScrapeWorks(uw, swosByKey)
}
wg.Wait()
for i, aw := range aws {
aw.reloadScrapeWorks(uw, swosByKey[i])
}
func (uw *urlWatcher) updateScrapeWorksLocked(objectsByKey map[string]object, awsMap map[*apiWatcher]struct{}) {
es := uw.gw.getScrapeWorkObjectsByAPIWatcherLocked(objectsByKey, awsMap)
for aw, e := range es {
aw.updateScrapeWorks(uw, e.swosByKey)
}
}
@ -574,7 +620,7 @@ func (uw *urlWatcher) reloadObjects() string {
uw.removeScrapeWorksLocked(objectsRemoved)
uw.updateScrapeWorksLocked(objectsUpdated, uw.aws)
uw.updateScrapeWorksLocked(objectsAdded, uw.aws)
uw.needUpdateScrapeWorks = false
uw.needRecreateScrapeWorks = false
if len(objectsRemoved) > 0 || len(objectsUpdated) > 0 || len(objectsAdded) > 0 {
uw.maybeUpdateDependedScrapeWorksLocked()
}
@ -756,12 +802,12 @@ func (uw *urlWatcher) maybeUpdateDependedScrapeWorksLocked() {
}
if (role == "pod" || role == "service") && (uwx.role == "endpoints" || uwx.role == "endpointslice") {
// endpoints and endpointslice objects depend on pods and service objects
uwx.needUpdateScrapeWorks = true
uwx.needRecreateScrapeWorks = true
continue
}
if attachNodeMetadata && role == "node" && uwx.role == "pod" {
// pod objects depend on node objects if attachNodeMetadata is set
uwx.needUpdateScrapeWorks = true
uwx.needRecreateScrapeWorks = true
continue
}
}