mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmauth: properly release memory during config reload (#5805)
* app/vmauth: properly release memory during config reload previously metrics package hold a refrence for channels for users concurrent requests. it case of churn at `name` field of users configuration, new metric was created. But previous one wasn't deleted. It prevented full parsed configuration from being garbace collected. now all config related metrics are bound to corresponding metrics.Set and unregistered during config reload process. It also must fix an issue with incorrect values for current concurrent user requests https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4690 * wip --------- Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
parent
89aceb2b2f
commit
0a2cc0e873
2 changed files with 28 additions and 15 deletions
|
@ -43,6 +43,9 @@ var (
|
|||
type AuthConfig struct {
|
||||
Users []UserInfo `yaml:"users,omitempty"`
|
||||
UnauthorizedUser *UserInfo `yaml:"unauthorized_user,omitempty"`
|
||||
|
||||
// ms holds all the metrics for the given AuthConfig
|
||||
ms *metrics.Set
|
||||
}
|
||||
|
||||
// UserInfo is user information read from authConfigPath
|
||||
|
@ -503,6 +506,11 @@ func loadAuthConfig() (bool, error) {
|
|||
}
|
||||
logger.Infof("loaded information about %d users from -auth.config=%q", len(m), *authConfigPath)
|
||||
|
||||
prevAc := authConfig.Load()
|
||||
if prevAc != nil {
|
||||
metrics.UnregisterSet(prevAc.ms)
|
||||
}
|
||||
metrics.RegisterSet(ac.ms)
|
||||
authConfig.Store(ac)
|
||||
authConfigData.Store(&data)
|
||||
authUsers.Store(&m)
|
||||
|
@ -515,10 +523,13 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot expand environment vars: %w", err)
|
||||
}
|
||||
var ac AuthConfig
|
||||
if err = yaml.UnmarshalStrict(data, &ac); err != nil {
|
||||
ac := &AuthConfig{
|
||||
ms: metrics.NewSet(),
|
||||
}
|
||||
if err = yaml.UnmarshalStrict(data, ac); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %w", err)
|
||||
}
|
||||
|
||||
ui := ac.UnauthorizedUser
|
||||
if ui != nil {
|
||||
if ui.Username != "" {
|
||||
|
@ -541,15 +552,15 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels)
|
||||
ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
ui.concurrencyLimitReached = ac.ms.NewCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = ac.ms.NewGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
_ = ac.ms.NewGauge(`vmauth_unauthorized_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
|
@ -559,7 +570,7 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||
}
|
||||
ui.httpTransport = tr
|
||||
}
|
||||
return &ac, nil
|
||||
return ac, nil
|
||||
}
|
||||
|
||||
func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
||||
|
@ -601,16 +612,16 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse metric_labels: %w", err)
|
||||
}
|
||||
ui.requests = metrics.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels)
|
||||
ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels)
|
||||
ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels)
|
||||
mcr := ui.getMaxConcurrentRequests()
|
||||
ui.concurrencyLimitCh = make(chan struct{}, mcr)
|
||||
ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels)
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 {
|
||||
return float64(cap(ui.concurrencyLimitCh))
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
_ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 {
|
||||
return float64(len(ui.concurrencyLimitCh))
|
||||
})
|
||||
|
||||
|
|
|
@ -46,6 +46,8 @@ The sandbox cluster installation is running under the constant load generated by
|
|||
* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules) for `/api/v1/rules` API. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos.
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738).
|
||||
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): properly release memory during config reload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4690).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): properly expose `vmauth_unauthorized_user_concurrent_requests_capacity`, `vmauth_unauthorized_user_concurrent_requests_current`, `vmauth_user_concurrent_requests_capacity` and `vmauth_user_concurrent_requests_current` [metrics](https://docs.victoriametrics.com/vmauth.html#monitoring) after [config reload](https://docs.victoriametrics.com/vmauth.html#config-reload). Previously these metrics didn't work after config reload.
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) from multiple arguments passed to [aggregate functions](https://docs.victoriametrics.com/metricsql/#aggregate-functions). For example, `sum({job="foo"}, {job="bar"}) by (job) + a` was improperly optimized to `sum({job="foo"}, {job="bar"}) by (job) + a{job="foo"}` before being executed. This could lead to unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5604).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle precision errors when calculating [changes](https://docs.victoriametrics.com/metricsql/#changes), [changes_prometheus](https://docs.victoriametrics.com/metricsql/#changes_prometheus), [increases_over_time](https://docs.victoriametrics.com/metricsql/#increases_over_time) and [resets](https://docs.victoriametrics.com/metricsql/#resets) functions. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/767).
|
||||
* BUGFIX: all VictoriaMetrics components: consistently return 200 http status code from [`/-/reload` endpoint](https://docs.victoriametrics.com/vmagent/#configuration-update). Previously [single-node VictoriaMetrics](https://docs.victoriametrics.com/) was returning 204 http status code. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5774).
|
||||
|
|
Loading…
Reference in a new issue