diff --git a/app/vmauth/auth_config.go b/app/vmauth/auth_config.go index cf8399a2f..43ab7cb6f 100644 --- a/app/vmauth/auth_config.go +++ b/app/vmauth/auth_config.go @@ -43,6 +43,9 @@ var ( type AuthConfig struct { Users []UserInfo `yaml:"users,omitempty"` UnauthorizedUser *UserInfo `yaml:"unauthorized_user,omitempty"` + + // ms holds all the metrics for the given AuthConfig + ms *metrics.Set } // UserInfo is user information read from authConfigPath @@ -503,6 +506,11 @@ func loadAuthConfig() (bool, error) { } logger.Infof("loaded information about %d users from -auth.config=%q", len(m), *authConfigPath) + prevAc := authConfig.Load() + if prevAc != nil { + metrics.UnregisterSet(prevAc.ms) + } + metrics.RegisterSet(ac.ms) authConfig.Store(ac) authConfigData.Store(&data) authUsers.Store(&m) @@ -515,10 +523,13 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) { if err != nil { return nil, fmt.Errorf("cannot expand environment vars: %w", err) } - var ac AuthConfig - if err = yaml.UnmarshalStrict(data, &ac); err != nil { + ac := &AuthConfig{ + ms: metrics.NewSet(), + } + if err = yaml.UnmarshalStrict(data, ac); err != nil { return nil, fmt.Errorf("cannot unmarshal AuthConfig data: %w", err) } + ui := ac.UnauthorizedUser if ui != nil { if ui.Username != "" { @@ -541,15 +552,15 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) { if err != nil { return nil, fmt.Errorf("cannot parse metric_labels for unauthorized_user: %w", err) } - ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total` + metricLabels) - ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_request_backend_errors_total` + metricLabels) - ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels) + ui.requests = ac.ms.NewCounter(`vmauth_unauthorized_user_requests_total` + metricLabels) + ui.backendErrors = ac.ms.NewCounter(`vmauth_unauthorized_user_request_backend_errors_total` + 
metricLabels) + ui.requestsDuration = ac.ms.NewSummary(`vmauth_unauthorized_user_request_duration_seconds` + metricLabels) ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests()) - ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total` + metricLabels) - _ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`+metricLabels, func() float64 { + ui.concurrencyLimitReached = ac.ms.NewCounter(`vmauth_unauthorized_user_concurrent_requests_limit_reached_total` + metricLabels) + _ = ac.ms.NewGauge(`vmauth_unauthorized_user_concurrent_requests_capacity`+metricLabels, func() float64 { return float64(cap(ui.concurrencyLimitCh)) }) - _ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`+metricLabels, func() float64 { + _ = ac.ms.NewGauge(`vmauth_unauthorized_user_concurrent_requests_current`+metricLabels, func() float64 { return float64(len(ui.concurrencyLimitCh)) }) @@ -559,7 +570,7 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) { } ui.httpTransport = tr } - return &ac, nil + return ac, nil } func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) { @@ -601,16 +612,16 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) { if err != nil { return nil, fmt.Errorf("cannot parse metric_labels: %w", err) } - ui.requests = metrics.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels) - ui.backendErrors = metrics.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels) - ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels) + ui.requests = ac.ms.GetOrCreateCounter(`vmauth_user_requests_total` + metricLabels) + ui.backendErrors = ac.ms.GetOrCreateCounter(`vmauth_user_request_backend_errors_total` + metricLabels) + ui.requestsDuration = ac.ms.GetOrCreateSummary(`vmauth_user_request_duration_seconds` + metricLabels) 
mcr := ui.getMaxConcurrentRequests() ui.concurrencyLimitCh = make(chan struct{}, mcr) - ui.concurrencyLimitReached = metrics.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels) - _ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 { + ui.concurrencyLimitReached = ac.ms.GetOrCreateCounter(`vmauth_user_concurrent_requests_limit_reached_total` + metricLabels) + _ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_capacity`+metricLabels, func() float64 { return float64(cap(ui.concurrencyLimitCh)) }) - _ = metrics.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 { + _ = ac.ms.GetOrCreateGauge(`vmauth_user_concurrent_requests_current`+metricLabels, func() float64 { return float64(len(ui.concurrencyLimitCh)) }) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b9e66ea7c..be43e1ce4 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -46,6 +46,8 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support [filtering](https://prometheus.io/docs/prometheus/2.49/querying/api/#rules) for `/api/v1/rules` API. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos. * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738). +* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): properly release memory during config reload. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4690). 
+* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): properly expose `vmauth_unauthorized_user_concurrent_requests_capacity`, `vmauth_unauthorized_user_concurrent_requests_current`, `vmauth_user_concurrent_requests_capacity` and `vmauth_user_concurrent_requests_current` [metrics](https://docs.victoriametrics.com/vmauth.html#monitoring) after [config reload](https://docs.victoriametrics.com/vmauth.html#config-reload). Previously these metrics stopped being updated after config reload, since they kept tracking the concurrency limits of the old config. * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) from multiple arguments passed to [aggregate functions](https://docs.victoriametrics.com/metricsql/#aggregate-functions). For example, `sum({job="foo"}, {job="bar"}) by (job) + a` was improperly optimized to `sum({job="foo"}, {job="bar"}) by (job) + a{job="foo"}` before being executed. This could lead to unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5604). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle precision errors when calculating [changes](https://docs.victoriametrics.com/metricsql/#changes), [changes_prometheus](https://docs.victoriametrics.com/metricsql/#changes_prometheus), [increases_over_time](https://docs.victoriametrics.com/metricsql/#increases_over_time) and [resets](https://docs.victoriametrics.com/metricsql/#resets) functions. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/767). * BUGFIX: all VictoriaMetrics components: consistently return 200 http status code from [`/-/reload` endpoint](https://docs.victoriametrics.com/vmagent/#configuration-update). Previously [single-node VictoriaMetrics](https://docs.victoriametrics.com/) was returning 204 http status code. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5774).