lib/promscrape/discovery: close unused HTTP connections to service discovery servers

This should prevent from connection leaks

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4724
This commit is contained in:
Aliaksandr Valialkin 2023-07-27 14:47:53 -07:00
parent 85de94e85c
commit 1f30f53df2
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
16 changed files with 60 additions and 8 deletions

View file

@ -88,6 +88,7 @@ The previous behavior can be restored in the following ways:
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): correctly calculate evaluation time for rules. Before, there was a low probability for discrepancy between actual time and rules evaluation time if evaluation interval was lower than the execution time for rules within the group.
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): reset evaluation timestamp after modifying group interval. Before, there could have latency on rule evaluation time.
* BUGFIX: vmselect: fix timestamp alignment for Prometheus querying API if time argument is less than 10m from the beginning of Unix epoch.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): close HTTP connections to [service discovery](https://docs.victoriametrics.com/sd_configs.html) servers when they are no longer needed. This should prevent from possible connection exhasution in some cases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4724).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not show [relabel debug](https://docs.victoriametrics.com/vmagent.html#relabel-debug) links at the `/targets` page when `vmagent` runs with `-promscrape.dropOriginalLabels` command-line flag, since it has no the original labels needed for relabel debug. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4597).
* BUGFIX: vminsert: fixed decoding of label values with slash when accepting data via [pushgateway protocol](https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format). This fixes Prometheus golang client compatibility. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4692).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly parse binary operations with reserved words on the right side such as `foo + (on{bar="baz"})`. Previously such queries could lead to panic. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4422).

View file

@ -58,7 +58,11 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.c.Stop()
}
}
func appendMachineLabels(vms []virtualMachine, port int, sdc *SDConfig) []*promutils.Labels {

View file

@ -71,6 +71,7 @@ func TestGetVirtualMachinesSuccess(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error at client create: %s", err)
}
defer c.Stop()
ac := &apiConfig{
c: c,
subscriptionID: "some-id",

View file

@ -90,6 +90,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
}
dc, err := getDatacenter(client, sdc.Datacenter)
if err != nil {
client.Stop()
return nil, fmt.Errorf("cannot obtain consul datacenter: %w", err)
}

View file

@ -84,6 +84,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
}
agent, err := consul.GetAgentInfo(client)
if err != nil {
client.Stop()
return nil, fmt.Errorf("cannot obtain consul datacenter: %w", err)
}
dc := sdc.Datacenter

View file

@ -155,5 +155,9 @@ func addDropletLabels(droplets []droplet, defaultPort int) []*promutils.Labels {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.Stop()
}
}

View file

@ -47,5 +47,9 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.Stop()
}
}

View file

@ -56,5 +56,9 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.Stop()
}
}

View file

@ -101,7 +101,11 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.Stop()
}
}
func addInstanceLabels(apps *applications) []*promutils.Labels {

View file

@ -42,6 +42,7 @@ func newAPIConfig(sdc *SDConfig) (*apiConfig, error) {
if len(project) == 0 {
proj, err := getCurrentProject()
if err != nil {
client.CloseIdleConnections()
return nil, fmt.Errorf("cannot determine the current project; make sure `vmagent` runs inside GCE; error: %w", err)
}
project = proj
@ -52,6 +53,7 @@ func newAPIConfig(sdc *SDConfig) (*apiConfig, error) {
// Autodetect the current zone.
zone, err := getCurrentZone()
if err != nil {
client.CloseIdleConnections()
return nil, fmt.Errorf("cannot determine the current zone; make sure `vmagent` runs inside GCE; error: %w", err)
}
zones = append(zones, zone)
@ -62,6 +64,7 @@ func newAPIConfig(sdc *SDConfig) (*apiConfig, error) {
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3202
zs, err := getZonesForProject(client, project)
if err != nil {
client.CloseIdleConnections()
return nil, fmt.Errorf("cannot obtain zones for project %q: %w", project, err)
}
zones = zs

View file

@ -73,5 +73,9 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.CloseIdleConnections()
}
}

View file

@ -57,5 +57,9 @@ func addHTTPTargetLabels(src []httpGroupTarget, sourceURL string) []*promutils.L
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.Stop()
}
}

View file

@ -80,6 +80,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
// The synchronous targets' update is needed for returning non-empty list of targets
// just after the initialization.
if err := cfg.updateTargetsLabels(ctx); err != nil {
client.Stop()
return nil, fmt.Errorf("cannot discover Kuma targets: %w", err)
}
cfg.wg.Add(1)

View file

@ -91,6 +91,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
}
ac, err := opts.NewConfig()
if err != nil {
cfg.client.CloseIdleConnections()
return nil, err
}
cfg.client.Transport = &http.Transport{
@ -111,6 +112,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
sdcAuth = readCredentialsFromEnv()
}
if strings.HasSuffix(sdcAuth.IdentityEndpoint, "v2.0") {
cfg.client.CloseIdleConnections()
return nil, errors.New("identity_endpoint v2.0 is not supported")
}
// trim .0 from v3.0 for prometheus cfg compatibility
@ -118,11 +120,13 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
parsedURL, err := url.Parse(sdcAuth.IdentityEndpoint)
if err != nil {
cfg.client.CloseIdleConnections()
return nil, fmt.Errorf("cannot parse identity_endpoint: %s as url, err: %w", sdcAuth.IdentityEndpoint, err)
}
cfg.endpoint = parsedURL
tokenReq, err := buildAuthRequestBody(&sdcAuth)
if err != nil {
cfg.client.CloseIdleConnections()
return nil, err
}
cfg.authTokenReq = tokenReq

View file

@ -57,5 +57,9 @@ func (sdc *SDConfig) GetLabels(baseDir string) ([]*promutils.Labels, error) {
// MustStop stops further usage for sdc.
func (sdc *SDConfig) MustStop() {
configMap.Delete(sdc)
v := configMap.Delete(sdc)
if v != nil {
cfg := v.(*apiConfig)
cfg.client.CloseIdleConnections()
}
}

View file

@ -81,6 +81,12 @@ type HTTPClient struct {
ReadTimeout time.Duration
}
func (hc *HTTPClient) stop() {
// Close idle connections to server in order to free up resources.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4724
hc.client.CloseIdleConnections()
}
var defaultDialer = &net.Dialer{}
// NewClient returns new Client for the given args.
@ -276,6 +282,8 @@ func (c *Client) APIServer() string {
// Stop cancels all in-flight requests
func (c *Client) Stop() {
c.clientCancel()
c.client.stop()
c.blockingClient.stop()
}
func doRequestWithPossibleRetry(hc *HTTPClient, req *http.Request) (*http.Response, error) {