From 43049503917686e881fe854c04e0b6447b805419 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 16 Jul 2024 17:49:55 +0200 Subject: [PATCH] lib/promscrape/discovery/yandexcloud: follow-up for 070abe5c71c73260972e85bfcbfc4c53c571e68f - Obtain IAM token via GCE-like API instead of Amazon EC2 IMDSv2 API, since it looks like IMDBSv2 API isn't supported by Yandex Cloud according to https://yandex.cloud/en/docs/security/standard/authentication#aws-token : > So far, Yandex Cloud does not support version 2, so it is strongly recommended > to technically disable getting a service account token via the Amazon EC2 metadata service. - Try obtaining IAM token via GCE-like API at first and then fall back to the deprecated Amazon EC2 IMDBSv1. This should prevent from auth errors for instances with disabled GCE-like auth API. This addresses @ITD27M01 concern at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513#issuecomment-1867794884 - Make more clear the description of the change at docs/CHANGELOG.md , add reference to the related issue. P.S. This change wasn't tested in prod because I have no access to Yandex Cloud. It is recommended to test this change by @ITD27M01 and @vmazgo , who filed the issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6524 --- docs/CHANGELOG.md | 2 +- lib/promscrape/discovery/yandexcloud/api.go | 114 ++++++++++++-------- 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 9f287660ad..780333046c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -41,7 +41,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): reduces CPU usage by reusing request body buffer. Allows to disable requests caching with `-maxRequestBodySizeToRetry=0`. See this [PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6533) for details. * FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): add [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmauth.json) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml) for [vmauth](https://docs.victoriametrics.com/vmauth/) dashboard. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4313) for details. * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): reduces CPU usage by reusing request body buffer. Allows to disable requests caching with `-maxRequestBodySizeToRetry=0`. See this [PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6533) for details. -* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): added `yandexcloud_sd` AWS API IMDSv2 support. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): [`yandexcloud_sd_configs`](https://docs.victoriametrics.com/sd_configs/#yandexcloud_sd_configs): add support for obtaining IAM token in [GCE format](https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm) additionally to the [deprecated Amazon EC2 IMDSv1 format](https://yandex.cloud/en/docs/security/standard/authentication#aws-token). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): add `-graphite.sanitizeMetricName` cmd-line flag for sanitizing metrics ingested via [Graphite protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077). * FEATURE: [streaming aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose the following metrics at `/metrics` page of [vmagent](https://docs.victoriametrics.com/vmagent/) and [single-node VictoriaMetrics](https://docs.victoriametrics.com/): * `vm_streamaggr_matched_samples_total` - the number of input samples matched by the corresponding aggregation rule diff --git a/lib/promscrape/discovery/yandexcloud/api.go b/lib/promscrape/discovery/yandexcloud/api.go index 3d5d81341b..6ceb5df8d8 100644 --- a/lib/promscrape/discovery/yandexcloud/api.go +++ b/lib/promscrape/discovery/yandexcloud/api.go @@ -7,7 +7,6 @@ import ( "io" "net/http" "net/url" - "strconv" "sync" "time" @@ -37,10 +36,6 @@ type apiConfig struct { // credsLock protects the refresh of creds credsLock sync.Mutex creds *apiCredentials - - // metadataCredsLock protects the refresh of metadataCreds - metadataCredsLock sync.Mutex - metadataCreds *apiCredentials } func getAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { @@ -124,55 +119,80 @@ func getCreds(cfg *apiConfig) (*apiCredentials, error) { }, nil } -// getMetadataCreds gets Yandex Cloud IAM metadata token -func getMetadataCreds(cfg *apiConfig) (*apiCredentials, error) { - cfg.metadataCredsLock.Lock() - defer cfg.metadataCredsLock.Unlock() - - if cfg.metadataCreds != nil && time.Until(cfg.metadataCreds.Expiration) > 10*time.Second { - // Credentials aren't expired yet. - return cfg.metadataCreds, nil - } - - endpoint := "http://169.254.169.254/latest/api/token" - req, err := http.NewRequest(http.MethodPut, endpoint, nil) - if err != nil { - return nil, fmt.Errorf("cannot create metadata token request: %w", err) - } - ttl := 1800 - expiration := time.Now().Add(time.Duration(ttl) * time.Second) - req.Header.Add("X-aws-ec2-metadata-token-ttl-seconds", strconv.Itoa(ttl)) - resp, err := cfg.client.Do(req) - if err != nil { - return nil, fmt.Errorf("cannot perform metadata token request: %w", err) - } - data, err := readResponseBody(resp, endpoint) - if err != nil { - return nil, fmt.Errorf("cannot read metadata creds from %s: %w", endpoint, err) - } - return &apiCredentials{ - Token: string(data), - Expiration: expiration, - }, nil -} - // getInstanceCreds gets Yandex Cloud IAM token using instance Service Account // // See https://cloud.yandex.com/en-ru/docs/compute/operations/vm-connect/auth-inside-vm func getInstanceCreds(cfg *apiConfig) (*apiCredentials, error) { - metadataCreds, err := getMetadataCreds(cfg) - if err != nil { - return nil, err + // Try obtaining GCE-like creds at first. + // See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm + creds, err := getGCEInstanceCreds(cfg) + if err == nil { + return creds, nil } - endpoint := "http://169.254.169.254/latest/meta-data/iam/security-credentials/default" + errGCE := err + + // Fall back to the disabled IMDSv1 - see https://yandex.cloud/en/docs/security/standard/authentication#aws-token + // + // TODO: remove this when it is completely removed from Yandex Cloud. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513 + // and https://yandex.cloud/en/docs/security/standard/authentication#aws-token + creds, err = getEC2IMDBSv1Creds(cfg) + if err == nil { + return creds, nil + } + + // Return errGCE, since it is likely the IMDBSv1 is disabled. + return nil, errGCE +} + +// getGCEInstanceCreds gets Yandex Cloud IAM token using GCE API +// +// See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm +func getGCEInstanceCreds(cfg *apiConfig) (*apiCredentials, error) { + endpoint := "http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/token" req, err := http.NewRequest(http.MethodGet, endpoint, nil) if err != nil { - return nil, fmt.Errorf("cannot create instance creds request: %w", err) + logger.Panicf("BUG: cannot create GCE token request for %s: %s", endpoint, err) } - req.Header.Add("X-aws-ec2-metadata-token", metadataCreds.Token) + req.Header.Add("Metadata-Flavor", "Google") + resp, err := cfg.client.Do(req) if err != nil { - return nil, fmt.Errorf("cannot read instance creds from %s: %w", endpoint, err) + return nil, fmt.Errorf("cannot obtain GCE token from %s: %w", endpoint, err) + } + data, err := readResponseBody(resp, endpoint) + if err != nil { + return nil, fmt.Errorf("cannot read GCE token from %s: %w", endpoint, err) + } + + var ac gceAPICredentials + if err := json.Unmarshal(data, &ac); err != nil { + return nil, fmt.Errorf("cannot unmarshal GCE token from %s: %w; data=%s", endpoint, err, data) + } + if ac.TokenType != "Bearer" { + return nil, fmt.Errorf("unsupported GCE token type received from %s: %q; supported: %q", endpoint, ac.TokenType, "Bearer") + } + + expiration := time.Now().Add(time.Duration(ac.ExpiresIn) * time.Second) + return &apiCredentials{ + Token: ac.AccessToken, + Expiration: expiration, + }, nil +} + +// See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm +type gceAPICredentials struct { + AccessToken string `json:"access_token"` + ExpiresIn int `json:"expires_in"` + TokenType string `json:"token_type"` +} + +// getEC2IMDBSv1Creds gets Yandex Cloud IAM token using Amazon EC2 IMDBSv1 +func getEC2IMDBSv1Creds(cfg *apiConfig) (*apiCredentials, error) { + endpoint := "http://169.254.169.254/latest/meta-data/iam/security-credentials/default" + resp, err := cfg.client.Get(endpoint) + if err != nil { + return nil, fmt.Errorf("cannot read Amazon EC2 IMDBSv1 token from %s: %w", endpoint, err) } data, err := readResponseBody(resp, endpoint) if err != nil { @@ -181,7 +201,7 @@ func getInstanceCreds(cfg *apiConfig) (*apiCredentials, error) { var ac apiCredentials if err := json.Unmarshal(data, &ac); err != nil { - return nil, fmt.Errorf("cannot parse auth credentials response from %s: %w", endpoint, err) + return nil, fmt.Errorf("cannot parse Amazon EC2 IMDBSv1 token from %s: %w; data=%s", endpoint, err, data) } return &ac, nil } @@ -198,7 +218,7 @@ func getIAMToken(cfg *apiConfig) (*iamToken, error) { body := bytes.NewBuffer(passport) resp, err := cfg.client.Post(iamURL, "application/json", body) if err != nil { - logger.Panicf("BUG: cannot create request to yandex cloud iam api %q: %s", iamURL, err) + return nil, fmt.Errorf("cannot send request to yandex cloud iam api %q: %s", iamURL, err) } data, err := readResponseBody(resp, iamURL) if err != nil { @@ -206,7 +226,7 @@ func getIAMToken(cfg *apiConfig) (*iamToken, error) { } var it iamToken if err := json.Unmarshal(data, &it); err != nil { - return nil, fmt.Errorf("cannot parse iam token: %w; data: %s", err, data) + return nil, fmt.Errorf("cannot parse iam token from %s: %w; data: %s", iamURL, err, data) } return &it, nil }