From 7cfde237ecd0a64f80c6c64a0646eea4dd96e8bc Mon Sep 17 00:00:00 2001 From: Nikolay Date: Wed, 20 Dec 2023 18:05:39 +0100 Subject: [PATCH] lib/awsapi: properly assume role with webIdentity token (#5495) * lib/awsapi: properly assume role with webIdentity token Introduce a new irsaRoleARN field for the config. It is only needed for authorization with a webIdentity token. Credentials are first obtained with the IRSA role, and the subsequent STS AssumeRole call for the actual roleARN is then made with those credentials. A common use case is cross-account AWS authorization. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822 * wip --------- Co-authored-by: Aliaksandr Valialkin --- docs/CHANGELOG.md | 1 + lib/awsapi/config.go | 50 ++++++++++++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 16ad0013f..957959e5c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -38,6 +38,7 @@ The sandbox cluster installation is running under the constant load generated by * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema. * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle queries, which wrap [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) with multiple arguments without explicitly specified lookbehind window in square brackets into [aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions). For example, `sum(quantile_over_time(0.5, process_resident_memory_bytes))` was resulting to `expecting at least 2 args to ...; got 1 args` error. Thanks to @atykhyy for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5414). 
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried. +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details. ## [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0) diff --git a/lib/awsapi/config.go b/lib/awsapi/config.go index 4f3d53514..862aabba9 100644 --- a/lib/awsapi/config.go +++ b/lib/awsapi/config.go @@ -17,9 +17,15 @@ import ( // Config represent aws access configuration. type Config struct { - client *http.Client - region string - roleARN string + client *http.Client + region string + roleARN string + + // IRSA may use a different role for assume API call. + // It can only be set via AWS_ROLE_ARN env variable. 
+ // See https://docs.aws.amazon.com/eks/latest/userguide/pod-configuration.html + irsaRoleARN string + webTokenPath string ec2Endpoint string @@ -49,6 +55,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, client: http.DefaultClient, region: region, roleARN: roleARN, + irsaRoleARN: os.Getenv("AWS_ROLE_ARN"), service: service, defaultAccessKey: os.Getenv("AWS_ACCESS_KEY_ID"), defaultSecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), @@ -69,7 +76,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, cfg.roleARN = os.Getenv("AWS_ROLE_ARN") } cfg.webTokenPath = os.Getenv("AWS_WEB_IDENTITY_TOKEN_FILE") - if cfg.webTokenPath != "" && cfg.roleARN == "" { + if cfg.webTokenPath != "" && cfg.irsaRoleARN == "" { return nil, fmt.Errorf("roleARN is missing for AWS_WEB_IDENTITY_TOKEN_FILE=%q; set it via env var AWS_ROLE_ARN", cfg.webTokenPath) } // explicitly set credentials has priority over env variables @@ -83,6 +90,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey, AccessKeyID: cfg.defaultAccessKey, SecretAccessKey: cfg.defaultSecretKey, } + return cfg, nil } @@ -201,7 +209,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) { if err != nil { return nil, fmt.Errorf("cannot read webToken from path: %q, err: %w", cfg.webTokenPath, err) } - return cfg.getRoleWebIdentityCredentials(string(token)) + return cfg.getRoleWebIdentityCredentials(string(token), cfg.irsaRoleARN) } if ecsMetaURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(ecsMetaURI) > 0 { path := "http://169.254.170.2" + ecsMetaURI @@ -223,7 +231,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) { // read credentials from sts api, if role_arn is defined if len(cfg.roleARN) > 0 { - ac, err := cfg.getRoleARNCredentials(acNew) + ac, err := cfg.getRoleARNCredentials(acNew, cfg.roleARN) if err != nil { return nil, fmt.Errorf("cannot get credentials for role_arn %q: %w", cfg.roleARN, 
err) } @@ -330,28 +338,44 @@ func getMetadataByPath(client *http.Client, apiPath string) ([]byte, error) { } // getRoleWebIdentityCredentials obtains credentials for the given roleARN with webToken. +// // https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html // aws IRSA for kubernetes. // https://aws.amazon.com/blogs/opensource/introducing-fine-grained-iam-roles-service-accounts/ -func (cfg *Config) getRoleWebIdentityCredentials(token string) (*credentials, error) { - data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", func(apiURL string) (*http.Request, error) { +func (cfg *Config) getRoleWebIdentityCredentials(token, roleARN string) (*credentials, error) { + data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", roleARN, func(apiURL string) (*http.Request, error) { apiURL += fmt.Sprintf("&WebIdentityToken=%s", url.QueryEscape(token)) return http.NewRequest(http.MethodGet, apiURL, nil) }) if err != nil { return nil, err } - return parseARNCredentials(data, "AssumeRoleWithWebIdentity") + creds, err := parseARNCredentials(data, "AssumeRoleWithWebIdentity") + if err != nil { + return nil, err + } + if roleARN != cfg.roleARN { + // need to assume a different role + assumeCreds, err := cfg.getRoleARNCredentials(creds, cfg.roleARN) + if err != nil { + return nil, fmt.Errorf("cannot assume chained role=%q for roleARN=%q: %w", cfg.roleARN, roleARN, err) + } + if assumeCreds.Expiration.After(creds.Expiration) { + assumeCreds.Expiration = creds.Expiration + } + return assumeCreds, nil + } + return creds, nil } // getSTSAPIResponse makes request to aws sts api with the given cfg and returns temporary credentials with expiration time. 
// // See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html -func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) { +func (cfg *Config) getSTSAPIResponse(action string, roleARN string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) { // See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/Query-Requests.html apiURL := fmt.Sprintf("%s?Action=%s", cfg.stsEndpoint, action) apiURL += "&Version=2011-06-15" - apiURL += fmt.Sprintf("&RoleArn=%s", cfg.roleARN) + apiURL += fmt.Sprintf("&RoleArn=%s", roleARN) // we have to provide unique session name for cloudtrail audit apiURL += "&RoleSessionName=vmagent-ec2-discovery" req, err := reqBuilder(apiURL) @@ -366,8 +390,8 @@ func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL strin } // getRoleARNCredentials obtains credentials for the given roleARN. -func (cfg *Config) getRoleARNCredentials(creds *credentials) (*credentials, error) { - data, err := cfg.getSTSAPIResponse("AssumeRole", func(apiURL string) (*http.Request, error) { +func (cfg *Config) getRoleARNCredentials(creds *credentials, roleARN string) (*credentials, error) { + data, err := cfg.getSTSAPIResponse("AssumeRole", roleARN, func(apiURL string) (*http.Request, error) { return newSignedGetRequest(apiURL, "sts", cfg.region, creds) }) if err != nil {