lib/awsapi: properly assume role with webIdentity token (#5495)

* lib/awsapi: properly assume role with webIdentity token
introduce new irsaRoleArn param for config. It's only needed for authorization with webIdentity token.
First credentials obtained with irsa role and the next sts assume call for an actual roleArn made with those credentials.
Common use case for it - cross AWS accounts authorization
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822

* wip

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Nikolay 2023-12-20 18:05:39 +01:00 committed by GitHub
parent 5a88bc973f
commit 7cfde237ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 13 deletions

View file

@ -38,6 +38,7 @@ The sandbox cluster installation is running under the constant load generated by
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle queries, which wrap [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) with multiple arguments without explicitly specified lookbehind window in square brackets into [aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions). For example, `sum(quantile_over_time(0.5, process_resident_memory_bytes))` was resulting to `expecting at least 2 args to ...; got 1 args` error. Thanks to @atykhyy for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5414). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle queries, which wrap [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) with multiple arguments without explicitly specified lookbehind window in square brackets into [aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions). For example, `sum(quantile_over_time(0.5, process_resident_memory_bytes))` was resulting to `expecting at least 2 args to ...; got 1 args` error. Thanks to @atykhyy for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5414).
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried. * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details.
## [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0) ## [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0)

View file

@ -17,9 +17,15 @@ import (
// Config represent aws access configuration. // Config represent aws access configuration.
type Config struct { type Config struct {
client *http.Client client *http.Client
region string region string
roleARN string roleARN string
// IRSA may use a different role for assume API call.
// It can only be set via AWS_ROLE_ARN env variable.
// See https://docs.aws.amazon.com/eks/latest/userguide/pod-configuration.html
irsaRoleARN string
webTokenPath string webTokenPath string
ec2Endpoint string ec2Endpoint string
@ -49,6 +55,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
client: http.DefaultClient, client: http.DefaultClient,
region: region, region: region,
roleARN: roleARN, roleARN: roleARN,
irsaRoleARN: os.Getenv("AWS_ROLE_ARN"),
service: service, service: service,
defaultAccessKey: os.Getenv("AWS_ACCESS_KEY_ID"), defaultAccessKey: os.Getenv("AWS_ACCESS_KEY_ID"),
defaultSecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), defaultSecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"),
@ -69,7 +76,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
cfg.roleARN = os.Getenv("AWS_ROLE_ARN") cfg.roleARN = os.Getenv("AWS_ROLE_ARN")
} }
cfg.webTokenPath = os.Getenv("AWS_WEB_IDENTITY_TOKEN_FILE") cfg.webTokenPath = os.Getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
if cfg.webTokenPath != "" && cfg.roleARN == "" { if cfg.webTokenPath != "" && cfg.irsaRoleARN == "" {
return nil, fmt.Errorf("roleARN is missing for AWS_WEB_IDENTITY_TOKEN_FILE=%q; set it via env var AWS_ROLE_ARN", cfg.webTokenPath) return nil, fmt.Errorf("roleARN is missing for AWS_WEB_IDENTITY_TOKEN_FILE=%q; set it via env var AWS_ROLE_ARN", cfg.webTokenPath)
} }
// explicitly set credentials has priority over env variables // explicitly set credentials has priority over env variables
@ -83,6 +90,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
AccessKeyID: cfg.defaultAccessKey, AccessKeyID: cfg.defaultAccessKey,
SecretAccessKey: cfg.defaultSecretKey, SecretAccessKey: cfg.defaultSecretKey,
} }
return cfg, nil return cfg, nil
} }
@ -201,7 +209,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot read webToken from path: %q, err: %w", cfg.webTokenPath, err) return nil, fmt.Errorf("cannot read webToken from path: %q, err: %w", cfg.webTokenPath, err)
} }
return cfg.getRoleWebIdentityCredentials(string(token)) return cfg.getRoleWebIdentityCredentials(string(token), cfg.irsaRoleARN)
} }
if ecsMetaURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(ecsMetaURI) > 0 { if ecsMetaURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(ecsMetaURI) > 0 {
path := "http://169.254.170.2" + ecsMetaURI path := "http://169.254.170.2" + ecsMetaURI
@ -223,7 +231,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
// read credentials from sts api, if role_arn is defined // read credentials from sts api, if role_arn is defined
if len(cfg.roleARN) > 0 { if len(cfg.roleARN) > 0 {
ac, err := cfg.getRoleARNCredentials(acNew) ac, err := cfg.getRoleARNCredentials(acNew, cfg.roleARN)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot get credentials for role_arn %q: %w", cfg.roleARN, err) return nil, fmt.Errorf("cannot get credentials for role_arn %q: %w", cfg.roleARN, err)
} }
@ -330,28 +338,44 @@ func getMetadataByPath(client *http.Client, apiPath string) ([]byte, error) {
} }
// getRoleWebIdentityCredentials obtains credentials for the given roleARN with webToken. // getRoleWebIdentityCredentials obtains credentials for the given roleARN with webToken.
//
// https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html // https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
// aws IRSA for kubernetes. // aws IRSA for kubernetes.
// https://aws.amazon.com/blogs/opensource/introducing-fine-grained-iam-roles-service-accounts/ // https://aws.amazon.com/blogs/opensource/introducing-fine-grained-iam-roles-service-accounts/
func (cfg *Config) getRoleWebIdentityCredentials(token string) (*credentials, error) { func (cfg *Config) getRoleWebIdentityCredentials(token, roleARN string) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", func(apiURL string) (*http.Request, error) { data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", roleARN, func(apiURL string) (*http.Request, error) {
apiURL += fmt.Sprintf("&WebIdentityToken=%s", url.QueryEscape(token)) apiURL += fmt.Sprintf("&WebIdentityToken=%s", url.QueryEscape(token))
return http.NewRequest(http.MethodGet, apiURL, nil) return http.NewRequest(http.MethodGet, apiURL, nil)
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }
return parseARNCredentials(data, "AssumeRoleWithWebIdentity") creds, err := parseARNCredentials(data, "AssumeRoleWithWebIdentity")
if err != nil {
return nil, err
}
if roleARN != cfg.roleARN {
// need to assume a different role
assumeCreds, err := cfg.getRoleARNCredentials(creds, cfg.roleARN)
if err != nil {
return nil, fmt.Errorf("cannot assume chained role=%q for roleARN=%q: %w", cfg.roleARN, roleARN, err)
}
if assumeCreds.Expiration.After(creds.Expiration) {
assumeCreds.Expiration = creds.Expiration
}
return assumeCreds, nil
}
return creds, nil
} }
// getSTSAPIResponse makes request to aws sts api with the given cfg and returns temporary credentials with expiration time. // getSTSAPIResponse makes request to aws sts api with the given cfg and returns temporary credentials with expiration time.
// //
// See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html // See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html
func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) { func (cfg *Config) getSTSAPIResponse(action string, roleARN string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) {
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/Query-Requests.html // See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/Query-Requests.html
apiURL := fmt.Sprintf("%s?Action=%s", cfg.stsEndpoint, action) apiURL := fmt.Sprintf("%s?Action=%s", cfg.stsEndpoint, action)
apiURL += "&Version=2011-06-15" apiURL += "&Version=2011-06-15"
apiURL += fmt.Sprintf("&RoleArn=%s", cfg.roleARN) apiURL += fmt.Sprintf("&RoleArn=%s", roleARN)
// we have to provide unique session name for cloudtrail audit // we have to provide unique session name for cloudtrail audit
apiURL += "&RoleSessionName=vmagent-ec2-discovery" apiURL += "&RoleSessionName=vmagent-ec2-discovery"
req, err := reqBuilder(apiURL) req, err := reqBuilder(apiURL)
@ -366,8 +390,8 @@ func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL strin
} }
// getRoleARNCredentials obtains credentials for the given roleARN. // getRoleARNCredentials obtains credentials for the given roleARN.
func (cfg *Config) getRoleARNCredentials(creds *credentials) (*credentials, error) { func (cfg *Config) getRoleARNCredentials(creds *credentials, roleARN string) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRole", func(apiURL string) (*http.Request, error) { data, err := cfg.getSTSAPIResponse("AssumeRole", roleARN, func(apiURL string) (*http.Request, error) {
return newSignedGetRequest(apiURL, "sts", cfg.region, creds) return newSignedGetRequest(apiURL, "sts", cfg.region, creds)
}) })
if err != nil { if err != nil {