lib/awsapi: properly assume role with webIdentity token (#5495)

* lib/awsapi: properly assume role with webIdentity token
introduce new irsaRoleArn param for config. It's only needed for authorization with webIdentity token.
First credentials obtained with irsa role and the next sts assume call for an actual roleArn made with those credentials.
Common use case for it - cross AWS accounts authorization
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822

* wip

---------

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Nikolay 2023-12-20 18:05:39 +01:00 committed by GitHub
parent 5a88bc973f
commit 7cfde237ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 13 deletions

View file

@ -38,6 +38,7 @@ The sandbox cluster installation is running under the constant load generated by
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle queries, which wrap [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions) with multiple arguments without explicitly specified lookbehind window in square brackets into [aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions). For example, `sum(quantile_over_time(0.5, process_resident_memory_bytes))` was resulting to `expecting at least 2 args to ...; got 1 args` error. Thanks to @atykhyy for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5414).
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details.
## [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0)

View file

@ -17,9 +17,15 @@ import (
// Config represent aws access configuration.
type Config struct {
client *http.Client
region string
roleARN string
client *http.Client
region string
roleARN string
// IRSA may use a different role for assume API call.
// It can only be set via AWS_ROLE_ARN env variable.
// See https://docs.aws.amazon.com/eks/latest/userguide/pod-configuration.html
irsaRoleARN string
webTokenPath string
ec2Endpoint string
@ -49,6 +55,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
client: http.DefaultClient,
region: region,
roleARN: roleARN,
irsaRoleARN: os.Getenv("AWS_ROLE_ARN"),
service: service,
defaultAccessKey: os.Getenv("AWS_ACCESS_KEY_ID"),
defaultSecretKey: os.Getenv("AWS_SECRET_ACCESS_KEY"),
@ -69,7 +76,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
cfg.roleARN = os.Getenv("AWS_ROLE_ARN")
}
cfg.webTokenPath = os.Getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
if cfg.webTokenPath != "" && cfg.roleARN == "" {
if cfg.webTokenPath != "" && cfg.irsaRoleARN == "" {
return nil, fmt.Errorf("roleARN is missing for AWS_WEB_IDENTITY_TOKEN_FILE=%q; set it via env var AWS_ROLE_ARN", cfg.webTokenPath)
}
// explicitly set credentials has priority over env variables
@ -83,6 +90,7 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
AccessKeyID: cfg.defaultAccessKey,
SecretAccessKey: cfg.defaultSecretKey,
}
return cfg, nil
}
@ -201,7 +209,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
if err != nil {
return nil, fmt.Errorf("cannot read webToken from path: %q, err: %w", cfg.webTokenPath, err)
}
return cfg.getRoleWebIdentityCredentials(string(token))
return cfg.getRoleWebIdentityCredentials(string(token), cfg.irsaRoleARN)
}
if ecsMetaURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(ecsMetaURI) > 0 {
path := "http://169.254.170.2" + ecsMetaURI
@ -223,7 +231,7 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
// read credentials from sts api, if role_arn is defined
if len(cfg.roleARN) > 0 {
ac, err := cfg.getRoleARNCredentials(acNew)
ac, err := cfg.getRoleARNCredentials(acNew, cfg.roleARN)
if err != nil {
return nil, fmt.Errorf("cannot get credentials for role_arn %q: %w", cfg.roleARN, err)
}
@ -330,28 +338,44 @@ func getMetadataByPath(client *http.Client, apiPath string) ([]byte, error) {
}
// getRoleWebIdentityCredentials obtains credentials for the given roleARN with webToken.
//
// https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
// aws IRSA for kubernetes.
// https://aws.amazon.com/blogs/opensource/introducing-fine-grained-iam-roles-service-accounts/
func (cfg *Config) getRoleWebIdentityCredentials(token string) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", func(apiURL string) (*http.Request, error) {
func (cfg *Config) getRoleWebIdentityCredentials(token, roleARN string) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRoleWithWebIdentity", roleARN, func(apiURL string) (*http.Request, error) {
apiURL += fmt.Sprintf("&WebIdentityToken=%s", url.QueryEscape(token))
return http.NewRequest(http.MethodGet, apiURL, nil)
})
if err != nil {
return nil, err
}
return parseARNCredentials(data, "AssumeRoleWithWebIdentity")
creds, err := parseARNCredentials(data, "AssumeRoleWithWebIdentity")
if err != nil {
return nil, err
}
if roleARN != cfg.roleARN {
// need to assume a different role
assumeCreds, err := cfg.getRoleARNCredentials(creds, cfg.roleARN)
if err != nil {
return nil, fmt.Errorf("cannot assume chained role=%q for roleARN=%q: %w", cfg.roleARN, roleARN, err)
}
if assumeCreds.Expiration.After(creds.Expiration) {
assumeCreds.Expiration = creds.Expiration
}
return assumeCreds, nil
}
return creds, nil
}
// getSTSAPIResponse makes request to aws sts api with the given cfg and returns temporary credentials with expiration time.
//
// See https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html
func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) {
func (cfg *Config) getSTSAPIResponse(action string, roleARN string, reqBuilder func(apiURL string) (*http.Request, error)) ([]byte, error) {
// See https://docs.aws.amazon.com/AWSEC2/latest/APIReference/Query-Requests.html
apiURL := fmt.Sprintf("%s?Action=%s", cfg.stsEndpoint, action)
apiURL += "&Version=2011-06-15"
apiURL += fmt.Sprintf("&RoleArn=%s", cfg.roleARN)
apiURL += fmt.Sprintf("&RoleArn=%s", roleARN)
// we have to provide unique session name for cloudtrail audit
apiURL += "&RoleSessionName=vmagent-ec2-discovery"
req, err := reqBuilder(apiURL)
@ -366,8 +390,8 @@ func (cfg *Config) getSTSAPIResponse(action string, reqBuilder func(apiURL strin
}
// getRoleARNCredentials obtains credentials for the given roleARN.
func (cfg *Config) getRoleARNCredentials(creds *credentials) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRole", func(apiURL string) (*http.Request, error) {
func (cfg *Config) getRoleARNCredentials(creds *credentials, roleARN string) (*credentials, error) {
data, err := cfg.getSTSAPIResponse("AssumeRole", roleARN, func(apiURL string) (*http.Request, error) {
return newSignedGetRequest(apiURL, "sts", cfg.region, creds)
})
if err != nil {