VictoriaMetrics/lib/promscrape/discovery/openstack/api.go

208 lines
6 KiB
Go
Raw Normal View History

package openstack
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"path"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
)
// configMap is the package-level config map used by getAPIConfig:
// getAPIConfig passes the SDConfig and a newAPIConfig callback to configMap.Get
// and expects the returned value to be *apiConfig.
var configMap = discoveryutils.NewConfigMap()
// apiCredentials holds the data obtained by getCreds from the keystone api.
//
// apiCredentials can be refreshed: getFreshAPICredentials replaces the cached value
// when it expires in 10 seconds or less.
type apiCredentials struct {
// computeURL is the compute endpoint url returned by getComputeEndpointURL
computeURL *url.URL
// token is the value of the X-Subject-Token response header;
// getAPIResponse sends it in the X-Auth-Token request header
token string
// expiration is the token expiration time taken from the auth response
expiration time.Time
}
// apiConfig contains the state needed for querying keystone and the OpenStack api.
//
// It is created by newAPIConfig.
type apiConfig struct {
// client is used by getCreds and getAPIResponse for sending http requests
client *http.Client
// port is copied from SDConfig.Port; newAPIConfig sets it to 80 if SDConfig.Port is zero
port int
// tokenLock guards creds refresh
tokenLock sync.Mutex
// creds holds the last credentials returned by getCreds;
// it is populated at getFreshAPICredentials
creds *apiCredentials
// authTokenReq contains request body for apiCredentials
authTokenReq []byte
// keystone endpoint
endpoint *url.URL
// allTenants is copied from SDConfig.AllTenants
allTenants bool
// region is passed to getComputeEndpointURL for filtering compute endpoint
region string
// availability public, internal, admin for filtering compute endpoint;
// newAPIConfig sets it to "public" if it is empty
availability string
}
// getFreshAPICredentials returns credentials which expire in more than 10 seconds.
//
// The cached cfg.creds are returned if they are still fresh enough.
// Otherwise new credentials are requested via getCreds, stored in cfg.creds and returned.
// The whole check-and-refresh sequence is performed under cfg.tokenLock.
func (cfg *apiConfig) getFreshAPICredentials() (*apiCredentials, error) {
	cfg.tokenLock.Lock()
	defer cfg.tokenLock.Unlock()

	if cached := cfg.creds; cached != nil {
		if remaining := time.Until(cached.expiration); remaining > 10*time.Second {
			// Credentials aren't expired yet.
			return cached, nil
		}
	}

	refreshed, err := getCreds(cfg)
	if err != nil {
		return nil, fmt.Errorf("cannot refresh OpenStack api token: %w", err)
	}
	cfg.creds = refreshed

	ttl := time.Until(refreshed.expiration)
	logger.Infof("successfully refreshed OpenStack api token; expiration: %.3f seconds", ttl.Seconds())
	return refreshed, nil
}
// getAPIConfig returns the *apiConfig obtained from configMap for the given sdc.
//
// configMap.Get receives a callback, which builds the config via newAPIConfig(sdc, baseDir).
func getAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
	build := func() (any, error) {
		return newAPIConfig(sdc, baseDir)
	}
	cached, err := configMap.Get(sdc, build)
	if err != nil {
		return nil, err
	}
	cfg := cached.(*apiConfig)
	return cfg, nil
}
func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
port := sdc.Port
if port == 0 {
port = 80
}
cfg := &apiConfig{
client: &http.Client{
Transport: &http.Transport{
MaxIdleConnsPerHost: 100,
},
},
availability: sdc.Availability,
region: sdc.Region,
allTenants: sdc.AllTenants,
port: port,
}
if sdc.TLSConfig != nil {
opts := &promauth.Options{
BaseDir: baseDir,
TLSConfig: sdc.TLSConfig,
}
ac, err := opts.NewConfig()
if err != nil {
cfg.client.CloseIdleConnections()
lib/promauth: follow-up for e16d3f5639d67ff970975d342aaa276e339e9b0c - Make sure that invalid/missing TLS CA file or TLS client certificate files at vmagent startup don't prevent from processing the corresponding scrape targets after the file becomes correct, without the need to restart vmagent. Previously scrape targets with invalid TLS CA file or TLS client certificate files were permanently dropped after the first attempt to initialize them, and they didn't appear until the next vmagent reload or the next change in other places of the loaded scrape configs. - Make sure that TLS CA is properly re-loaded from file after it changes without the need to restart vmagent. Previously the old TLS CA was used until vmagent restart. - Properly handle errors during http request creation for the second attempt to send data to remote system at vmagent and vmalert. Previously failed request creation could result in nil pointer dereferencing, since the returned request is nil on error. - Add more context to the logged error during AWS sigv4 request signing before sending the data to -remoteWrite.url at vmagent. Previously it could miss details on the source of the request. - Do not create a new HTTP client per second when generating OAuth2 token needed to put in Authorization header of every http request issued by vmagent during service discovery or target scraping. Re-use the HTTP client instead until the corresponding scrape config changes. - Cache error at lib/promauth.Config.GetAuthHeader() in the same way as the auth header is cached, e.g. the error is cached for a second now. This should reduce load on CPU and OAuth2 server when auth header cannot be obtained because of temporary error. - Share tls.Config.GetClientCertificate function among multiple scrape targets with the same tls_config. Cache the loaded certificate and the error for one second. This should significantly reduce CPU load when scraping big number of targets with the same tls_config. 
- Allow loading TLS certificates from HTTP and HTTPs urls by specifying these urls at `tls_config->cert_file` and `tls_config->key_file`. - Improve test coverage at lib/promauth - Skip unreachable or invalid files specified at `scrape_config_files` during vmagent startup, since these files may become valid later. Previously vmagent was exitting in this case. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959
2023-10-25 21:19:33 +00:00
return nil, fmt.Errorf("cannot parse TLS config: %w", err)
}
cfg.client.Transport = ac.NewRoundTripper(&http.Transport{
MaxIdleConnsPerHost: 100,
lib/{promauth,promscrape}: automatically refresh root CA certificates after changes on disk (#5725) * lib/{promauth,promscrape}: automatically refresh root CA certificates after changes on disk Added a custom `http.RoundTripper` implementation which checks for root CA content changes and updates `tls.Config` used by `http.RoundTripper` after detecting CA change. Client certificate changes are not tracked by this implementation since `tls.Config` already supports passing certificate dynamically by overriding `tls.Config.GetClientCertificate`. This change implements dynamic reload of root CA only for streaming client used for scraping. Blocking client (`fasthttp.HostClient`) does not support using custom transport so can't use this implementation. See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5526 Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promauth/config: update NewRoundTripper API Update API to allow user to update only parameters required for transport. Add warning log when reloading Root CA failed. Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promauth/config: fix mutex acquire logic Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promauth/config: replace RWMutex with regular mutex to simplify the code - remove additional mutex used for getRootCABytes - require callee to use mutex - replace RWMutex with regular mutex Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promauth/config: refactor - hold the mutex lock to avoid round tripper being re-created twice - move recreation logic into separate func to simplify the code Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> --------- Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: Nikolay <nik@victoriametrics.com>
2024-04-03 08:01:43 +00:00
})
}
// use public compute endpoint by default
if len(cfg.availability) == 0 {
cfg.availability = "public"
}
// create new variable to prevent side effects
sdcAuth := *sdc
// special case if identity_endpoint is not defined
if len(sdcAuth.IdentityEndpoint) == 0 {
// override sdc
sdcAuth = readCredentialsFromEnv()
}
if strings.HasSuffix(sdcAuth.IdentityEndpoint, "v2.0") {
cfg.client.CloseIdleConnections()
return nil, errors.New("identity_endpoint v2.0 is not supported")
}
// trim .0 from v3.0 for prometheus cfg compatibility
sdcAuth.IdentityEndpoint = strings.TrimSuffix(sdcAuth.IdentityEndpoint, ".0")
parsedURL, err := url.Parse(sdcAuth.IdentityEndpoint)
if err != nil {
cfg.client.CloseIdleConnections()
lib/promauth: follow-up for e16d3f5639d67ff970975d342aaa276e339e9b0c - Make sure that invalid/missing TLS CA file or TLS client certificate files at vmagent startup don't prevent from processing the corresponding scrape targets after the file becomes correct, without the need to restart vmagent. Previously scrape targets with invalid TLS CA file or TLS client certificate files were permanently dropped after the first attempt to initialize them, and they didn't appear until the next vmagent reload or the next change in other places of the loaded scrape configs. - Make sure that TLS CA is properly re-loaded from file after it changes without the need to restart vmagent. Previously the old TLS CA was used until vmagent restart. - Properly handle errors during http request creation for the second attempt to send data to remote system at vmagent and vmalert. Previously failed request creation could result in nil pointer dereferencing, since the returned request is nil on error. - Add more context to the logged error during AWS sigv4 request signing before sending the data to -remoteWrite.url at vmagent. Previously it could miss details on the source of the request. - Do not create a new HTTP client per second when generating OAuth2 token needed to put in Authorization header of every http request issued by vmagent during service discovery or target scraping. Re-use the HTTP client instead until the corresponding scrape config changes. - Cache error at lib/promauth.Config.GetAuthHeader() in the same way as the auth header is cached, e.g. the error is cached for a second now. This should reduce load on CPU and OAuth2 server when auth header cannot be obtained because of temporary error. - Share tls.Config.GetClientCertificate function among multiple scrape targets with the same tls_config. Cache the loaded certificate and the error for one second. This should significantly reduce CPU load when scraping big number of targets with the same tls_config. 
- Allow loading TLS certificates from HTTP and HTTPs urls by specifying these urls at `tls_config->cert_file` and `tls_config->key_file`. - Improve test coverage at lib/promauth - Skip unreachable or invalid files specified at `scrape_config_files` during vmagent startup, since these files may become valid later. Previously vmagent was exitting in this case. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959
2023-10-25 21:19:33 +00:00
return nil, fmt.Errorf("cannot parse identity_endpoint %s as url: %w", sdcAuth.IdentityEndpoint, err)
}
cfg.endpoint = parsedURL
tokenReq, err := buildAuthRequestBody(&sdcAuth)
if err != nil {
cfg.client.CloseIdleConnections()
return nil, err
}
cfg.authTokenReq = tokenReq
// cfg.creds is populated at getFreshAPICredentials
return cfg, nil
}
// getCreds makes a call to openstack keystone api and retrieves token and computeURL
//
// See https://docs.openstack.org/api-ref/identity/v3/
func getCreds(cfg *apiConfig) (*apiCredentials, error) {
apiURL := *cfg.endpoint
apiURL.Path = path.Join(apiURL.Path, "auth", "tokens")
resp, err := cfg.client.Post(apiURL.String(), "application/json", bytes.NewBuffer(cfg.authTokenReq))
if err != nil {
lib/promauth: follow-up for e16d3f5639d67ff970975d342aaa276e339e9b0c - Make sure that invalid/missing TLS CA file or TLS client certificate files at vmagent startup don't prevent from processing the corresponding scrape targets after the file becomes correct, without the need to restart vmagent. Previously scrape targets with invalid TLS CA file or TLS client certificate files were permanently dropped after the first attempt to initialize them, and they didn't appear until the next vmagent reload or the next change in other places of the loaded scrape configs. - Make sure that TLS CA is properly re-loaded from file after it changes without the need to restart vmagent. Previously the old TLS CA was used until vmagent restart. - Properly handle errors during http request creation for the second attempt to send data to remote system at vmagent and vmalert. Previously failed request creation could result in nil pointer dereferencing, since the returned request is nil on error. - Add more context to the logged error during AWS sigv4 request signing before sending the data to -remoteWrite.url at vmagent. Previously it could miss details on the source of the request. - Do not create a new HTTP client per second when generating OAuth2 token needed to put in Authorization header of every http request issued by vmagent during service discovery or target scraping. Re-use the HTTP client instead until the corresponding scrape config changes. - Cache error at lib/promauth.Config.GetAuthHeader() in the same way as the auth header is cached, e.g. the error is cached for a second now. This should reduce load on CPU and OAuth2 server when auth header cannot be obtained because of temporary error. - Share tls.Config.GetClientCertificate function among multiple scrape targets with the same tls_config. Cache the loaded certificate and the error for one second. This should significantly reduce CPU load when scraping big number of targets with the same tls_config. 
- Allow loading TLS certificates from HTTP and HTTPs urls by specifying these urls at `tls_config->cert_file` and `tls_config->key_file`. - Improve test coverage at lib/promauth - Skip unreachable or invalid files specified at `scrape_config_files` during vmagent startup, since these files may become valid later. Previously vmagent was exitting in this case. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959
2023-10-25 21:19:33 +00:00
return nil, fmt.Errorf("failed query openstack identity api at url %s: %w", apiURL.String(), err)
}
r, err := io.ReadAll(resp.Body)
_ = resp.Body.Close()
if err != nil {
return nil, fmt.Errorf("cannot read response from %q: %w", apiURL.String(), err)
}
if resp.StatusCode != http.StatusCreated {
return nil, fmt.Errorf("auth failed, bad status code: %d, want: 201", resp.StatusCode)
}
at := resp.Header.Get("X-Subject-Token")
if len(at) == 0 {
return nil, fmt.Errorf("auth failed, response without X-Subject-Token")
}
var ar authResponse
if err := json.Unmarshal(r, &ar); err != nil {
return nil, fmt.Errorf("cannot parse auth credentials response: %w", err)
}
computeURL, err := getComputeEndpointURL(ar.Token.Catalog, cfg.availability, cfg.region)
if err != nil {
return nil, fmt.Errorf("cannot get computeEndpoint, account doesn't have enough permissions, "+
"availability: %s, region: %s; error: %w", cfg.availability, cfg.region, err)
}
return &apiCredentials{
token: at,
expiration: ar.Token.ExpiresAt,
computeURL: computeURL,
}, nil
}
// readResponseBody reads and closes resp.Body and returns the read data.
//
// An error is returned if the body cannot be read or if the response
// status code differs from 200. apiURL is used only in error messages.
func readResponseBody(resp *http.Response, apiURL string) ([]byte, error) {
	body, readErr := io.ReadAll(resp.Body)
	// The body is closed regardless of the read result; the close error is ignored.
	_ = resp.Body.Close()

	switch {
	case readErr != nil:
		return nil, fmt.Errorf("cannot read response from %q: %w", apiURL, readErr)
	case resp.StatusCode != http.StatusOK:
		return nil, fmt.Errorf("unexpected status code for %q; got %d; want %d; response body: %q",
			apiURL, resp.StatusCode, http.StatusOK, body)
	}
	return body, nil
}
// getAPIResponse sends GET request to the given openstack apiURL and returns the response body.
//
// The request is authorized with the token from cfg.getFreshAPICredentials,
// which is passed in the X-Auth-Token header.
// The response is processed by readResponseBody.
func getAPIResponse(apiURL string, cfg *apiConfig) ([]byte, error) {
	// Obtain the token first, so the request isn't created if the auth fails.
	creds, err := cfg.getFreshAPICredentials()
	if err != nil {
		return nil, err
	}

	request, err := http.NewRequest(http.MethodGet, apiURL, nil)
	if err != nil {
		return nil, fmt.Errorf("cannot create new request for openstack api url %s: %w", apiURL, err)
	}
	request.Header.Set("X-Auth-Token", creds.token)

	response, err := cfg.client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("cannot query openstack api url %s: %w", apiURL, err)
	}

	body, err := readResponseBody(response, apiURL)
	return body, err
}