From 61594d2bd85cf7bb66f24f36d28cff47db55ee5b Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Mon, 13 Nov 2023 08:23:35 +0100
Subject: [PATCH] app/vmauth: follow-up for
 323f3720ed33a834ce32c2a6b95101aad2e5042c

- Re-use identically configured http.Transport across multiple users.
  This fixes handling of the limit on the number of connection, which can be established per each backend
  via -maxIdleConnsPerBackend command-line flag. This limit stopped working after 323f3720ed33a834ce32c2a6b95101aad2e5042c

- Add docs about backend TLS setup at https://docs.victoriametrics.com/vmauth.html#backend-tls-setup

- Add ability to disable backend TLS verification for all the users via -backend.tlsInsecureSkipVerify command-line flag.
  This flag may be useful when -auth.config contains big number of users, and every user must disable backend TLS verification.

- Add ability to specify TLS Root CA via tls_ca_file option at per-user basis and via -backend.tlsCAFile command-line flag
  across all the users.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5240
---
 app/vmauth/README.md           | 25 ++++++++++++
 app/vmauth/auth_config.go      | 15 +++++--
 app/vmauth/auth_config_test.go | 17 ++++++--
 app/vmauth/main.go             | 74 +++++++++++++++++++++++++++++++---
 docs/CHANGELOG.md              |  2 +-
 docs/vmauth.md                 | 25 ++++++++++++
 6 files changed, 145 insertions(+), 13 deletions(-)

diff --git a/app/vmauth/README.md b/app/vmauth/README.md
index cfaeaf44d9..96e656ba2f 100644
--- a/app/vmauth/README.md
+++ b/app/vmauth/README.md
@@ -101,6 +101,31 @@ The following [metrics](#monitoring) related to concurrency limits are exposed b
 - `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
   because of the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
 
+## Backend TLS setup
+
+By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
+in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:
+
+- `backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
+  This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
+  via `tls_insecure_skip_verify` option. For example:
+
+  ```yml
+  - username: "foo"
+    url_prefix: "https://localhost"
+    tls_insecure_skip_verify: true
+  ```
+
+- `backend.tlsCAFile` allows specifying the path to TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
+  The `backend.tlsCAFile` may point either to local file or to `http` / `https` url.
+  This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
+  via `tls_ca_file` option. For example:
+
+  ```yml
+  - username: "foo"
+    url_prefix: "https://localhost"
+    tls_ca_file: "/path/to/tls/root/ca"
+  ```
 
 ## IP filters
 
diff --git a/app/vmauth/auth_config.go b/app/vmauth/auth_config.go
index 7c96ea66ed..3b11f2c6c0 100644
--- a/app/vmauth/auth_config.go
+++ b/app/vmauth/auth_config.go
@@ -50,7 +50,8 @@ type UserInfo struct {
 	MaxConcurrentRequests int         `yaml:"max_concurrent_requests,omitempty"`
 	DefaultURL            *URLPrefix  `yaml:"default_url,omitempty"`
 	RetryStatusCodes      []int       `yaml:"retry_status_codes,omitempty"`
-	TLSInsecureSkipVerify bool        `yaml:"tls_insecure_skip_verify,omitempty"`
+	TLSInsecureSkipVerify *bool       `yaml:"tls_insecure_skip_verify,omitempty"`
+	TLSCAFile             string      `yaml:"tls_ca_file,omitempty"`
 
 	concurrencyLimitCh      chan struct{}
 	concurrencyLimitReached *metrics.Counter
@@ -447,7 +448,11 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
 		_ = metrics.GetOrCreateGauge(`vmauth_unauthorized_user_concurrent_requests_current`, func() float64 {
 			return float64(len(ui.concurrencyLimitCh))
 		})
-		ui.httpTransport = getTransport(ui.TLSInsecureSkipVerify)
+		tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
+		if err != nil {
+			return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
+		}
+		ui.httpTransport = tr
 	}
 	return &ac, nil
 }
@@ -518,7 +523,11 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
 		_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
 			return float64(len(ui.concurrencyLimitCh))
 		})
-		ui.httpTransport = getTransport(ui.TLSInsecureSkipVerify)
+		tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
+		if err != nil {
+			return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
+		}
+		ui.httpTransport = tr
 
 		byAuthToken[at1] = ui
 		byAuthToken[at2] = ui
diff --git a/app/vmauth/auth_config_test.go b/app/vmauth/auth_config_test.go
index bf9902e2be..8f24b60304 100644
--- a/app/vmauth/auth_config_test.go
+++ b/app/vmauth/auth_config_test.go
@@ -221,6 +221,7 @@ func TestParseAuthConfigSuccess(t *testing.T) {
 	}
 
 	// Single user
+	insecureSkipVerifyTrue := true
 	f(`
 users:
 - username: foo
@@ -234,11 +235,12 @@ users:
 			Password:              "bar",
 			URLPrefix:             mustParseURL("http://aaa:343/bbb"),
 			MaxConcurrentRequests: 5,
-			TLSInsecureSkipVerify: true,
+			TLSInsecureSkipVerify: &insecureSkipVerifyTrue,
 		},
 	})
 
 	// Multiple url_prefix entries
+	insecureSkipVerifyFalse := false
 	f(`
 users:
 - username: foo
@@ -246,6 +248,7 @@ users:
   url_prefix:
   - http://node1:343/bbb
   - http://node2:343/bbb
+  tls_insecure_skip_verify: false
 `, map[string]*UserInfo{
 		getAuthToken("", "foo", "bar"): {
 			Username: "foo",
@@ -254,6 +257,7 @@ users:
 				"http://node1:343/bbb",
 				"http://node2:343/bbb",
 			}),
+			TLSInsecureSkipVerify: &insecureSkipVerifyFalse,
 		},
 	})
 
@@ -475,15 +479,20 @@ unauthorized_user:
 	}
 
 	ui := m[getAuthToken("", "foo", "bar")]
-	if ui.TLSInsecureSkipVerify != true || ui.httpTransport.TLSClientConfig.InsecureSkipVerify != true {
+	if !isSetBool(ui.TLSInsecureSkipVerify, true) || !ui.httpTransport.TLSClientConfig.InsecureSkipVerify {
 		t.Fatalf("unexpected TLSInsecureSkipVerify value for user foo")
 	}
 
-	if ac.UnauthorizedUser.TLSInsecureSkipVerify != false ||
-		ac.UnauthorizedUser.httpTransport.TLSClientConfig.InsecureSkipVerify != false {
+	if !isSetBool(ac.UnauthorizedUser.TLSInsecureSkipVerify, false) || ac.UnauthorizedUser.httpTransport.TLSClientConfig.InsecureSkipVerify {
 		t.Fatalf("unexpected TLSInsecureSkipVerify value for unauthorized_user")
 	}
+}
 
+func isSetBool(boolP *bool, expectedValue bool) bool {
+	if boolP == nil {
+		return false
+	}
+	return *boolP == expectedValue
 }
 
 func getSrcPaths(paths []string) []*SrcPath {
diff --git a/app/vmauth/main.go b/app/vmauth/main.go
index 3d3a94c21d..75bd8a4cb0 100644
--- a/app/vmauth/main.go
+++ b/app/vmauth/main.go
@@ -3,6 +3,7 @@ package main
 import (
 	"context"
 	"crypto/tls"
+	"crypto/x509"
 	"errors"
 	"flag"
 	"fmt"
@@ -20,8 +21,10 @@ import (
 
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
@@ -48,6 +51,10 @@ var (
 	failTimeout               = flag.Duration("failTimeout", 3*time.Second, "Sets a delay period for load balancing to skip a malfunctioning backend")
 	maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size, which can be cached and re-tried at other backends. "+
 		"Bigger values may require more memory")
+	backendTLSInsecureSkipVerify = flag.Bool("backend.tlsInsecureSkipVerify", false, "Whether to skip TLS verification when connecting to backends over HTTPS. "+
+		"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
+	backendTLSCAFile = flag.String("backend.TLSCAFile", "", "Optional path to TLS root CA file, which is used for TLS verification when connecting to backends over HTTPS. "+
+		"See https://docs.victoriametrics.com/vmauth.html#backend-tls-setup")
 )
 
 func main() {
@@ -354,7 +361,48 @@ var (
 	missingRouteRequests     = metrics.NewCounter(`vmauth_http_request_errors_total{reason="missing_route"}`)
 )
 
-func getTransport(skipTLSVerification bool) *http.Transport {
+func getTransport(insecureSkipVerifyP *bool, caFile string) (*http.Transport, error) {
+	if insecureSkipVerifyP == nil {
+		insecureSkipVerifyP = backendTLSInsecureSkipVerify
+	}
+	insecureSkipVerify := *insecureSkipVerifyP
+	if caFile == "" {
+		caFile = *backendTLSCAFile
+	}
+
+	bb := bbPool.Get()
+	defer bbPool.Put(bb)
+
+	bb.B = appendTransportKey(bb.B[:0], insecureSkipVerify, caFile)
+
+	transportMapLock.Lock()
+	defer transportMapLock.Unlock()
+
+	tr := transportMap[string(bb.B)]
+	if tr == nil {
+		trLocal, err := newTransport(insecureSkipVerify, caFile)
+		if err != nil {
+			return nil, err
+		}
+		transportMap[string(bb.B)] = trLocal
+		tr = trLocal
+	}
+
+	return tr, nil
+}
+
+var transportMap = make(map[string]*http.Transport)
+var transportMapLock sync.Mutex
+
+func appendTransportKey(dst []byte, insecureSkipVerify bool, caFile string) []byte {
+	dst = encoding.MarshalBool(dst, insecureSkipVerify)
+	dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(caFile))
+	return dst
+}
+
+var bbPool bytesutil.ByteBufferPool
+
+func newTransport(insecureSkipVerify bool, caFile string) (*http.Transport, error) {
 	tr := http.DefaultTransport.(*http.Transport).Clone()
 	tr.ResponseHeaderTimeout = *responseTimeout
 	// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
@@ -365,11 +413,27 @@ func getTransport(skipTLSVerification bool) *http.Transport {
 	if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
 		tr.MaxIdleConns = tr.MaxIdleConnsPerHost
 	}
-	if tr.TLSClientConfig == nil {
-		tr.TLSClientConfig = &tls.Config{}
+	tlsCfg := tr.TLSClientConfig
+	if tlsCfg == nil {
+		tlsCfg = &tls.Config{}
+		tr.TLSClientConfig = tlsCfg
 	}
-	tr.TLSClientConfig.InsecureSkipVerify = skipTLSVerification
-	return tr
+	if insecureSkipVerify || caFile != "" {
+		tlsCfg.ClientSessionCache = tls.NewLRUClientSessionCache(0)
+		tlsCfg.InsecureSkipVerify = insecureSkipVerify
+		if caFile != "" {
+			data, err := fs.ReadFileOrHTTP(caFile)
+			if err != nil {
+				return nil, fmt.Errorf("cannot read tls_ca_file: %w", err)
+			}
+			rootCA := x509.NewCertPool()
+			if !rootCA.AppendCertsFromPEM(data) {
+				return nil, fmt.Errorf("cannot parse data read from tls_ca_file %q", caFile)
+			}
+			tlsCfg.RootCAs = rootCA
+		}
+	}
+	return tr, nil
 }
 
 var (
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index b2012f6ff0..c25373d120 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -79,7 +79,7 @@ The sandbox cluster installation is running under the constant load generated by
 * FEATURE: [vmalert-tool](https://docs.victoriametrics.com/#vmalert-tool): add `unittest` command to run unittest for alerting and recording rules. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4789) for details.
 * FEATURE: dashboards/vmalert: add new panel `Missed evaluations` for indicating alerting groups that miss their evaluations.
 * FEATURE: all: track requests with wrong auth key and wrong basic auth at `vm_http_request_errors_total` [metric](https://docs.victoriametrics.com/#monitoring) with `reason="wrong_auth_key"` and `reason="wrong_basic_auth"`. See [this issue](https://github.com/victoriaMetrics/victoriaMetrics/issues/4590). Thanks to @venkatbvc for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5166).
-* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add `tls_insecure_skip_verify` parameter which can be set on a per-user level to disable TLS verification for backend connections. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5240).
+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to skip TLS verification and to specify TLS Root CA when connecting to backends. See [these docs](https://docs.victoriametrics.com/vmauth.html#backend-tls-setup) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5240).
 * FEATURE: `vmstorage`: add `-blockcache.missesBeforeCaching` command-line flag, which can be used for fine-tuning RAM usage for `indexdb/dataBlocks` cache when queries touching big number of time series are executed.
 * FEATURE: add `-loggerMaxArgLen` command-line flag for fine-tuning the maximum lengths of logged args.
 
diff --git a/docs/vmauth.md b/docs/vmauth.md
index a156742b8c..a2ea738911 100644
--- a/docs/vmauth.md
+++ b/docs/vmauth.md
@@ -112,6 +112,31 @@ The following [metrics](#monitoring) related to concurrency limits are exposed b
 - `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
   because of the concurrency limit has been reached for unauthorized users (if `unauthorized_user` section is used).
 
+## Backend TLS setup
+
+By default `vmauth` uses system settings when performing requests to HTTPS backends specified via `url_prefix` option
+in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:
+
+- `backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
+  This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
+  via `tls_insecure_skip_verify` option. For example:
+
+  ```yml
+  - username: "foo"
+    url_prefix: "https://localhost"
+    tls_insecure_skip_verify: true
+  ```
+
+- `backend.tlsCAFile` allows specifying the path to TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
+  The `backend.tlsCAFile` may point either to local file or to `http` / `https` url.
+  This global setting can be overridden at per-user level inside [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
+  via `tls_ca_file` option. For example:
+
+  ```yml
+  - username: "foo"
+    url_prefix: "https://localhost"
+    tls_ca_file: "/path/to/tls/root/ca"
+  ```
 
 ## IP filters