From 85f60237e25d324819f4fd23a63274660f5bd560 Mon Sep 17 00:00:00 2001
From: Zhu Jiekun <jiekun@victoriametrics.com>
Date: Fri, 18 Oct 2024 19:41:43 +0800
Subject: [PATCH] vmstorage: auto calculate maxUniqueTimeseries based on
 resources (#6961)

### Describe Your Changes

Add support for
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6930

Calculate `-search.maxUniqueTimeseries` by
`-search.maxConcurrentRequests` and remaining memory if it's **not set**
or **less equal than 0**.

The remaining memory is affected by `-memory.allowedPercent`,
`-memory.allowedBytes` and cgroup memory limit.
### Checklist

The following checks are **mandatory**:

- [x] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>
---
 app/vmselect/prometheus/prometheus.go  |  3 +-
 app/vmstorage/main.go                  |  4 +++
 app/vmstorage/servers/vmselect.go      | 42 ++++++++++++++++++++++++--
 app/vmstorage/servers/vmselect_test.go | 33 ++++++++++++++++++++
 docs/Cluster-VictoriaMetrics.md        | 14 +++++----
 docs/README.md                         |  3 +-
 docs/changelog/CHANGELOG.md            |  4 +++
 lib/storage/index_db.go                |  2 +-
 8 files changed, 93 insertions(+), 12 deletions(-)
 create mode 100644 app/vmstorage/servers/vmselect_test.go

diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go
index 26f18991e..a72bd1243 100644
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -53,7 +53,8 @@ var (
 		"points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data")
 	selectNodes = flagutil.NewArrayString("selectNode", "Comma-separated addresses of vmselect nodes; usage: -selectNode=vmselect-host1,...,vmselect-hostN")
 
-	maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage")
+	maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. "+
+		"The limit can't exceed the corresponding -search.maxUniqueTimeseries limit on vmstorage, it can be only set to lower values.")
 	maxFederateSeries   = flag.Int("search.maxFederateSeries", 1e6, "The maximum number of time series, which can be returned from /federate. This option allows limiting memory usage")
 	maxExportSeries     = flag.Int("search.maxExportSeries", 10e6, "The maximum number of time series, which can be returned from /api/v1/export* APIs. This option allows limiting memory usage")
 	maxTSDBStatusSeries = flag.Int("search.maxTSDBStatusSeries", 10e6, "The maximum number of time series, which can be processed during the call to /api/v1/status/tsdb. This option allows limiting memory usage")
diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go
index 2403860d0..ad1fefe09 100644
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -122,6 +122,8 @@ func main() {
 	metrics.RegisterSet(storageMetrics)
 
 	common.StartUnmarshalWorkers()
+
+	servers.GetMaxUniqueTimeSeries() // for init and logging only.
 	vminsertSrv, err := servers.NewVMInsertServer(*vminsertAddr, strg)
 	if err != nil {
 		logger.Fatalf("cannot create a server with -vminsertAddr=%s: %s", *vminsertAddr, err)
@@ -565,6 +567,8 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
 
 	metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled`, tm.ScheduledDownsamplingPartitions)
 	metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled_size_bytes`, tm.ScheduledDownsamplingPartitionsSize)
+
+	metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(servers.GetMaxUniqueTimeSeries()))
 }
 
 func jsonResponseError(w http.ResponseWriter, err error) {
diff --git a/app/vmstorage/servers/vmselect.go b/app/vmstorage/servers/vmselect.go
index 5a30851a1..08cb5589e 100644
--- a/app/vmstorage/servers/vmselect.go
+++ b/app/vmstorage/servers/vmselect.go
@@ -10,14 +10,17 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
 )
 
 var (
-	maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect")
-	maxTagKeys          = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
+	maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
+		"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
+	maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
 		"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
 	maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
 		"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
@@ -35,6 +38,11 @@ var (
 		"This may be useful when multiple data sources with distinct retentions are hidden behind query-tee")
 )
 
+var (
+	maxUniqueTimeseriesValue     int
+	maxUniqueTimeseriesValueOnce sync.Once
+)
+
 // NewVMSelectServer starts new server at the given addr, which serves vmselect requests from the given s.
 func NewVMSelectServer(addr string, s *storage.Storage) (*vmselectapi.Server, error) {
 	api := &vmstorageAPI{
@@ -249,10 +257,38 @@ func getMaxMetrics(sq *storage.SearchQuery) int {
 	maxMetrics := sq.MaxMetrics
 	maxMetricsLimit := *maxUniqueTimeseries
 	if maxMetricsLimit <= 0 {
-		maxMetricsLimit = 2e9
+		maxMetricsLimit = GetMaxUniqueTimeSeries()
 	}
 	if maxMetrics <= 0 || maxMetrics > maxMetricsLimit {
 		maxMetrics = maxMetricsLimit
 	}
 	return maxMetrics
 }
+
+// GetMaxUniqueTimeSeries returns the max metrics limit calculated by available resources.
+// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
+func GetMaxUniqueTimeSeries() int {
+	maxUniqueTimeseriesValueOnce.Do(func() {
+		maxUniqueTimeseriesValue = *maxUniqueTimeseries
+		if maxUniqueTimeseriesValue <= 0 {
+			maxUniqueTimeseriesValue = calculateMaxUniqueTimeSeriesForResource(*maxConcurrentRequests, memory.Remaining())
+		}
+	})
+	return maxUniqueTimeseriesValue
+}
+
+// calculateMaxUniqueTimeSeriesForResource calculate the max metrics limit calculated by available resources.
+func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
+	if maxConcurrentRequests <= 0 {
+		// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
+		// In such cases, fallback to unlimited.
+		logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
+		return 2e9
+	}
+
+	// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
+	// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
+	mts := remainingMemory / 200 / maxConcurrentRequests
+	logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
+	return mts
+}
diff --git a/app/vmstorage/servers/vmselect_test.go b/app/vmstorage/servers/vmselect_test.go
new file mode 100644
index 000000000..5e0b5c4ec
--- /dev/null
+++ b/app/vmstorage/servers/vmselect_test.go
@@ -0,0 +1,33 @@
+package servers
+
+import (
+	"math"
+	"runtime"
+	"testing"
+)
+
+func TestCalculateMaxMetricsLimitByResource(t *testing.T) {
+	f := func(maxConcurrentRequest, remainingMemory, expect int) {
+		t.Helper()
+		maxMetricsLimit := calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequest, remainingMemory)
+		if maxMetricsLimit != expect {
+			t.Fatalf("unexpected max metrics limit: got %d, want %d", maxMetricsLimit, expect)
+		}
+	}
+
+	// Skip when GOARCH=386
+	if runtime.GOARCH != "386" {
+		// 8 CPU & 32 GiB
+		f(16, int(math.Round(32*1024*1024*1024*0.4)), 4294967)
+		// 4 CPU & 32 GiB
+		f(8, int(math.Round(32*1024*1024*1024*0.4)), 8589934)
+	}
+
+	// 2 CPU & 4 GiB
+	f(4, int(math.Round(4*1024*1024*1024*0.4)), 2147483)
+
+	// other edge cases
+	f(0, int(math.Round(4*1024*1024*1024*0.4)), 2e9)
+	f(4, 0, 0)
+
+}
diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md
index 2d49a76a6..e3a59bfa7 100644
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -711,10 +711,12 @@ Some workloads may need fine-grained resource usage limits. In these cases the f
   Queries, which need more memory, are rejected. Heavy queries, which select big number of time series,
   may exceed the per-query memory limit by a small percent. The total memory limit for concurrently executed queries can be estimated
   as `-search.maxMemoryPerQuery` multiplied by `-search.maxConcurrentRequests`.
-- `-search.maxUniqueTimeseries` at `vmselect` component limits the number of unique time series a single query can find and process.
-  `vmselect` passes the limit to `vmstorage` component, which keeps in memory some metainformation about the time series located
-  by each query and spends some CPU time for processing the found time series. This means that the maximum memory usage and CPU usage
-  a single query can use at `vmstorage` is proportional to `-search.maxUniqueTimeseries`.
+- `-search.maxUniqueTimeseries` at `vmstorage` component limits the number of unique time series a single query can find and process.
+  This means that the maximum memory usage and CPU usage a single query can use at `vmstorage` is proportional to `-search.maxUniqueTimeseries`.
+  By default, `vmstorage` calculates this limit automatically based on the available memory and the maximum number of concurrent read requests (see `-search.maxConcurrentRequests`).
+  The calculated limit will be printed during process start-up logs and exposed as `vm_search_max_unique_timeseries` metric.
+- `-search.maxUniqueTimeseries` at `vmselect` adjusts the limit with the same name at `vmstorage`. The vmstorage limit can be adjusted
+  only to **lower value** and can't exceed it. By default, vmselect doesn't apply limit adjustments.
 - `-search.maxQueryDuration` at `vmselect` limits the duration of a single query. If the query takes longer than the given duration, then it is canceled.
   This allows saving CPU and RAM at `vmselect` and `vmstorage` when executing unexpectedly heavy queries.
   The limit can be altered for each query by passing `timeout` GET parameter, but can't exceed the limit specified via `-search.maxQueryDuration` command-line flag.
@@ -1620,7 +1622,7 @@ Below is the output for `/path/to/vmselect -help`:
   -search.maxTagValueSuffixesPerSearch int
      The maximum number of tag value suffixes returned from /metrics/find (default 100000)
   -search.maxUniqueTimeseries int
-     The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage (default 300000)
+     The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. The limit can't exceed the corresponding -search.maxUniqueTimeseries limit on vmstorage, it can be only set to lower values. (default 0)
   -search.maxWorkersPerQuery int
      The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 16)
   -search.minStalenessInterval duration
@@ -1894,7 +1896,7 @@ Below is the output for `/path/to/vmstorage -help`:
   -search.maxTagValues int
      The maximum number of tag values returned per search. See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
   -search.maxUniqueTimeseries int
-     The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect
+     The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect.
   -smallMergeConcurrency int
      Deprecated: this flag does nothing
   -snapshotAuthKey value
diff --git a/docs/README.md b/docs/README.md
index 06b6a41e3..b7d394c53 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1704,7 +1704,8 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxMemoryPerQuery` limits the amounts of memory, which can be used for processing a single query. Queries, which need more memory, are rejected.
   Heavy queries, which select big number of time series, may exceed the per-query memory limit by a small percent. The total memory limit
   for concurrently executed queries can be estimated as `-search.maxMemoryPerQuery` multiplied by `-search.maxConcurrentRequests`.
-- `-search.maxUniqueTimeseries` limits the number of unique time series a single query can find and process. VictoriaMetrics keeps in memory
+- `-search.maxUniqueTimeseries` limits the number of unique time series a single query can find and process. By default, VictoriaMetrics calculates the limit automatically 
+  based on the available memory and the maximum number of concurrent requests it can process (see `-search.maxConcurrentRequests`). VictoriaMetrics keeps in memory
   some metainformation about the time series located by each query and spends some CPU time for processing the found time series.
   This means that the maximum memory usage and CPU usage a single query can use is proportional to `-search.maxUniqueTimeseries`.
 - `-search.maxQueryDuration` limits the duration of a single query. If the query takes longer than the given duration, then it is canceled.
diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md
index 55c42b646..d7c6e12eb 100644
--- a/docs/changelog/CHANGELOG.md
+++ b/docs/changelog/CHANGELOG.md
@@ -18,6 +18,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 
 ## tip
 
+**Update note 1: `-search.maxUniqueTimeseries` limit on `vmselect` can no longer exceed `-search.maxUniqueTimeseries` limit on `vmstorage`. If you don't set this flag at `vmstorage`, then it will be automatically calculated based on available resources. This can result into rejecting expensive read queries if they exceed auto-calculated limit. The limit can be overriden by manually setting `-search.maxUniqueTimeseries` at vmstorage, but for better reliability we recommend sticking to default values. Refer to the CHANGELOG below and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6930).**
+
 * FEATURE: add Darwin binaries for [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/) to the release flow. The binaries will be available in the new release.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): allow using HTTP/2 client for Kubernetes service discovery if `-promscrape.kubernetes.useHTTP2Client` cmd-line flag is set. This could help to reduce the amount of opened connections to the Kubernetes API server. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5971) for the details.
 * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): `-rule` cmd-line flag now supports multi-document YAML files. This could be useful when rules are retrieved via HTTP URL where multiple rule files were merged together in one response. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6753). Thanks to @Irene-123 for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6995).
@@ -25,6 +27,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 * FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/) and [vmagent](https://docs.victoriametrics.com/vmagent/): disable stream processing mode for data [ingested via InfluxDB](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) HTTP endpoints by default. With this change, the data is processed in batches (see `-influx.maxRequestSize`) and user will get parsing errors immediately as they happen. This also improves users' experience and resiliency against thundering herd problems caused by clients without backoff policies like telegraf. To enable stream mode back, pass HTTP header `Stream-Mode: "1"` with each request. For data sent via TCP and UDP (see `-influxListenAddr`) protocols streaming processing remains enabled.
 * FEATURE: [vmgateway](https://docs.victoriametrics.com/vmgateway/): allow parsing `scope` claim parsing in array format. This is useful for cases when identity provider does encode claims in array format.
 * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add the ability to cancel running queries. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7097).
+* FEATURE: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): automatically set `-search.maxUniqueTimeseries` limit based on available memory and `-search.maxConcurrentRequests`. The more memory is available to the process and the lower is `-search.maxConcurrentRequests`, the higher will be `-search.maxUniqueTimeseries` limit. This should protect vmstorage from expensive queries without the need to manually set `-search.maxUniqueTimeseries`. The calculated limit will be printed during process start-up logs and exposed as `vm_search_max_unique_timeseries` metric. Set `-search.maxUniqueTimeseries` manually to override auto calculation. Please note, `-search.maxUniqueTimeseries` on vmselect can't exceed the same name limit on vmstorage, it can only be set to lower values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6930).
+* FEATURE: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): set default value for `-search.maxUniqueTimeseries` to `0`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6930).
 
 * BUGFIX: [vmgateway](https://docs.victoriametrics.com/vmgateway/): fix possible panic during parsing of a token without `vm_access` claim. This issue was introduced in v1.104.0.
 * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix error messages rendering from overflowing the screen with long messages. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7207).
diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go
index 819206df8..2877b0e5c 100644
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -2426,7 +2426,7 @@ func (is *indexSearch) searchMetricIDs(qt *querytracer.Tracer, tfss []*TagFilter
 
 func errTooManyTimeseries(maxMetrics int) error {
 	return fmt.Errorf("the number of matching timeseries exceeds %d; "+
-		"either narrow down the search or increase -search.max* command-line flag values at vmselect "+
+		"either narrow down the search or increase -search.max* command-line flag values "+
 		"(the most likely limit is -search.maxUniqueTimeseries); "+
 		"see https://docs.victoriametrics.com/#resource-usage-limits", maxMetrics)
 }