From a19a4f34ff76be45712064851809709007e105e4 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Wed, 20 Nov 2024 09:36:06 -0800 Subject: [PATCH] lib/tenantmetrics: improves CounterMap performance with large numbers of tenants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the map storing tenant metrics was re-created for each newly ingested tenant. This had a significant performance impact on systems with a large number of tenants. This commit addresses the issue by changing how tenant metric records are added to the map: instead of re-creating the map, it uses the `sync.Map` primitive. Benchmark results: ``` goos: linux goarch: amd64 pkg: github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics cpu: AMD Ryzen 9 5900X 12-Core Processor │ lib/tenantmetrics/orig.bench │ lib/tenantmetrics/new.bench │ │ sec/op │ sec/op vs base │ CounterMapGrowth/n=100,nProcs=GOMAXPROCS-24 1943.2µ ± 5% 248.0µ ± 11% -87.24% (p=0.001 n=7) CounterMapGrowth/n=100-24 434.63µ ± 5% 98.82µ ± 16% -77.26% (p=0.001 n=7) CounterMapGrowth/n=1000-24 32.719m ± 20% 1.425m ± 5% -95.65% (p=0.001 n=7) CounterMapGrowth/n=10000-24 3653.60m ± 5% 18.00m ± 2% -99.51% (p=0.001 n=7) geomean 17.83m 890.4µ -95.00% ``` Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482 --- Co-authored-by: Artem Fetishev --- docs/changelog/CHANGELOG.md | 1 + lib/tenantmetrics/counter_map.go | 51 +++++++++++++------- lib/tenantmetrics/counter_map_timing_test.go | 46 ++++++++++++++++++ 3 files changed, 80 insertions(+), 18 deletions(-) create mode 100644 lib/tenantmetrics/counter_map_timing_test.go diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 538325557..cfa1c634c 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -40,6 +40,7 @@ Released at 2024-11-15 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics 
cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Optimize resources usage for configured [downsampling](https://docs.victoriametrics.com/#downsampling) with time-series filter. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) for details. * BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details. * BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details. +* BUGFIX: `vmagent`, `vminsert` and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): fix a performance issue with tenant metrics counters across large numbers of tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482) for details. ## [v1.102.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.7) diff --git a/lib/tenantmetrics/counter_map.go b/lib/tenantmetrics/counter_map.go index cc47c423c..b630d25ff 100644 --- a/lib/tenantmetrics/counter_map.go +++ b/lib/tenantmetrics/counter_map.go @@ -2,6 +2,7 @@ package tenantmetrics import ( "fmt" + "sync" "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" @@ -19,17 +20,16 @@ type TenantID struct { type CounterMap struct { metric string - // do not use atomic.Pointer, since the stored map there is already a pointer type. - m atomic.Value + m sync.Map + // mt holds value for multi-tenant metrics. 
+ mt atomic.Value } // NewCounterMap creates new CounterMap for the given metric. func NewCounterMap(metric string) *CounterMap { - cm := &CounterMap{ + return &CounterMap{ metric: metric, } - cm.m.Store(make(map[TenantID]*metrics.Counter)) - return cm } // Get returns counter for the given at @@ -38,7 +38,7 @@ func (cm *CounterMap) Get(at *auth.Token) *metrics.Counter { AccountID: at.AccountID, ProjectID: at.ProjectID, } - return cm.GetByTenant(key) + return cm.GetByTenant(&key) } // MultiAdd adds multiple values grouped by auth.Token @@ -49,22 +49,25 @@ func (cm *CounterMap) MultiAdd(perTenantValues map[auth.Token]int) { } // GetByTenant returns counter for the given key. -func (cm *CounterMap) GetByTenant(key TenantID) *metrics.Counter { - m := cm.m.Load().(map[TenantID]*metrics.Counter) - if c := m[key]; c != nil { - // Fast path - the counter for k already exists. - return c +func (cm *CounterMap) GetByTenant(key *TenantID) *metrics.Counter { + if key == nil { + mtm := cm.mt.Load() + if mtm == nil { + mtc := metrics.GetOrCreateCounter(createMetricNameMultitenant(cm.metric)) + cm.mt.Store(mtc) + return mtc + } + return mtm.(*metrics.Counter) } - // Slow path - create missing counter for k and re-create m. - newM := make(map[TenantID]*metrics.Counter, len(m)+1) - for k, c := range m { - newM[k] = c + if counter, ok := cm.m.Load(*key); ok { + return counter.(*metrics.Counter) } - metricName := createMetricName(cm.metric, key) + + // Slow path - create missing counter for k. + metricName := createMetricName(cm.metric, *key) c := metrics.GetOrCreateCounter(metricName) - newM[key] = c - cm.m.Store(newM) + cm.m.Store(*key, c) return c } @@ -79,3 +82,15 @@ func createMetricName(metric string, key TenantID) string { // Metric with labels. 
return fmt.Sprintf(`%s,accountID="%d",projectID="%d"}`, metric[:len(metric)-1], key.AccountID, key.ProjectID) } + +func createMetricNameMultitenant(metric string) string { + if len(metric) == 0 { + logger.Panicf("BUG: metric cannot be empty") + } + if metric[len(metric)-1] != '}' { + // Metric without labels. + return fmt.Sprintf(`%s{accountID="multitenant",projectID="multitenant"}`, metric) + } + // Metric with labels. + return fmt.Sprintf(`%s,accountID="multitenant",projectID="multitenant"}`, metric[:len(metric)-1]) +} diff --git a/lib/tenantmetrics/counter_map_timing_test.go b/lib/tenantmetrics/counter_map_timing_test.go new file mode 100644 index 000000000..51eb9618b --- /dev/null +++ b/lib/tenantmetrics/counter_map_timing_test.go @@ -0,0 +1,46 @@ +package tenantmetrics + +import ( + "runtime" + "sync" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" +) + +func BenchmarkCounterMapGrowth(b *testing.B) { + f := func(b *testing.B, numTenants uint32, nProcs int) { + b.Helper() + + for i := 0; i < b.N; i++ { + cm := NewCounterMap("foobar") + var wg sync.WaitGroup + for range nProcs { + wg.Add(1) + go func() { + for i := range numTenants { + cm.Get(&auth.Token{AccountID: i, ProjectID: i}).Inc() + } + wg.Done() + }() + } + wg.Wait() + } + } + + b.Run("n=100,nProcs=GOMAXPROCS", func(b *testing.B) { + f(b, 100, runtime.GOMAXPROCS(0)) + }) + + b.Run("n=100,nProcs=2", func(b *testing.B) { + f(b, 100, 2) + }) + + b.Run("n=1000,nProcs=2", func(b *testing.B) { + f(b, 1000, 2) + }) + + b.Run("n=10000,nProcs=2", func(b *testing.B) { + f(b, 10000, 2) + }) +}