From 209a5024ce15014504f3762b1269dd27c3d0fc63 Mon Sep 17 00:00:00 2001 From: Will Jordan Date: Wed, 20 Nov 2024 09:36:06 -0800 Subject: [PATCH] lib/tenantmetrics: improves CounterMap performance with large numbers of tenants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the map storing tenant metrics was re-created for each newly ingested tenant. This had a significant performance impact on systems with a large number of tenants. This commit addresses the issue by changing how tenant metric records are added to the map: instead of re-creating the map, it uses the `sync.Map` primitive. Benchmark results: ``` goos: linux goarch: amd64 pkg: github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics cpu: AMD Ryzen 9 5900X 12-Core Processor │ lib/tenantmetrics/orig.bench │ lib/tenantmetrics/new.bench │ │ sec/op │ sec/op vs base │ CounterMapGrowth/n=100,nProcs=GOMAXPROCS-24 1943.2µ ± 5% 248.0µ ± 11% -87.24% (p=0.001 n=7) CounterMapGrowth/n=100-24 434.63µ ± 5% 98.82µ ± 16% -77.26% (p=0.001 n=7) CounterMapGrowth/n=1000-24 32.719m ± 20% 1.425m ± 5% -95.65% (p=0.001 n=7) CounterMapGrowth/n=10000-24 3653.60m ± 5% 18.00m ± 2% -99.51% (p=0.001 n=7) geomean 17.83m 890.4µ -95.00% ``` Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482 --- Co-authored-by: Artem Fetishev --- docs/changelog/CHANGELOG.md | 1 + lib/tenantmetrics/counter_map.go | 23 +++------- lib/tenantmetrics/counter_map_timing_test.go | 46 ++++++++++++++++++++ 3 files changed, 54 insertions(+), 16 deletions(-) create mode 100644 lib/tenantmetrics/counter_map_timing_test.go diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 1d46ada138..8aa34d61ad 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -38,6 +38,7 @@ Released at 2024-11-15 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics 
cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Optimize resources usage for configured [downsampling](https://docs.victoriametrics.com/#downsampling) with time-series filter. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) for details. * BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details. * BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details. +* BUGFIX: `vmagent`, `vminsert` and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): fix a performance issue with tenant metrics counters across large numbers of tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482) for details. ## [v1.102.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.7) diff --git a/lib/tenantmetrics/counter_map.go b/lib/tenantmetrics/counter_map.go index 9b8759779b..b2daf831b9 100644 --- a/lib/tenantmetrics/counter_map.go +++ b/lib/tenantmetrics/counter_map.go @@ -2,6 +2,7 @@ package tenantmetrics import ( "fmt" + "sync" "sync/atomic" "github.com/VictoriaMetrics/metrics" @@ -20,19 +21,16 @@ type TenantID struct { type CounterMap struct { metric string - // do not use atomic.Pointer, since the stored map there is already a pointer type. - m atomic.Value + m sync.Map // mt holds value for multi-tenant metrics. mt atomic.Value } // NewCounterMap creates new CounterMap for the given metric. 
func NewCounterMap(metric string) *CounterMap { - cm := &CounterMap{ + return &CounterMap{ metric: metric, } - cm.m.Store(make(map[TenantID]*metrics.Counter)) - return cm } // Get returns counter for the given at @@ -67,21 +65,14 @@ func (cm *CounterMap) GetByTenant(key *TenantID) *metrics.Counter { return mtm.(*metrics.Counter) } - m := cm.m.Load().(map[TenantID]*metrics.Counter) - if c := m[*key]; c != nil { - // Fast path - the counter for k already exists. - return c + if counter, ok := cm.m.Load(*key); ok { + return counter.(*metrics.Counter) } - // Slow path - create missing counter for k and re-create m. - newM := make(map[TenantID]*metrics.Counter, len(m)+1) - for k, c := range m { - newM[k] = c - } + // Slow path - create missing counter for k. metricName := createMetricName(cm.metric, *key) c := metrics.GetOrCreateCounter(metricName) - newM[*key] = c - cm.m.Store(newM) + cm.m.Store(*key, c) return c } diff --git a/lib/tenantmetrics/counter_map_timing_test.go b/lib/tenantmetrics/counter_map_timing_test.go new file mode 100644 index 0000000000..51eb9618b8 --- /dev/null +++ b/lib/tenantmetrics/counter_map_timing_test.go @@ -0,0 +1,46 @@ +package tenantmetrics + +import ( + "runtime" + "sync" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" +) + +func BenchmarkCounterMapGrowth(b *testing.B) { + f := func(b *testing.B, numTenants uint32, nProcs int) { + b.Helper() + + for i := 0; i < b.N; i++ { + cm := NewCounterMap("foobar") + var wg sync.WaitGroup + for range nProcs { + wg.Add(1) + go func() { + for i := range numTenants { + cm.Get(&auth.Token{AccountID: i, ProjectID: i}).Inc() + } + wg.Done() + }() + } + wg.Wait() + } + } + + b.Run("n=100,nProcs=GOMAXPROCS", func(b *testing.B) { + f(b, 100, runtime.GOMAXPROCS(0)) + }) + + b.Run("n=100,nProcs=2", func(b *testing.B) { + f(b, 100, 2) + }) + + b.Run("n=1000,nProcs=2", func(b *testing.B) { + f(b, 1000, 2) + }) + + b.Run("n=10000,nProcs=2", func(b *testing.B) { + f(b, 10000, 2) + }) +}