diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 538325557..cfa1c634c 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -40,6 +40,7 @@ Released at 2024-11-15 * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Optimize resources usage for configured [downsampling](https://docs.victoriametrics.com/#downsampling) with time-series filter. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) for details. * BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details. * BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details. +* BUGFIX: `vmagent`, `vminsert` and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Fix a performance issue with per-tenant metrics counters when handling a large number of tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482) for details. 
## [v1.102.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.7) diff --git a/lib/tenantmetrics/counter_map.go b/lib/tenantmetrics/counter_map.go index cc47c423c..b630d25ff 100644 --- a/lib/tenantmetrics/counter_map.go +++ b/lib/tenantmetrics/counter_map.go @@ -2,6 +2,7 @@ package tenantmetrics import ( "fmt" + "sync" "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" @@ -19,17 +20,16 @@ type TenantID struct { type CounterMap struct { metric string - // do not use atomic.Pointer, since the stored map there is already a pointer type. - m atomic.Value + m sync.Map + // mt holds value for multi-tenant metrics. + mt atomic.Value } // NewCounterMap creates new CounterMap for the given metric. func NewCounterMap(metric string) *CounterMap { - cm := &CounterMap{ + return &CounterMap{ metric: metric, } - cm.m.Store(make(map[TenantID]*metrics.Counter)) - return cm } // Get returns counter for the given at @@ -38,7 +38,7 @@ func (cm *CounterMap) Get(at *auth.Token) *metrics.Counter { AccountID: at.AccountID, ProjectID: at.ProjectID, } - return cm.GetByTenant(key) + return cm.GetByTenant(&key) } // MultiAdd adds multiple values grouped by auth.Token @@ -49,22 +49,25 @@ func (cm *CounterMap) MultiAdd(perTenantValues map[auth.Token]int) { } // GetByTenant returns counter for the given key. -func (cm *CounterMap) GetByTenant(key TenantID) *metrics.Counter { - m := cm.m.Load().(map[TenantID]*metrics.Counter) - if c := m[key]; c != nil { - // Fast path - the counter for k already exists. - return c +func (cm *CounterMap) GetByTenant(key *TenantID) *metrics.Counter { + if key == nil { + mtm := cm.mt.Load() + if mtm == nil { + mtc := metrics.GetOrCreateCounter(createMetricNameMultitenant(cm.metric)) + cm.mt.Store(mtc) + return mtc + } + return mtm.(*metrics.Counter) } - // Slow path - create missing counter for k and re-create m. 
- newM := make(map[TenantID]*metrics.Counter, len(m)+1) - for k, c := range m { - newM[k] = c + if counter, ok := cm.m.Load(*key); ok { + return counter.(*metrics.Counter) } - metricName := createMetricName(cm.metric, key) + + // Slow path - create missing counter for k. + metricName := createMetricName(cm.metric, *key) c := metrics.GetOrCreateCounter(metricName) - newM[key] = c - cm.m.Store(newM) + cm.m.Store(*key, c) return c } @@ -79,3 +82,15 @@ func createMetricName(metric string, key TenantID) string { // Metric with labels. return fmt.Sprintf(`%s,accountID="%d",projectID="%d"}`, metric[:len(metric)-1], key.AccountID, key.ProjectID) } + +func createMetricNameMultitenant(metric string) string { + if len(metric) == 0 { + logger.Panicf("BUG: metric cannot be empty") + } + if metric[len(metric)-1] != '}' { + // Metric without labels. + return fmt.Sprintf(`%s{accountID="multitenant",projectID="multitenant"}`, metric) + } + // Metric with labels. + return fmt.Sprintf(`%s,accountID="multitenant",projectID="multitenant"}`, metric[:len(metric)-1]) +} diff --git a/lib/tenantmetrics/counter_map_timing_test.go b/lib/tenantmetrics/counter_map_timing_test.go new file mode 100644 index 000000000..51eb9618b --- /dev/null +++ b/lib/tenantmetrics/counter_map_timing_test.go @@ -0,0 +1,46 @@ +package tenantmetrics + +import ( + "runtime" + "sync" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" +) + +func BenchmarkCounterMapGrowth(b *testing.B) { + f := func(b *testing.B, numTenants uint32, nProcs int) { + b.Helper() + + for i := 0; i < b.N; i++ { + cm := NewCounterMap("foobar") + var wg sync.WaitGroup + for range nProcs { + wg.Add(1) + go func() { + for i := range numTenants { + cm.Get(&auth.Token{AccountID: i, ProjectID: i}).Inc() + } + wg.Done() + }() + } + wg.Wait() + } + } + + b.Run("n=100,nProcs=GOMAXPROCS", func(b *testing.B) { + f(b, 100, runtime.GOMAXPROCS(0)) + }) + + b.Run("n=100,nProcs=2", func(b *testing.B) { + f(b, 100, 2) + }) + + 
b.Run("n=1000,nProcs=2", func(b *testing.B) { + f(b, 1000, 2) + }) + + b.Run("n=10000,nProcs=2", func(b *testing.B) { + f(b, 10000, 2) + }) +}