lib/tenantmetrics: improves CounterMap performance with large numbers of tenants

Previously, the map storing tenant metrics was re-created for each newly ingested tenant. This had a significant performance impact on systems with a large number of tenants.

 This commit addresses the issue by changing the algorithm for creating tenant metric records in the map. Instead of re-creating the map, it uses the `sync.Map` primitive.

Benchmark results:

```
goos: linux
goarch: amd64
pkg: github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics
cpu: AMD Ryzen 9 5900X 12-Core Processor
                                            │ lib/tenantmetrics/orig.bench │     lib/tenantmetrics/new.bench     │
                                            │            sec/op            │    sec/op     vs base               │
CounterMapGrowth/n=100,nProcs=GOMAXPROCS-24                  1943.2µ ±  5%   248.0µ ± 11%  -87.24% (p=0.001 n=7)
CounterMapGrowth/n=100-24                                    434.63µ ±  5%   98.82µ ± 16%  -77.26% (p=0.001 n=7)
CounterMapGrowth/n=1000-24                                   32.719m ± 20%   1.425m ±  5%  -95.65% (p=0.001 n=7)
CounterMapGrowth/n=10000-24                                 3653.60m ±  5%   18.00m ±  2%  -99.51% (p=0.001 n=7)
geomean                                                       17.83m         890.4µ        -95.00%
```

Related issue:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482

---
Co-authored-by: Artem Fetishev <rtm@victoriametrics.com>
This commit is contained in:
Will Jordan 2024-11-20 09:36:06 -08:00 committed by f41gh7
parent 93c63d77c0
commit a19a4f34ff
No known key found for this signature in database
GPG key ID: 4558311CF775EC72
3 changed files with 80 additions and 18 deletions

View file

@ -40,6 +40,7 @@ Released at 2024-11-15
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Optimize resources usage for configured [downsampling](https://docs.victoriametrics.com/#downsampling) with time-series filter. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) for details. * BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Optimize resources usage for configured [downsampling](https://docs.victoriametrics.com/#downsampling) with time-series filter. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) for details.
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details. * BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details.
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details. * BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details.
* BUGFIX: `vmagent`, `vminsert` and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): fix a performance issue with tenant metrics counters across large numbers of tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7482) for details.
## [v1.102.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.7) ## [v1.102.7](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.7)

View file

@ -2,6 +2,7 @@ package tenantmetrics
import ( import (
"fmt" "fmt"
"sync"
"sync/atomic" "sync/atomic"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
@ -19,17 +20,16 @@ type TenantID struct {
type CounterMap struct { type CounterMap struct {
metric string metric string
// do not use atomic.Pointer, since the stored map there is already a pointer type. m sync.Map
m atomic.Value // mt holds value for multi-tenant metrics.
mt atomic.Value
} }
// NewCounterMap creates new CounterMap for the given metric. // NewCounterMap creates new CounterMap for the given metric.
func NewCounterMap(metric string) *CounterMap { func NewCounterMap(metric string) *CounterMap {
cm := &CounterMap{ return &CounterMap{
metric: metric, metric: metric,
} }
cm.m.Store(make(map[TenantID]*metrics.Counter))
return cm
} }
// Get returns counter for the given at // Get returns counter for the given at
@ -38,7 +38,7 @@ func (cm *CounterMap) Get(at *auth.Token) *metrics.Counter {
AccountID: at.AccountID, AccountID: at.AccountID,
ProjectID: at.ProjectID, ProjectID: at.ProjectID,
} }
return cm.GetByTenant(key) return cm.GetByTenant(&key)
} }
// MultiAdd adds multiple values grouped by auth.Token // MultiAdd adds multiple values grouped by auth.Token
@ -49,22 +49,25 @@ func (cm *CounterMap) MultiAdd(perTenantValues map[auth.Token]int) {
} }
// GetByTenant returns counter for the given key. // GetByTenant returns counter for the given key.
func (cm *CounterMap) GetByTenant(key TenantID) *metrics.Counter { func (cm *CounterMap) GetByTenant(key *TenantID) *metrics.Counter {
m := cm.m.Load().(map[TenantID]*metrics.Counter) if key == nil {
if c := m[key]; c != nil { mtm := cm.mt.Load()
// Fast path - the counter for k already exists. if mtm == nil {
return c mtc := metrics.GetOrCreateCounter(createMetricNameMultitenant(cm.metric))
cm.mt.Store(mtc)
return mtc
}
return mtm.(*metrics.Counter)
} }
// Slow path - create missing counter for k and re-create m. if counter, ok := cm.m.Load(*key); ok {
newM := make(map[TenantID]*metrics.Counter, len(m)+1) return counter.(*metrics.Counter)
for k, c := range m {
newM[k] = c
} }
metricName := createMetricName(cm.metric, key)
// Slow path - create missing counter for k.
metricName := createMetricName(cm.metric, *key)
c := metrics.GetOrCreateCounter(metricName) c := metrics.GetOrCreateCounter(metricName)
newM[key] = c cm.m.Store(*key, c)
cm.m.Store(newM)
return c return c
} }
@ -79,3 +82,15 @@ func createMetricName(metric string, key TenantID) string {
// Metric with labels. // Metric with labels.
return fmt.Sprintf(`%s,accountID="%d",projectID="%d"}`, metric[:len(metric)-1], key.AccountID, key.ProjectID) return fmt.Sprintf(`%s,accountID="%d",projectID="%d"}`, metric[:len(metric)-1], key.AccountID, key.ProjectID)
} }
// createMetricNameMultitenant builds the multitenant variant of the given metric name
// by adding accountID="multitenant" and projectID="multitenant" labels to it.
//
// An empty metric is reported via logger.Panicf as a bug.
func createMetricNameMultitenant(metric string) string {
	if metric == "" {
		logger.Panicf("BUG: metric cannot be empty")
	}
	last := len(metric) - 1
	if metric[last] == '}' {
		// The metric already ends with a label set: drop the closing brace
		// and append the tenant labels to the existing ones.
		return fmt.Sprintf(`%s,accountID="multitenant",projectID="multitenant"}`, metric[:last])
	}
	// The metric has no labels: open a new label set with the tenant labels.
	return fmt.Sprintf(`%s{accountID="multitenant",projectID="multitenant"}`, metric)
}

View file

@ -0,0 +1,46 @@
package tenantmetrics
import (
"runtime"
"sync"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
)
// BenchmarkCounterMapGrowth measures populating a fresh CounterMap with
// numTenants distinct tenants while nProcs goroutines concurrently request
// and increment the same set of per-tenant counters.
func BenchmarkCounterMapGrowth(b *testing.B) {
	// nProcs == 0 stands for runtime.GOMAXPROCS(0); it is resolved
	// when the corresponding sub-benchmark starts.
	cases := []struct {
		name       string
		numTenants uint32
		nProcs     int
	}{
		{name: "n=100,nProcs=GOMAXPROCS", numTenants: 100, nProcs: 0},
		{name: "n=100,nProcs=2", numTenants: 100, nProcs: 2},
		{name: "n=1000,nProcs=2", numTenants: 1000, nProcs: 2},
		{name: "n=10000,nProcs=2", numTenants: 10000, nProcs: 2},
	}

	grow := func(b *testing.B, numTenants uint32, nProcs int) {
		b.Helper()
		for iter := 0; iter < b.N; iter++ {
			// Every iteration starts from an empty map, so the timed work
			// includes registering each tenant.
			cm := NewCounterMap("foobar")
			var wg sync.WaitGroup
			wg.Add(nProcs)
			for worker := 0; worker < nProcs; worker++ {
				go func() {
					for tenant := uint32(0); tenant < numTenants; tenant++ {
						cm.Get(&auth.Token{AccountID: tenant, ProjectID: tenant}).Inc()
					}
					wg.Done()
				}()
			}
			wg.Wait()
		}
	}

	for _, bc := range cases {
		b.Run(bc.name, func(b *testing.B) {
			nProcs := bc.nProcs
			if nProcs == 0 {
				nProcs = runtime.GOMAXPROCS(0)
			}
			grow(b, bc.numTenants, nProcs)
		})
	}
}