lib/storage: reduce the contention on dateMetricIDCache mutex when new time series are registered at high rate

The dateMetricIDCache puts recently registered (date, metricID) entries into mutable cache protected by the mutex.
The dateMetricIDCache.Has() checks for the entry in the mutable cache when it isn't found in the immutable cache.
Access to the mutable cache is protected by the mutex. This means this access is slow on systems with many CPU cores.
The mutable cache was merged into the immutable cache every 10 seconds in order to avoid slow access to the mutable cache.
This means that ingestion of new time series to VictoriaMetrics could result in significant slowdown for up to 10 seconds
because of bottleneck at the mutex.

Fix this by merging the mutable cache into the immutable cache after len(cacheItems) / 2
cache hits under the mutex, i.e. when the entry is found in the mutable cache.
This should automatically adjust intervals between merges depending on the addition rate
for new time series (aka churn rate):

- The interval will be much smaller than 10 seconds under high churn rate.
  This should reduce the mutex contention for mutable cache.
- The interval will be bigger than 10 seconds under low churn rate.
  This should reduce the unneeded work on merging the mutable cache into the immutable cache.
This commit is contained in:
Aliaksandr Valialkin 2024-01-22 15:47:07 +02:00
parent 8b8d0e3677
commit 9b4294e53e
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB

View file

@@ -2207,7 +2207,12 @@ type dateMetricIDCache struct {
// Contains mutable map protected by mu
byDateMutable *byDateMetricIDMap
nextSyncDeadline uint64
// Contains the number of slow accesses to byDateMutable.
// Is used for deciding when to merge byDateMutable to byDate.
// Protected by mu.
slowHits int
mu sync.Mutex
}
@@ -2221,7 +2226,7 @@ func (dmc *dateMetricIDCache) resetLocked() {
// Do not reset syncsCount and resetsCount
dmc.byDate.Store(newByDateMetricIDMap())
dmc.byDateMutable = newByDateMetricIDMap()
dmc.nextSyncDeadline = 10 + fasttime.UnixTimestamp()
dmc.slowHits = 0
atomic.AddUint64(&dmc.resetsCount, 1)
}
@@ -2255,9 +2260,16 @@ func (dmc *dateMetricIDCache) Has(generation, date, metricID uint64) bool {
// Slow path. Check mutable map.
dmc.mu.Lock()
v = dmc.byDateMutable.get(generation, date)
ok := v.Has(metricID)
dmc.syncLockedIfNeeded()
vMutable := dmc.byDateMutable.get(generation, date)
ok := vMutable.Has(metricID)
if ok {
dmc.slowHits++
if dmc.slowHits > (v.Len()+vMutable.Len())/2 {
// It is cheaper to merge byDateMutable into byDate than to pay inter-cpu sync costs when accessing vMutable.
dmc.syncLocked()
dmc.slowHits = 0
}
}
dmc.mu.Unlock()
return ok
@@ -2298,14 +2310,6 @@ func (dmc *dateMetricIDCache) Set(generation, date, metricID uint64) {
dmc.mu.Unlock()
}
// syncLockedIfNeeded merges byDateMutable into byDate via syncLocked()
// once the 10-second sync deadline has passed.
// The "Locked" suffix suggests dmc.mu must be held by the caller — TODO confirm.
func (dmc *dateMetricIDCache) syncLockedIfNeeded() {
currentTime := fasttime.UnixTimestamp()
if currentTime >= dmc.nextSyncDeadline {
// Schedule the next merge 10 seconds from now before performing the sync.
dmc.nextSyncDeadline = currentTime + 10
dmc.syncLocked()
}
}
func (dmc *dateMetricIDCache) syncLocked() {
if len(dmc.byDateMutable.m) == 0 {
// Nothing to sync.