From 9b4294e53e548d8732c55ac7f605bc4970e46673 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 22 Jan 2024 15:47:07 +0200 Subject: [PATCH] lib/storage: reduce the contention on dateMetricIDCache mutex when new time series are registered at high rate The dateMetricIDCache puts recently registered (date, metricID) entries into mutable cache protected by the mutex. The dateMetricIDCache.Has() checks for the entry in the mutable cache when it isn't found in the immutable cache. Access to the mutable cache is protected by the mutex. This means this access is slow on systems with many CPU cores. The mutable cache was merged into immutable cache every 10 seconds in order to avoid slow access to mutable cache. This means that ingestion of new time series to VictoriaMetrics could result in significant slowdown for up to 10 seconds because of bottleneck at the mutex. Fix this by merging the mutable cache into immutable cache after len(cacheItems) / 2 cache hits under the mutex, e.g. when the entry is found in the mutable cache. This should automatically adjust intervals between merges depending on the addition rate for new time series (aka churn rate): - The interval will be much smaller than 10 seconds under high churn rate. This should reduce the mutex contention for mutable cache. - The interval will be bigger than 10 seconds under low churn rate. This should reduce the unneeded work on merging of mutable cache into immutable cache. 
--- lib/storage/storage.go | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/storage/storage.go b/lib/storage/storage.go index b2a654064..9f0417da0 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -2206,9 +2206,14 @@ type dateMetricIDCache struct { byDate atomic.Pointer[byDateMetricIDMap] // Contains mutable map protected by mu - byDateMutable *byDateMetricIDMap - nextSyncDeadline uint64 - mu sync.Mutex + byDateMutable *byDateMetricIDMap + + // Contains the number of slow accesses to byDateMutable. + // Is used for deciding when to merge byDateMutable to byDate. + // Protected by mu. + slowHits int + + mu sync.Mutex } func newDateMetricIDCache() *dateMetricIDCache { @@ -2221,7 +2226,7 @@ func (dmc *dateMetricIDCache) resetLocked() { // Do not reset syncsCount and resetsCount dmc.byDate.Store(newByDateMetricIDMap()) dmc.byDateMutable = newByDateMetricIDMap() - dmc.nextSyncDeadline = 10 + fasttime.UnixTimestamp() + dmc.slowHits = 0 atomic.AddUint64(&dmc.resetsCount, 1) } @@ -2255,9 +2260,16 @@ func (dmc *dateMetricIDCache) Has(generation, date, metricID uint64) bool { // Slow path. Check mutable map. dmc.mu.Lock() - v = dmc.byDateMutable.get(generation, date) - ok := v.Has(metricID) - dmc.syncLockedIfNeeded() + vMutable := dmc.byDateMutable.get(generation, date) + ok := vMutable.Has(metricID) + if ok { + dmc.slowHits++ + if dmc.slowHits > (v.Len()+vMutable.Len())/2 { + // It is cheaper to merge byDateMutable into byDate than to pay inter-cpu sync costs when accessing vMutable. 
+ dmc.syncLocked() + dmc.slowHits = 0 + } + } dmc.mu.Unlock() return ok @@ -2298,14 +2310,6 @@ func (dmc *dateMetricIDCache) Set(generation, date, metricID uint64) { dmc.mu.Unlock() } -func (dmc *dateMetricIDCache) syncLockedIfNeeded() { - currentTime := fasttime.UnixTimestamp() - if currentTime >= dmc.nextSyncDeadline { - dmc.nextSyncDeadline = currentTime + 10 - dmc.syncLocked() - } -} - func (dmc *dateMetricIDCache) syncLocked() { if len(dmc.byDateMutable.m) == 0 { // Nothing to sync.