From f673039e86611081299ce1ad596af9b4049eb2e8 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin <valyala@victoriametrics.com> Date: Tue, 16 Jan 2024 15:26:08 +0200 Subject: [PATCH] lib/storage: follow-up for 4b8088e377d1d7c09f9914edd9121962c65e2e84 - Clarify the bugfix description at docs/CHANGELOG.md - Simplify the code by accessing prefetchedMetricIDs struct under the lock instead of using lockless access to immutable struct. This shouldn't worsen code scalability too much on busy systems with many CPU cores, since the code executed under the lock is quite small and fast. This allows removing cloning of prefetchedMetricIDs struct every time new metric names are pre-fetched. This should reduce load on Go GC, since the cloning of uin64set.Set struct allocates many new objects. --- docs/CHANGELOG.md | 2 +- lib/storage/storage.go | 37 +++++++++++++++++-------------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index cf23cdc363..8c03c6bb8d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -48,7 +48,7 @@ The sandbox cluster installation is running under the constant load generated by * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case. * BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped. * BUGFIX: `vmstorage`: added missing `-inmemoryDataFlushInterval` command-line flag, which was missing in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) after implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337) in [v1.85.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.85.0). -* BUGFIX: `vmstorage`: properly check for `storage/prefetchedMetricIDs` cache expiration deadline. Before, this cache was limited only by size. +* BUGFIX: `vmstorage`: properly expire `storage/prefetchedMetricIDs` cache. Previously this cache was never expired, so it could grow big under [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). This could result in increasing CPU load over time. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): check `-external.url` schema when starting vmalert, must be `http` or `https`. Before, alertmanager could reject alert notifications if `-external.url` contained no or wrong schema. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): automatically add `exported_` prefix for original evaluation result label if it's conflicted with external or reserved one, previously it was overridden. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5161). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): consistently sort results for `q1 or q2` query, so they do not change colors with each refresh in Grafana. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5393). diff --git a/lib/storage/storage.go b/lib/storage/storage.go index 96b6e24599..d29524fb10 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -120,14 +120,12 @@ type Storage struct { pendingNextDayMetricIDs *uint64set.Set // prefetchedMetricIDs contains metricIDs for pre-fetched metricNames in the prefetchMetricNames function. - prefetchedMetricIDs atomic.Pointer[uint64set.Set] + prefetchedMetricIDsLock sync.Mutex + prefetchedMetricIDs *uint64set.Set // prefetchedMetricIDsDeadline is used for periodic reset of prefetchedMetricIDs in order to limit its size under high rate of creating new series. prefetchedMetricIDsDeadline uint64 - // prefetchedMetricIDsLock is used for serializing updates of prefetchedMetricIDs from concurrent goroutines. - prefetchedMetricIDsLock sync.Mutex - stop chan struct{} currHourMetricIDsUpdaterWG sync.WaitGroup @@ -231,7 +229,7 @@ func MustOpenStorage(path string, retention time.Duration, maxHourlySeries, maxD s.pendingNextDayMetricIDs = &uint64set.Set{} - s.prefetchedMetricIDs.Store(&uint64set.Set{}) + s.prefetchedMetricIDs = &uint64set.Set{} // Load metadata metadataDir := filepath.Join(path, metadataDirname) @@ -628,9 +626,11 @@ func (s *Storage) UpdateMetrics(m *Metrics) { m.NextDayMetricIDCacheSize += uint64(nextDayMetricIDs.Len()) m.NextDayMetricIDCacheSizeBytes += nextDayMetricIDs.SizeBytes() - prefetchedMetricIDs := s.prefetchedMetricIDs.Load() + s.prefetchedMetricIDsLock.Lock() + prefetchedMetricIDs := s.prefetchedMetricIDs m.PrefetchedMetricIDsSize += uint64(prefetchedMetricIDs.Len()) m.PrefetchedMetricIDsSizeBytes += uint64(prefetchedMetricIDs.SizeBytes()) + s.prefetchedMetricIDsLock.Unlock() d := s.nextRetentionSeconds() if d < 0 { @@ -1212,14 +1212,18 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, accountID, project qt.Printf("nothing to prefetch") return nil } + var metricIDs uint64Sorter - prefetchedMetricIDs := s.prefetchedMetricIDs.Load() + s.prefetchedMetricIDsLock.Lock() + prefetchedMetricIDs := s.prefetchedMetricIDs for _, metricID := range srcMetricIDs { if prefetchedMetricIDs.Has(metricID) { continue } metricIDs = append(metricIDs, metricID) } + s.prefetchedMetricIDsLock.Unlock() + qt.Printf("%d out of %d metric names must be pre-fetched", len(metricIDs), len(srcMetricIDs)) if len(metricIDs) < 500 { // It is cheaper to skip pre-fetching and obtain metricNames inline. @@ -1268,24 +1272,17 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, accountID, project // Store the pre-fetched metricIDs, so they aren't pre-fetched next time. s.prefetchedMetricIDsLock.Lock() - var prefetchedMetricIDsNew *uint64set.Set if fasttime.UnixTimestamp() > atomic.LoadUint64(&s.prefetchedMetricIDsDeadline) { // Periodically reset the prefetchedMetricIDs in order to limit its size. - prefetchedMetricIDsNew = &uint64set.Set{} - deadlineSec := 73 * 60 - jitterSec := fastrand.Uint32n(uint32(deadlineSec / 10)) - metricIDsDeadline := fasttime.UnixTimestamp() + uint64(deadlineSec) + uint64(jitterSec) + s.prefetchedMetricIDs = &uint64set.Set{} + const deadlineSec = 20 * 60 + jitterSec := fastrand.Uint32n(deadlineSec / 10) + metricIDsDeadline := fasttime.UnixTimestamp() + deadlineSec + uint64(jitterSec) atomic.StoreUint64(&s.prefetchedMetricIDsDeadline, metricIDsDeadline) - } else { - prefetchedMetricIDsNew = prefetchedMetricIDs.Clone() } - prefetchedMetricIDsNew.AddMulti(metricIDs) - if prefetchedMetricIDsNew.SizeBytes() > uint64(memory.Allowed())/32 { - // Reset prefetchedMetricIDsNew if it occupies too much space. - prefetchedMetricIDsNew = &uint64set.Set{} - } - s.prefetchedMetricIDs.Store(prefetchedMetricIDsNew) + s.prefetchedMetricIDs.AddMulti(metricIDs) s.prefetchedMetricIDsLock.Unlock() + qt.Printf("cache metric ids for pre-fetched metric names") return nil }