mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/{mergeset,storage}: make background merge more responsive and scalable
- Maintain a separate worker pool per each part type (in-memory, file, big and small).
Previously a shared pool was used for merging all the part types.
A single merge worker could merge parts with mixed types at once. For example,
it could merge simultaneously an in-memory part plus a big file part.
Such a merge could take hours for big file part. During the duration of this merge
the in-memory part was pinned in memory and couldn't be persisted to disk
under the configured -inmemoryDataFlushInterval .
Another common issue, which could happen when parts with mixed types are merged,
is uncontrolled growth of in-memory parts or small parts when all the merge workers
were busy with merging big files. Such growth could lead to significant performance
degradataion for queries, since every query needs to check ever growing list of parts.
This could also slow down the registration of new time series, since VictoriaMetrics
searches for the internal series_id in the indexdb for every new time series.
The third issue is graceful shutdown duration, which could be very long when a background
merge is running on in-memory parts plus big file parts. This merge couldn't be interrupted,
since it merges in-memory parts.
A separate pool of merge workers per every part type elegantly resolves both issues:
- In-memory parts are merged to file-based parts in a timely manner, since the maximum
size of in-memory parts is limited.
- Long-running merges for big parts do not block merges for in-memory parts and small parts.
- Graceful shutdown duration is now limited by the time needed for flushing in-memory parts to files.
Merging for file parts is instantly canceled on graceful shutdown now.
- Deprecate -smallMergeConcurrency command-line flag, since the new background merge algorithm
should automatically self-tune according to the number of available CPU cores.
- Deprecate -finalMergeDelay command-line flag, since it wasn't working correctly.
It is better to run forced merge when needed - https://docs.victoriametrics.com/#forced-merge
- Tune the number of shards for pending rows and items before the data goes to in-memory parts
and becomes visible for search. This improves the maximum data ingestion rate and the maximum rate
for registration of new time series. This should reduce the duration of data ingestion slowdown
in VictoriaMetrics cluster on e.g. re-routing events, when some of vmstorage nodes become temporarily
unavailable.
- Prevent from possible "sync: WaitGroup misuse" panic on graceful shutdown.
This is a follow-up for fa566c68a6
.
Thanks @misutoth to for the inspiration at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5212
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5190
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3790
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3551
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3647
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3641
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/648
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/291
This commit is contained in:
parent
97937d58c4
commit
bb7a419cc3
6 changed files with 925 additions and 732 deletions
|
@ -38,13 +38,10 @@ var (
|
||||||
// DataPath is a path to storage data.
|
// DataPath is a path to storage data.
|
||||||
DataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
|
DataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
|
||||||
|
|
||||||
finalMergeDelay = flag.Duration("finalMergeDelay", 0, "The delay before starting final merge for per-month partition after no new data is ingested into it. "+
|
_ = flag.Duration("finalMergeDelay", 0, "Deprecated: this flag does nothing")
|
||||||
"Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. "+
|
_ = flag.Int("bigMergeConcurrency", 0, "Deprecated: this flag does nothing")
|
||||||
"Zero value disables final merge")
|
_ = flag.Int("smallMergeConcurrency", 0, "Deprecated: this flag does nothing")
|
||||||
_ = flag.Int("bigMergeConcurrency", 0, "Deprecated: this flag does nothing. Please use -smallMergeConcurrency "+
|
|
||||||
"for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage")
|
|
||||||
smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . "+
|
|
||||||
"It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries")
|
|
||||||
retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
|
retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
|
||||||
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
|
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
|
||||||
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
|
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
|
||||||
|
@ -96,8 +93,6 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||||
|
|
||||||
resetResponseCacheIfNeeded = resetCacheIfNeeded
|
resetResponseCacheIfNeeded = resetCacheIfNeeded
|
||||||
storage.SetLogNewSeries(*logNewSeries)
|
storage.SetLogNewSeries(*logNewSeries)
|
||||||
storage.SetFinalMergeDelay(*finalMergeDelay)
|
|
||||||
storage.SetMergeWorkersCount(*smallMergeConcurrency)
|
|
||||||
storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
|
storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
|
||||||
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
|
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
|
||||||
storage.SetTSIDCacheSize(cacheSizeStorageTSID.IntN())
|
storage.SetTSIDCacheSize(cacheSizeStorageTSID.IntN())
|
||||||
|
@ -496,11 +491,8 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||||
metrics.WriteCounterUint64(w, `vm_composite_filter_success_conversions_total`, idbm.CompositeFilterSuccessConversions)
|
metrics.WriteCounterUint64(w, `vm_composite_filter_success_conversions_total`, idbm.CompositeFilterSuccessConversions)
|
||||||
metrics.WriteCounterUint64(w, `vm_composite_filter_missing_conversions_total`, idbm.CompositeFilterMissingConversions)
|
metrics.WriteCounterUint64(w, `vm_composite_filter_missing_conversions_total`, idbm.CompositeFilterMissingConversions)
|
||||||
|
|
||||||
metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="storage/inmemory"}`, tm.InmemoryAssistedMergesCount)
|
// vm_assisted_merges_total name is used for backwards compatibility.
|
||||||
metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="storage/small"}`, tm.SmallAssistedMergesCount)
|
metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="indexdb/inmemory"}`, idbm.InmemoryPartsLimitReachedCount)
|
||||||
|
|
||||||
metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="indexdb/inmemory"}`, idbm.InmemoryAssistedMergesCount)
|
|
||||||
metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="indexdb/file"}`, idbm.FileAssistedMergesCount)
|
|
||||||
|
|
||||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
|
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
|
||||||
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
|
metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
|
||||||
|
|
|
@ -31,6 +31,7 @@ The sandbox cluster installation is running under the constant load generated by
|
||||||
* SECURITY: upgrade Go builder from Go1.21.5 to Go1.21.6. See [the list of issues addressed in Go1.21.6](https://github.com/golang/go/issues?q=milestone%3AGo1.21.6+label%3ACherryPickApproved).
|
* SECURITY: upgrade Go builder from Go1.21.5 to Go1.21.6. See [the list of issues addressed in Go1.21.6](https://github.com/golang/go/issues?q=milestone%3AGo1.21.6+label%3ACherryPickApproved).
|
||||||
|
|
||||||
* FEATURE: improve new [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) registration speed on systems with high number of CPU cores. Thanks to @misutoth for the initial idea and [implementation](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5212).
|
* FEATURE: improve new [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) registration speed on systems with high number of CPU cores. Thanks to @misutoth for the initial idea and [implementation](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5212).
|
||||||
|
* FEATURE: make [background merge](https://docs.victoriametrics.com/#storage) more responsive and scalable. This should help the following issues: [5190](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5190), [3425](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3425), [648](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/648).
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for discovering [Hetzner Cloud](https://www.hetzner.com/cloud) and [Hetzner Robot](https://docs.hetzner.com/robot) scrape targets. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3154) and [these docs](https://docs.victoriametrics.com/sd_configs.html#hetzner_sd_configs).
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for discovering [Hetzner Cloud](https://www.hetzner.com/cloud) and [Hetzner Robot](https://docs.hetzner.com/robot) scrape targets. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3154) and [these docs](https://docs.victoriametrics.com/sd_configs.html#hetzner_sd_configs).
|
||||||
* FEATURE: [graphite](https://docs.victoriametrics.com/#graphite-render-api-usage): add support for negative index in `groupByNode` and `aliasByNode` functions. Thanks to @rbizos for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5581).
|
* FEATURE: [graphite](https://docs.victoriametrics.com/#graphite-render-api-usage): add support for negative index in `groupByNode` and `aliasByNode` functions. Thanks to @rbizos for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5581).
|
||||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for [DataDog v2 data ingestion protocol](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics). See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4451).
|
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for [DataDog v2 data ingestion protocol](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics). See [these docs](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4451).
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -663,7 +663,8 @@ func (s *Storage) startFreeDiskSpaceWatcher() {
|
||||||
// Use atomic.LoadUint32 in front of atomic.CompareAndSwapUint32 in order to avoid slow inter-CPU synchronization
|
// Use atomic.LoadUint32 in front of atomic.CompareAndSwapUint32 in order to avoid slow inter-CPU synchronization
|
||||||
// when the storage isn't in read-only mode.
|
// when the storage isn't in read-only mode.
|
||||||
if atomic.LoadUint32(&s.isReadOnly) == 1 && atomic.CompareAndSwapUint32(&s.isReadOnly, 1, 0) {
|
if atomic.LoadUint32(&s.isReadOnly) == 1 && atomic.CompareAndSwapUint32(&s.isReadOnly, 1, 0) {
|
||||||
logger.Warnf("enabling writing to the storage at %s, since it has more than -storage.minFreeDiskSpaceBytes=%d of free space: %d bytes left",
|
s.notifyReadWriteMode()
|
||||||
|
logger.Warnf("switching the storage at %s to read-write mode, since it has more than -storage.minFreeDiskSpaceBytes=%d of free space: %d bytes left",
|
||||||
s.path, freeDiskSpaceLimitBytes, freeSpaceBytes)
|
s.path, freeDiskSpaceLimitBytes, freeSpaceBytes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -685,6 +686,16 @@ func (s *Storage) startFreeDiskSpaceWatcher() {
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Storage) notifyReadWriteMode() {
|
||||||
|
s.tb.NotifyReadWriteMode()
|
||||||
|
|
||||||
|
idb := s.idb()
|
||||||
|
idb.tb.NotifyReadWriteMode()
|
||||||
|
idb.doExtDB(func(extDB *indexDB) {
|
||||||
|
extDB.tb.NotifyReadWriteMode()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Storage) startRetentionWatcher() {
|
func (s *Storage) startRetentionWatcher() {
|
||||||
s.retentionWatcherWG.Add(1)
|
s.retentionWatcherWG.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
|
|
|
@ -205,6 +205,14 @@ func (tb *table) flushPendingRows() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tb *table) NotifyReadWriteMode() {
|
||||||
|
tb.ptwsLock.Lock()
|
||||||
|
for _, ptw := range tb.ptws {
|
||||||
|
ptw.pt.NotifyReadWriteMode()
|
||||||
|
}
|
||||||
|
tb.ptwsLock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
// TableMetrics contains essential metrics for the table.
|
// TableMetrics contains essential metrics for the table.
|
||||||
type TableMetrics struct {
|
type TableMetrics struct {
|
||||||
partitionMetrics
|
partitionMetrics
|
||||||
|
|
Loading…
Reference in a new issue