app/vmstorage: deprecate -bigMergeConcurrency command-line flag

An improperly configured -bigMergeConcurrency command-line flag usually leads to uncontrolled
growth of unmerged parts, which, in turn, increases CPU usage and query durations.

So it is better to deprecate this flag. In rare cases, the -smallMergeConcurrency command-line flag
can be used instead for controlling the concurrency of background merges.
This commit is contained in:
Aliaksandr Valialkin 2023-04-13 20:33:33 -07:00
parent e73dd1df2d
commit cf53ce83a0
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
7 changed files with 15 additions and 32 deletions

View file

@ -1231,7 +1231,7 @@ Below is the output for `/path/to/vmstorage -help`:
``` ```
-bigMergeConcurrency int -bigMergeConcurrency int
The maximum number of CPU cores to use for big merges. Default value is used if set to 0 Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
-cacheExpireDuration duration -cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s) Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-cluster.tls -cluster.tls
@ -1366,7 +1366,7 @@ Below is the output for `/path/to/vmstorage -help`:
-search.maxUniqueTimeseries int -search.maxUniqueTimeseries int
The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect
-smallMergeConcurrency int -smallMergeConcurrency int
The maximum number of CPU cores to use for small merges. Default value is used if set to 0 The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
-snapshotAuthKey string -snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages authKey, which must be passed in query string to /snapshot* pages
-snapshotCreateTimeout duration -snapshotCreateTimeout duration

View file

@ -43,8 +43,10 @@ var (
finalMergeDelay = flag.Duration("finalMergeDelay", 0, "The delay before starting final merge for per-month partition after no new data is ingested into it. "+ finalMergeDelay = flag.Duration("finalMergeDelay", 0, "The delay before starting final merge for per-month partition after no new data is ingested into it. "+
"Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. "+ "Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. "+
"Zero value disables final merge") "Zero value disables final merge")
bigMergeConcurrency = flag.Int("bigMergeConcurrency", 0, "The maximum number of CPU cores to use for big merges. Default value is used if set to 0") _ = flag.Int("bigMergeConcurrency", 0, "Deprecated: this flag does nothing. Please use -smallMergeConcurrency "+
smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of CPU cores to use for small merges. Default value is used if set to 0") "for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage")
smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . "+
"It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries")
retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+ retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+ "If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)") "If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
@ -84,7 +86,6 @@ func main() {
storage.SetDedupInterval(*minScrapeInterval) storage.SetDedupInterval(*minScrapeInterval)
storage.SetLogNewSeries(*logNewSeries) storage.SetLogNewSeries(*logNewSeries)
storage.SetFinalMergeDelay(*finalMergeDelay) storage.SetFinalMergeDelay(*finalMergeDelay)
storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
storage.SetMergeWorkersCount(*smallMergeConcurrency) storage.SetMergeWorkersCount(*smallMergeConcurrency)
storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset) storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N) storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)

View file

@ -17,6 +17,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): store backup creation and completion time in `backup_complete.ignore` file of backup contents. This is useful to determine point in time when backup was created and completed. * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): store backup creation and completion time in `backup_complete.ignore` file of backup contents. This is useful to determine point in time when backup was created and completed.
* FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html): add `created_at` field to the output of `/api/v1/backups` API and `vmbackupmanager backup list` command. See this [doc](https://docs.victoriametrics.com/vmbackupmanager.html#api-methods) for data format details. * FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html): add `created_at` field to the output of `/api/v1/backups` API and `vmbackupmanager backup list` command. See this [doc](https://docs.victoriametrics.com/vmbackupmanager.html#api-methods) for data format details.
* FEATURE: deprecate `-bigMergeConcurrency` command-line flag, since improper configuration of this flag frequently led to uncontrolled growth of unmerged parts, which, in turn, could lead to slower queries and increased CPU usage. The concurrency for [background merges](https://docs.victoriametrics.com/#storage) can be controlled via the `-smallMergeConcurrency` command-line flag, though this isn't recommended in the general case.
## [v1.90.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.90.0) ## [v1.90.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.90.0)

View file

@ -1235,7 +1235,7 @@ Below is the output for `/path/to/vmstorage -help`:
``` ```
-bigMergeConcurrency int -bigMergeConcurrency int
The maximum number of CPU cores to use for big merges. Default value is used if set to 0 Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
-cacheExpireDuration duration -cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s) Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-cluster.tls -cluster.tls
@ -1370,7 +1370,7 @@ Below is the output for `/path/to/vmstorage -help`:
-search.maxUniqueTimeseries int -search.maxUniqueTimeseries int
The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. Zero means 'no limit'. See also -search.max* command-line flags at vmselect
-smallMergeConcurrency int -smallMergeConcurrency int
The maximum number of CPU cores to use for small merges. Default value is used if set to 0 The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
-snapshotAuthKey string -snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages authKey, which must be passed in query string to /snapshot* pages
-snapshotCreateTimeout duration -snapshotCreateTimeout duration

View file

@ -2177,7 +2177,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
``` ```
-bigMergeConcurrency int -bigMergeConcurrency int
The maximum number of CPU cores to use for big merges. Default value is used if set to 0 Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
-cacheExpireDuration duration -cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s) Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-configAuthKey string -configAuthKey string
@ -2527,7 +2527,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-selfScrapeJob string -selfScrapeJob string
Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics") Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
-smallMergeConcurrency int -smallMergeConcurrency int
The maximum number of CPU cores to use for small merges. Default value is used if set to 0 The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
-snapshotAuthKey string -snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages authKey, which must be passed in query string to /snapshot* pages
-snapshotCreateTimeout duration -snapshotCreateTimeout duration

View file

@ -2180,7 +2180,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
``` ```
-bigMergeConcurrency int -bigMergeConcurrency int
The maximum number of CPU cores to use for big merges. Default value is used if set to 0 Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
-cacheExpireDuration duration -cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s) Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-configAuthKey string -configAuthKey string
@ -2530,7 +2530,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-selfScrapeJob string -selfScrapeJob string
Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics") Value for 'job' label, which is added to self-scraped metrics (default "victoria-metrics")
-smallMergeConcurrency int -smallMergeConcurrency int
The maximum number of CPU cores to use for small merges. Default value is used if set to 0 The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries
-snapshotAuthKey string -snapshotAuthKey string
authKey, which must be passed in query string to /snapshot* pages authKey, which must be passed in query string to /snapshot* pages
-snapshotCreateTimeout duration -snapshotCreateTimeout duration

View file

@ -1002,8 +1002,6 @@ func hasActiveMerges(pws []*partWrapper) bool {
var mergeWorkersLimitCh = make(chan struct{}, adjustMergeWorkersLimit(getDefaultMergeConcurrency(16))) var mergeWorkersLimitCh = make(chan struct{}, adjustMergeWorkersLimit(getDefaultMergeConcurrency(16)))
var bigMergeWorkersLimitCh = make(chan struct{}, getDefaultMergeConcurrency(4))
func getDefaultMergeConcurrency(max int) int { func getDefaultMergeConcurrency(max int) int {
v := (cgroup.AvailableCPUs() + 1) / 2 v := (cgroup.AvailableCPUs() + 1) / 2
if v > max { if v > max {
@ -1012,17 +1010,6 @@ func getDefaultMergeConcurrency(max int) int {
return v return v
} }
// SetBigMergeWorkersCount sets the maximum number of concurrent mergers for big blocks.
//
// The function must be called before opening or creating any storage.
func SetBigMergeWorkersCount(n int) {
if n <= 0 {
// Do nothing
return
}
bigMergeWorkersLimitCh = make(chan struct{}, n)
}
// SetMergeWorkersCount sets the maximum number of concurrent mergers for parts. // SetMergeWorkersCount sets the maximum number of concurrent mergers for parts.
// //
// The function must be called before opening or creating any storage. // The function must be called before opening or creating any storage.
@ -1143,7 +1130,8 @@ func (pt *partition) getMaxSmallPartSize() uint64 {
} }
func (pt *partition) getMaxBigPartSize() uint64 { func (pt *partition) getMaxBigPartSize() uint64 {
return getMaxOutBytes(pt.bigPartsPath, cap(bigMergeWorkersLimitCh)) workersCount := getDefaultMergeConcurrency(4)
return getMaxOutBytes(pt.bigPartsPath, workersCount)
} }
func getMaxOutBytes(path string, workersCount int) uint64 { func getMaxOutBytes(path string, workersCount int) uint64 {
@ -1277,13 +1265,6 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFi
mergeIdx := pt.nextMergeIdx() mergeIdx := pt.nextMergeIdx()
dstPartPath := pt.getDstPartPath(dstPartType, mergeIdx) dstPartPath := pt.getDstPartPath(dstPartType, mergeIdx)
if dstPartType == partBig {
bigMergeWorkersLimitCh <- struct{}{}
defer func() {
<-bigMergeWorkersLimitCh
}()
}
if !isDedupEnabled() && isFinal && len(pws) == 1 && pws[0].mp != nil { if !isDedupEnabled() && isFinal && len(pws) == 1 && pws[0].mp != nil {
// Fast path: flush a single in-memory part to disk. // Fast path: flush a single in-memory part to disk.
mp := pws[0].mp mp := pws[0].mp