From 295f2aa8ca543e5c1714866f533919b8be485591 Mon Sep 17 00:00:00 2001 From: YuDong Tang <583125614@qq.com> Date: Mon, 26 Aug 2024 20:37:45 +0800 Subject: [PATCH] app/vmselect: add command-line flag -search.inmemoryBufSizeBytes (#6869) Add command-line flag `-search.inmemoryBufSizeBytes` for configuring the size of in-memory buffers used by vmselect during processing of vmstorage responses. A new summary metric `vm_tmp_blocks_inmemory_file_size_bytes` is exposed to show the size of the buffer during request processing. The new setting can be used by experienced users to adjust memory usage by vmselect when processing many small read requests. Instead of allocating 4MB buffers each time, vmselect can be instructed to lower the buffer size via `-search.inmemoryBufSizeBytes`. To decide whether this flag needs to be adjusted, users can consult `vm_tmp_blocks_inmemory_file_size_bytes`, which shows the actual size of buffers used during query processing. ---------- The detailed information of this PR can be found in https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6851 ### Checklist The following checks are **mandatory**: - [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). 
--------- Co-authored-by: hagen1778 (cherry picked from commit cab3ef8294ec20dc90f6cf90e0b136c90e6d662c) --- app/vmselect/netstorage/tmp_blocks_file.go | 19 ++++++++++++++++--- docs/Cluster-VictoriaMetrics.md | 3 +++ docs/changelog/CHANGELOG.md | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/app/vmselect/netstorage/tmp_blocks_file.go b/app/vmselect/netstorage/tmp_blocks_file.go index f62b8e48e..d97172b19 100644 --- a/app/vmselect/netstorage/tmp_blocks_file.go +++ b/app/vmselect/netstorage/tmp_blocks_file.go @@ -7,6 +7,7 @@ import ( "sync" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/memory" @@ -14,6 +15,10 @@ import ( "github.com/VictoriaMetrics/metrics" ) +var tmpBufSize = flagutil.NewBytes("search.inmemoryBufSizeBytes", 0, "Size for in-memory data blocks used during processing search requests. "+ + "By default, the size is automatically calculated based on available memory. "+ + "Adjust this flag value if you observe that vm_tmp_blocks_max_inmemory_file_size_bytes metric constantly shows much higher values than vm_tmp_blocks_inmemory_file_size_bytes. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6851") + // InitTmpBlocksDir initializes directory to store temporary search results. // // It stores data in system-defined temporary directory if tmpDirPath is empty. 
@@ -29,6 +34,9 @@ func InitTmpBlocksDir(tmpDirPath string) { var tmpBlocksDir string func maxInmemoryTmpBlocksFile() int { + if tmpBufSize.IntN() > 0 { + return tmpBufSize.IntN() + } mem := memory.Allowed() maxLen := mem / 1024 if maxLen < 64*1024 { @@ -40,9 +48,12 @@ func maxInmemoryTmpBlocksFile() int { return maxLen } -var _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 { - return float64(maxInmemoryTmpBlocksFile()) -}) +var ( + _ = metrics.NewGauge(`vm_tmp_blocks_max_inmemory_file_size_bytes`, func() float64 { + return float64(maxInmemoryTmpBlocksFile()) + }) + tmpBufSizeSummary = metrics.NewSummary(`vm_tmp_blocks_inmemory_file_size_bytes`) +) type tmpBlocksFile struct { buf []byte @@ -65,6 +76,8 @@ func getTmpBlocksFile() *tmpBlocksFile { func putTmpBlocksFile(tbf *tmpBlocksFile) { tbf.MustClose() + bufLen := tbf.Len() + tmpBufSizeSummary.Update(float64(bufLen)) tbf.buf = tbf.buf[:0] tbf.f = nil tbf.r = nil diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index ea47ef628..e2c60c06b 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -1582,6 +1582,9 @@ Below is the output for `/path/to/vmselect -help`: Whether to fix lookback interval to 'step' query arg value. If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored -search.skipSlowReplicas Whether to skip -replicationFactor - 1 slowest vmstorage nodes during querying. Enabling this setting may improve query speed, but it could also lead to incomplete results if some queried data has less than -replicationFactor copies at vmstorage nodes. Consider enabling this setting only if all the queried data contains -replicationFactor copies in the cluster + -search.inmemoryBufSizeBytes size + Size for in-memory data blocks used during processing search requests. 
By default, the size is automatically calculated based on available memory. Adjust this flag value if you observe that vm_tmp_blocks_max_inmemory_file_size_bytes metric constantly shows much higher values than vm_tmp_blocks_inmemory_file_size_bytes. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6851 + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0) -search.treatDotsAsIsInRegexps Whether to treat dots as is in regexp label filters used in queries. For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter -selectNode array diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index e3333b4f4..c237595fd 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -35,6 +35,7 @@ The value of `instance` label for those scrape targets will be changed from `