From 28f054bb006a139ebd003e087d26aae72b1fa829 Mon Sep 17 00:00:00 2001 From: Zakhar Bessarab Date: Fri, 24 Mar 2023 16:15:13 +0300 Subject: [PATCH 1/4] vmselect/netstorage: remove direct calls to `Gosched` to reduce amount of locks for global scope using `runtime.Gosched` requires acquiring global lock to check if there are any other goroutines to perform tasks. with the latest versions of runtime it can pause running goroutines automatically without requiring to call `Gosched` directly. Updates #3966 Signed-off-by: Zakhar Bessarab --- app/vmselect/netstorage/netstorage.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index 4fb850379d..a5f5de8e2a 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -152,8 +152,6 @@ func timeseriesWorker(qt *querytracer.Tracer, workChs []chan *timeseriesWork, wo idx := (i + workerID) % uint(len(workChs)) ch := workChs[idx] for len(ch) > 0 { - // Give a chance other goroutines to perform their work. - runtime.Gosched() // It is expected that every channel in the workChs is already closed, // so the next line should return immediately. 
tsw, ok := <-ch From a1e496ced6943801c6944eca340a47be7e21fe6e Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 Mar 2023 16:36:45 -0700 Subject: [PATCH 2/4] app/vmselect/netstorage: document why runtime.Gosched() is removed at 28f054bb006a139ebd003e087d26aae72b1fa829 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966 --- app/vmselect/netstorage/netstorage.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index a5f5de8e2a..8c941dccc7 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -152,6 +152,11 @@ func timeseriesWorker(qt *querytracer.Tracer, workChs []chan *timeseriesWork, wo idx := (i + workerID) % uint(len(workChs)) ch := workChs[idx] for len(ch) > 0 { + // Do not call runtime.Gosched() here in order to give the real owner of the work + // a chance to complete it, since the call consumes additional CPU + // and slows down the code on systems with a big number of CPU cores. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966#issuecomment-1483208419 + // It is expected that every channel in the workChs is already closed, // so the next line should return immediately. 
tsw, ok := <-ch From 5832242b449a58eed089e160044f5d1600487a85 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 Mar 2023 16:33:09 -0700 Subject: [PATCH 3/4] app/vmselect/netstorage: reduce the contention at fs.ReaderAt stats collection on systems with big number of CPU cores This optimization is based on the profile provided at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966#issuecomment-1483208419 --- app/vmselect/netstorage/tmp_blocks_file.go | 3 ++ lib/fs/reader_at.go | 34 ++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/app/vmselect/netstorage/tmp_blocks_file.go b/app/vmselect/netstorage/tmp_blocks_file.go index c7415caf4a..7155f5286a 100644 --- a/app/vmselect/netstorage/tmp_blocks_file.go +++ b/app/vmselect/netstorage/tmp_blocks_file.go @@ -142,6 +142,9 @@ func (tbf *tmpBlocksFile) Finalize() error { // This should reduce the number of disk seeks, which is important // for HDDs. r.MustFadviseSequentialRead(true) + // Collect local stats in order to improve performance on systems with big number of CPU cores. + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966 + r.SetUseLocalStats() tbf.r = r return nil } diff --git a/lib/fs/reader_at.go b/lib/fs/reader_at.go index abc1c46990..8a40a3ffa8 100644 --- a/lib/fs/reader_at.go +++ b/lib/fs/reader_at.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "os" + "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/metrics" @@ -27,8 +28,13 @@ type MustReadAtCloser interface { // ReaderAt implements rand-access reader. type ReaderAt struct { + readCalls uint64 + readBytes uint64 + f *os.File mmapData []byte + + useLocalStats bool } // MustReadAt reads len(p) bytes at off from r. 
@@ -56,8 +62,13 @@ func (r *ReaderAt) MustReadAt(p []byte, off int64) { // But production workload proved this is OK in most cases, so use it without fear :) copy(p, src) } - readCalls.Inc() - readBytes.Add(len(p)) + if r.useLocalStats { + atomic.AddUint64(&r.readCalls, 1) + atomic.AddUint64(&r.readBytes, uint64(len(p))) + } else { + readCalls.Inc() + readBytes.Add(len(p)) + } } // MustClose closes r. @@ -71,9 +82,28 @@ func (r *ReaderAt) MustClose() { } MustClose(r.f) r.f = nil + + if r.useLocalStats { + readCalls.Add(int(r.readCalls)) + readBytes.Add(int(r.readBytes)) + r.readCalls = 0 + r.readBytes = 0 + r.useLocalStats = false + } readersCount.Dec() } +// SetUseLocalStats switches to local stats collection instead of global stats collection. +// +// This function must be called before the first call to MustReadAt(). +// +// Collecting local stats may improve performance on systems with big number of CPU cores, +// since the locally collected stats is pushed to global stats only at MustClose() call +// instead of pushing it at every MustReadAt call. +func (r *ReaderAt) SetUseLocalStats() { + r.useLocalStats = true +} + // MustFadviseSequentialRead hints the OS that f is read mostly sequentially. // // if prefetch is set, then the OS is hinted to prefetch f data. From 72a0b493303086197d7135d42f2569125a6b81f1 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 25 Mar 2023 22:43:55 -0700 Subject: [PATCH 4/4] docs/CHANGELOG.md: document v1.87.4 LTS release --- docs/CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2db98b47d1..aeaa67515e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -131,6 +131,19 @@ Released at 2023-02-24 * BUGFIX: properly parse timestamps in milliseconds when [ingesting data via OpenTSDB telnet put protocol](https://docs.victoriametrics.com/#sending-data-via-telnet-put-protocol). Previously timestamps in milliseconds were mistakenly multiplied by 1000. 
Thanks to @Droxenator for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3810). * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): do not add extrapolated points outside the real points when using [interpolate()](https://docs.victoriametrics.com/MetricsQL.html#interpolate) function. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3816). +## [v1.87.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.4) + +Released at 2023-03-25 + +**v1.87.x is a line of LTS releases (i.e. long-term support). It contains important up-to-date bugfixes. +The v1.87.x line will be supported for at least 12 months after the [v1.87.0](https://docs.victoriametrics.com/CHANGELOG.html#v1870) release** + +* BUGFIX: prevent slow [snapshot creation](https://docs.victoriametrics.com/#how-to-work-with-snapshots) under high data ingestion rate. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3551). +* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): suppress [proxy protocol](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt) parsing errors in case of `EOF`. Usually, the error is caused by health checks and is not a sign of an actual error. +* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): fix snapshot not being deleted in case of error during backup. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2055). +* BUGFIX: allow using dashes and dots in environment variable names referenced in config files via `%{ENV-VAR.SYNTAX}`. See [these docs](https://docs.victoriametrics.com/#environment-variables) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3999). +* BUGFIX: restore query performance scalability on hosts with a big number of CPU cores. The scalability was reduced in [v1.86.0](https://docs.victoriametrics.com/CHANGELOG.html#v1860). 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966). + ## [v1.87.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.3) Released at 2023-03-12