From ce950d3b06a77192f54937d1207c530f7fc3fd0d Mon Sep 17 00:00:00 2001 From: Nikolay Date: Fri, 3 Mar 2023 12:33:42 +0100 Subject: [PATCH] =?UTF-8?q?lib{mergset,storage}:=20prevent=20possible=20ra?= =?UTF-8?q?ce=20condition=20with=20logging=20st=E2=80=A6=20(#3900)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib{mergset,storage}: prevent possible race condition with logging stats for merges Previously partwrapper could be release by background process and reference for part may be invalid during logging stats. It will lead to panic at vmstorage https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3897 --- docs/CHANGELOG.md | 1 + lib/mergeset/table.go | 17 +++++++++-------- lib/storage/partition.go | 16 +++++++++------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 905c96138..54ed3cd24 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## v1.87.x long-time support release (LTS) +* BUGFIX: prevent from possible panic during [background merge process](https://docs.victoriametrics.com/#storage). It may occur in rare case and was introduced at [v1.85.0](https://docs.victoriametrics.com/CHANGELOG.html#v1850) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not register `vm_promscrape_config_*` metrics if `-promscrape.config` flag is not used. Previously those metrics were registered and never updated, which was confusing and could trigger false-positive alerts. * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): skip measurements with no fields when migrating data from influxdb. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3837). * BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth.html): fix `cannot serve http` panic when plain HTTP request is sent to `vmauth` configured to accept requests over [proxy protocol](https://www.haproxy.org/download/2.3/doc/proxy-protocol.txt)-encoded request (e.g. when `vmauth` runs with `-httpListenAddr.useProxyProtocol` command-line flag). The issue has been introduced at [v1.87.0](https://docs.victoriametrics.com/CHANGELOG.html#v1870) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3335). diff --git a/lib/mergeset/table.go b/lib/mergeset/table.go index bac671194..486eb4742 100644 --- a/lib/mergeset/table.go +++ b/lib/mergeset/table.go @@ -1164,14 +1164,6 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFinal if err != nil { return fmt.Errorf("cannot atomically register the created part: %w", err) } - tb.swapSrcWithDstParts(pws, pwNew, dstPartType) - - d := time.Since(startTime) - if d <= 30*time.Second { - return nil - } - - // Log stats for long merges. dstItemsCount := uint64(0) dstBlocksCount := uint64(0) dstSize := uint64(0) @@ -1183,6 +1175,15 @@ func (tb *Table) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFinal dstSize = pDst.size dstPartPath = pDst.path } + + tb.swapSrcWithDstParts(pws, pwNew, dstPartType) + + d := time.Since(startTime) + if d <= 30*time.Second { + return nil + } + + // Log stats for long merges. durationSecs := d.Seconds() itemsPerSec := int(float64(srcItemsCount) / durationSecs) logger.Infof("merged (%d parts, %d items, %d blocks, %d bytes) into (1 part, %d items, %d blocks, %d bytes) in %.3f seconds at %d items/sec to %q", diff --git a/lib/storage/partition.go b/lib/storage/partition.go index ea66d5c99..b54ae1d84 100644 --- a/lib/storage/partition.go +++ b/lib/storage/partition.go @@ -1376,14 +1376,7 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFi if err != nil { return fmt.Errorf("cannot atomically register the created part: %w", err) } - pt.swapSrcWithDstParts(pws, pwNew, dstPartType) - d := time.Since(startTime) - if d <= 30*time.Second { - return nil - } - - // Log stats for long merges. dstRowsCount := uint64(0) dstBlocksCount := uint64(0) dstSize := uint64(0) @@ -1395,6 +1388,15 @@ func (pt *partition) mergeParts(pws []*partWrapper, stopCh <-chan struct{}, isFi dstSize = pDst.size dstPartPath = pDst.String() } + + pt.swapSrcWithDstParts(pws, pwNew, dstPartType) + + d := time.Since(startTime) + if d <= 30*time.Second { + return nil + } + + // Log stats for long merges. durationSecs := d.Seconds() rowsPerSec := int(float64(srcRowsCount) / durationSecs) logger.Infof("merged (%d parts, %d rows, %d blocks, %d bytes) into (1 part, %d rows, %d blocks, %d bytes) in %.3f seconds at %d rows/sec to %q",