From 5a9300be13e621c8bada98d4972e5823362bc743 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 12 Mar 2023 16:53:00 -0700 Subject: [PATCH] app/vmselect: remove data race on updating EvalConfig.IsPartialResponse from concurrently running goroutines This properly returns `is_partial: true` for partial responses. --- app/vmselect/prometheus/prometheus.go | 4 ++-- app/vmselect/promql/eval.go | 12 +++++------- docs/CHANGELOG.md | 1 + 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index b0f66a779b..c4f77473b1 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -881,7 +881,7 @@ func QueryHandler(qt *querytracer.Tracer, startTime time.Time, at *auth.Token, w qtDone := func() { qt.Donef("query=%s, time=%d: series=%d", query, start, len(result)) } - WriteQueryResponse(bw, ec.IsPartialResponse, result, qt, qtDone) + WriteQueryResponse(bw, ec.IsPartialResponse.Load(), result, qt, qtDone) if err := bw.Flush(); err != nil { return fmt.Errorf("cannot flush query response to remote client: %w", err) } @@ -986,7 +986,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, at *auth.Tok qtDone := func() { qt.Donef("start=%d, end=%d, step=%d, query=%q: series=%d", start, end, step, query, len(result)) } - WriteQueryRangeResponse(bw, ec.IsPartialResponse, result, qt, qtDone) + WriteQueryRangeResponse(bw, ec.IsPartialResponse.Load(), result, qt, qtDone) if err := bw.Flush(); err != nil { return fmt.Errorf("cannot send query range response to remote client: %w", err) } diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go index 3ac1073ed4..a45bede8b4 100644 --- a/app/vmselect/promql/eval.go +++ b/app/vmselect/promql/eval.go @@ -129,7 +129,7 @@ type EvalConfig struct { DenyPartialResponse bool // IsPartialResponse is set during query execution and can be used by Exec caller after query execution. - IsPartialResponse bool + IsPartialResponse atomic.Bool timestamps []int64 timestampsOnce sync.Once @@ -150,16 +150,14 @@ func copyEvalConfig(src *EvalConfig) *EvalConfig { ec.RoundDigits = src.RoundDigits ec.EnforcedTagFilterss = src.EnforcedTagFilterss ec.DenyPartialResponse = src.DenyPartialResponse - ec.IsPartialResponse = src.IsPartialResponse + ec.IsPartialResponse.Store(src.IsPartialResponse.Load()) // do not copy src.timestamps - they must be generated again. return &ec } func (ec *EvalConfig) updateIsPartialResponse(isPartialResponse bool) { - if !ec.IsPartialResponse { - ec.IsPartialResponse = isPartialResponse - } + ec.IsPartialResponse.CompareAndSwap(false, isPartialResponse) } func (ec *EvalConfig) validate() { @@ -843,7 +841,7 @@ func evalRollupFuncWithoutAt(qt *querytracer.Tracer, ec *EvalConfig, funcName st if funcName == "absent_over_time" { rvs = aggregateAbsentOverTime(ec, re.Expr, rvs) } - ec.updateIsPartialResponse(ecNew.IsPartialResponse) + ec.updateIsPartialResponse(ecNew.IsPartialResponse.Load()) if offset != 0 && len(rvs) > 0 { // Make a copy of timestamps, since they may be used in other values. srcTimestamps := rvs[0].Timestamps @@ -903,7 +901,7 @@ func evalRollupFuncWithSubquery(qt *querytracer.Tracer, ec *EvalConfig, funcName if err != nil { return nil, err } - ec.updateIsPartialResponse(ecSQ.IsPartialResponse) + ec.updateIsPartialResponse(ecSQ.IsPartialResponse.Load()) if len(tssSQ) == 0 { return nil, nil } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 733921f839..cc99d9ff4a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -21,6 +21,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: vmselect: reduce memory usage and CPU usage when performing heavy queries. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3692). * BUGFIX: prevent from possible `invalid memory address or nil pointer dereference` panic during [background merge](https://docs.victoriametrics.com/#storage). The issue has been introduced at [v1.85.0](https://docs.victoriametrics.com/CHANGELOG.html#v1850). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3897). * BUGFIX: prevent from possible `SIGBUS` crash on ARM architectures (Raspberry Pi), which deny unaligned access to 8-byte words. Thanks to @oliverpool for narrowing down the issue and for [the initial attempt to fix it](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3927). +* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): always return `is_partial: true` in partial responses. Previously partial responses could be returned as non-partial in some cases. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly take into account `-rpc.disableCompression` command-line flag at `vmstorage`. It was ignored since [v1.78.0](https://docs.victoriametrics.com/CHANGELOG.html#v1780). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3932). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not register `vm_promscrape_config_*` metrics if `-promscrape.config` flag is not used. Previously those metrics were registered and never updated, which was confusing and could trigger false-positive alerts. * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): skip measurements with no fields when migrating data from influxdb. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3837).