mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-11 15:34:56 +00:00
app/vmselect: fix the way of counting raw samples in single query (#6464)
The limit is specified with the command-line flag
`-search.maxSamplesPerQuery`.
Previously, samples could be over-counted, so a query that hit the limit
could not be fixed by reducing its time range.
Addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5851
(cherry picked from commit 6e395048d3
)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
8ca1813bd2
commit
5ecf439078
2 changed files with 27 additions and 9 deletions
|
@ -116,6 +116,13 @@ type timeseriesWork struct {
|
|||
err error
|
||||
|
||||
rowsProcessed int
|
||||
|
||||
querySamplesQuota *querySamplesQuota
|
||||
}
|
||||
|
||||
// querySamplesQuota tracks how many more samples a single query may process.
//
// runParallel creates one instance per call, seeds it with the value of
// -search.maxSamplesPerQuery and hands the same pointer to every
// timeseriesWork it initializes, so the budget is shared by all of them.
type querySamplesQuota struct {
|
||||
// mu guards samplesQuota; timeseriesWork.do locks it around every update.
mu sync.Mutex
|
||||
// samplesQuota is the remaining budget. timeseriesWork.do subtracts the
// number of timestamps left after unpacking a series; a negative value
// means the limit has been exceeded and the query is stopped with an error.
samplesQuota int
|
||||
}
|
||||
|
||||
func (tsw *timeseriesWork) do(r *Result, workerID uint) error {
|
||||
|
@ -132,6 +139,19 @@ func (tsw *timeseriesWork) do(r *Result, workerID uint) error {
|
|||
return fmt.Errorf("error during time series unpacking: %w", err)
|
||||
}
|
||||
tsw.rowsProcessed = len(r.Timestamps)
|
||||
|
||||
tsw.querySamplesQuota.mu.Lock()
|
||||
tsw.querySamplesQuota.samplesQuota -= tsw.rowsProcessed
|
||||
if tsw.querySamplesQuota.samplesQuota < 0 {
|
||||
tsw.mustStop.Store(true)
|
||||
tsw.querySamplesQuota.mu.Unlock()
|
||||
return &limitExceededErr{
|
||||
err: fmt.Errorf("cannot select more than -search.maxSamplesPerQuery=%d samples; possible solutions: increase the -search.maxSamplesPerQuery; "+
|
||||
"reduce time range for the query; use more specific label filters in order to select fewer series", *maxSamplesPerQuery),
|
||||
}
|
||||
}
|
||||
tsw.querySamplesQuota.mu.Unlock()
|
||||
|
||||
if len(r.Timestamps) > 0 {
|
||||
if err := tsw.f(r, workerID); err != nil {
|
||||
tsw.mustStop.Store(true)
|
||||
|
@ -267,11 +287,16 @@ func (rss *Results) runParallel(qt *querytracer.Tracer, f func(rs *Result, worke
|
|||
}
|
||||
|
||||
var mustStop atomic.Bool
|
||||
limit := *maxSamplesPerQuery
|
||||
sampleQuota := &querySamplesQuota{
|
||||
samplesQuota: limit,
|
||||
}
|
||||
initTimeseriesWork := func(tsw *timeseriesWork, pts *packedTimeseries) {
|
||||
tsw.rss = rss
|
||||
tsw.pts = pts
|
||||
tsw.f = f
|
||||
tsw.mustStop = &mustStop
|
||||
tsw.querySamplesQuota = sampleQuota
|
||||
}
|
||||
maxWorkers := MaxWorkers()
|
||||
if maxWorkers == 1 || tswsLen == 1 {
|
||||
|
@ -1678,17 +1703,9 @@ func ProcessSearchQuery(qt *querytracer.Tracer, denyPartialResponse bool, sq *st
|
|||
tbfw := newTmpBlocksFileWrapper(sns)
|
||||
blocksRead := newPerNodeCounter(sns)
|
||||
samples := newPerNodeCounter(sns)
|
||||
maxSamplesPerWorker := uint64(*maxSamplesPerQuery) / uint64(len(sns))
|
||||
processBlock := func(mb *storage.MetricBlock, workerID uint) error {
|
||||
blocksRead.Add(workerID, 1)
|
||||
n := samples.Add(workerID, uint64(mb.Block.RowsCount()))
|
||||
if *maxSamplesPerQuery > 0 && n > maxSamplesPerWorker && samples.GetTotal() > uint64(*maxSamplesPerQuery) {
|
||||
return &limitExceededErr{
|
||||
err: fmt.Errorf("cannot select more than -search.maxSamplesPerQuery=%d samples; possible solutions: "+
|
||||
"increase the -search.maxSamplesPerQuery; reduce time range for the query; "+
|
||||
"use more specific label filters in order to select fewer series", *maxSamplesPerQuery),
|
||||
}
|
||||
}
|
||||
samples.Add(workerID, uint64(mb.Block.RowsCount()))
|
||||
if err := tbfw.RegisterAndWriteBlock(mb, workerID); err != nil {
|
||||
return fmt.Errorf("cannot write MetricBlock to temporary blocks file: %w", err)
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert/) enterprise: properly configure authentication with S3 when `-s3.configFilePath` cmd-line flag is specified for reading rule configs.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert/): properly specify oauth2 `ClientSecret` when configuring authentication for `notifier.url`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6471) for details. Thanks to @yincongcyincong for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6478).
|
||||
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add validation for the max value specified for `-retentionPeriod`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6330) for details.
|
||||
* BUGFIX: [vmselect](https://docs.victoriametrics.com/cluster-victoriametrics/): calculate the exact number of [raw samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) during query processing when enforcing the limit specified via the `-search.maxSamplesPerQuery` command-line flag. Previously, samples could be over-counted because of historically merged data, which led to false-positive errors about exceeding `-search.maxSamplesPerQuery`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5851).
|
||||
|
||||
## [v1.102.0-rc1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.0-rc1)
|
||||
|
||||
|
|
Loading…
Reference in a new issue