app/vmselect: add the -search.estimatedSeriesCountAfterAggregation command-line flag for tuning the trade-off between the probability of OOMs and false-positive 'not enough memory' errors

This commit is contained in:
Aliaksandr Valialkin 2020-04-28 12:51:36 +03:00
parent 83aca79137
commit 17d96e4503

View file

@ -17,7 +17,10 @@ import (
) )
var ( var (
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from the search") maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from the search")
estimatedSeriesCountAfterAggregation = flag.Int("search.estimatedSeriesCountAfterAggregation", 1000, "Estimated number of series returned by aggregation with grouping "+
"such as `sum(...) by (...)`. Increase this value in order to reduce the probability of OOMs. Reduce this value in order to reduce 'not enough memory' errors "+
"for queries containing aggregation with grouping")
) )
// The minimum number of points per timeseries for enabling time rounding. // The minimum number of points per timeseries for enabling time rounding.
@ -668,8 +671,7 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
if iafc.ae.Modifier.Op != "" { if iafc.ae.Modifier.Op != "" {
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`, // Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
// since each group can have own set of time series in memory. // since each group can have own set of time series in memory.
// Estimate the number of such groups is lower than 1000 :) timeseriesLen *= *estimatedSeriesCountAfterAggregation
timeseriesLen *= 1000
} }
} }
rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs))) rollupPoints := mulNoOverflow(pointsPerTimeseries, int64(timeseriesLen*len(rcs)))
@ -679,8 +681,8 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
rss.Cancel() rss.Cancel()
return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+ return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+ "possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)", "increasing -memory.allowedPercent; increasing `step` query arg (%gs); reducing -search.estimatedSeriesCountAfterAggregation",
rollupPoints, rssLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3) rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, float64(ec.Step)/1e3)
} }
defer rml.Put(uint64(rollupMemorySize)) defer rml.Put(uint64(rollupMemorySize))