mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
app/vmselect/promql: add hoeffding_bound_upper(phi, m[d])
and hoeffding_bound_lower(phi, m[d])
functions
These functions can be used for calculating Hoeffding bounds for `m` over `d` time range and for the given `phi` in the range `[0..1]`. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/283
This commit is contained in:
parent
8eaced8cae
commit
8b14572f70
6 changed files with 205 additions and 63 deletions
|
@ -605,6 +605,14 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
|
||||||
rollupResultCacheMiss.Inc()
|
rollupResultCacheMiss.Inc()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Obtain rollup configs before fetching data from db,
|
||||||
|
// so type errors can be caught earlier.
|
||||||
|
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
|
||||||
|
preFunc, rcs, err := getRollupConfigs(name, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
// Fetch the remaining part of the result.
|
// Fetch the remaining part of the result.
|
||||||
tfs := toTagFilters(me.LabelFilters)
|
tfs := toTagFilters(me.LabelFilters)
|
||||||
sq := &storage.SearchQuery{
|
sq := &storage.SearchQuery{
|
||||||
|
@ -629,12 +637,6 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc,
|
||||||
tss = mergeTimeseries(tssCached, tss, start, ec)
|
tss = mergeTimeseries(tssCached, tss, start, ec)
|
||||||
return tss, nil
|
return tss, nil
|
||||||
}
|
}
|
||||||
sharedTimestamps := getTimestamps(start, ec.End, ec.Step)
|
|
||||||
preFunc, rcs, err := getRollupConfigs(name, rf, expr, start, ec.End, ec.Step, window, ec.LookbackDelta, sharedTimestamps)
|
|
||||||
if err != nil {
|
|
||||||
rss.Cancel()
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify timeseries fit available memory after the rollup.
|
// Verify timeseries fit available memory after the rollup.
|
||||||
// Take into account points from tssCached.
|
// Take into account points from tssCached.
|
||||||
|
|
|
@ -4480,6 +4480,29 @@ func TestExecSuccess(t *testing.T) {
|
||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
f(q, resultExpected)
|
f(q, resultExpected)
|
||||||
})
|
})
|
||||||
|
t.Run(`hoeffding_bound_lower()`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `hoeffding_bound_lower(0.9, rand(0)[:10s])`
|
||||||
|
r := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{0.2516770508510652, 0.2830570387745462, 0.27716232108436645, 0.3679356319931767, 0.3168460474120903, 0.23156726248243734},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
resultExpected := []netstorage.Result{r}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
|
t.Run(`hoeffding_bound_upper()`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `hoeffding_bound_upper(0.9, alias(rand(0), "foobar")[:10s])`
|
||||||
|
r := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{0.6510581320042821, 0.7261021731890429, 0.7245290097397009, 0.8113950442584258, 0.7736122275568004, 0.6658564048254882},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
r.MetricName.MetricGroup = []byte("foobar")
|
||||||
|
resultExpected := []netstorage.Result{r}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
t.Run(`aggr_over_time(single-func)`, func(t *testing.T) {
|
t.Run(`aggr_over_time(single-func)`, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
q := `aggr_over_time("increase", rand(0)[:10s])`
|
q := `aggr_over_time("increase", rand(0)[:10s])`
|
||||||
|
@ -5267,6 +5290,12 @@ func TestExecError(t *testing.T) {
|
||||||
f(`aggr_over_time()`)
|
f(`aggr_over_time()`)
|
||||||
f(`aggr_over_time(foo)`)
|
f(`aggr_over_time(foo)`)
|
||||||
f(`aggr_over_time("foo", bar, 1)`)
|
f(`aggr_over_time("foo", bar, 1)`)
|
||||||
|
f(`hoeffding_bound_lower()`)
|
||||||
|
f(`hoeffding_bound_lower(1)`)
|
||||||
|
f(`hoeffding_bound_lower(0.99, foo, 1)`)
|
||||||
|
f(`hoeffding_bound_upper()`)
|
||||||
|
f(`hoeffding_bound_upper(1)`)
|
||||||
|
f(`hoeffding_bound_upper(0.99, foo, 1)`)
|
||||||
|
|
||||||
// Invalid argument type
|
// Invalid argument type
|
||||||
f(`median_over_time({}, 2)`)
|
f(`median_over_time({}, 2)`)
|
||||||
|
|
|
@ -41,30 +41,32 @@ var rollupFuncs = map[string]newRollupFunc{
|
||||||
"absent_over_time": newRollupFuncOneArg(rollupAbsent),
|
"absent_over_time": newRollupFuncOneArg(rollupAbsent),
|
||||||
|
|
||||||
// Additional rollup funcs.
|
// Additional rollup funcs.
|
||||||
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
|
"sum2_over_time": newRollupFuncOneArg(rollupSum2),
|
||||||
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
|
"geomean_over_time": newRollupFuncOneArg(rollupGeomean),
|
||||||
"first_over_time": newRollupFuncOneArg(rollupFirst),
|
"first_over_time": newRollupFuncOneArg(rollupFirst),
|
||||||
"last_over_time": newRollupFuncOneArg(rollupLast),
|
"last_over_time": newRollupFuncOneArg(rollupLast),
|
||||||
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
|
"distinct_over_time": newRollupFuncOneArg(rollupDistinct),
|
||||||
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
|
"increases_over_time": newRollupFuncOneArg(rollupIncreases),
|
||||||
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
|
"decreases_over_time": newRollupFuncOneArg(rollupDecreases),
|
||||||
"integrate": newRollupFuncOneArg(rollupIntegrate),
|
"integrate": newRollupFuncOneArg(rollupIntegrate),
|
||||||
"ideriv": newRollupFuncOneArg(rollupIderiv),
|
"ideriv": newRollupFuncOneArg(rollupIderiv),
|
||||||
"lifetime": newRollupFuncOneArg(rollupLifetime),
|
"lifetime": newRollupFuncOneArg(rollupLifetime),
|
||||||
"lag": newRollupFuncOneArg(rollupLag),
|
"lag": newRollupFuncOneArg(rollupLag),
|
||||||
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
|
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
|
||||||
"tmin_over_time": newRollupFuncOneArg(rollupTmin),
|
"tmin_over_time": newRollupFuncOneArg(rollupTmin),
|
||||||
"tmax_over_time": newRollupFuncOneArg(rollupTmax),
|
"tmax_over_time": newRollupFuncOneArg(rollupTmax),
|
||||||
"share_le_over_time": newRollupShareLE,
|
"share_le_over_time": newRollupShareLE,
|
||||||
"share_gt_over_time": newRollupShareGT,
|
"share_gt_over_time": newRollupShareGT,
|
||||||
"histogram_over_time": newRollupFuncOneArg(rollupHistogram),
|
"histogram_over_time": newRollupFuncOneArg(rollupHistogram),
|
||||||
"rollup": newRollupFuncOneArg(rollupFake),
|
"rollup": newRollupFuncOneArg(rollupFake),
|
||||||
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
|
"rollup_rate": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
|
||||||
"rollup_deriv": newRollupFuncOneArg(rollupFake),
|
"rollup_deriv": newRollupFuncOneArg(rollupFake),
|
||||||
"rollup_delta": newRollupFuncOneArg(rollupFake),
|
"rollup_delta": newRollupFuncOneArg(rollupFake),
|
||||||
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
|
"rollup_increase": newRollupFuncOneArg(rollupFake), // + rollupFuncsRemoveCounterResets
|
||||||
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
|
"rollup_candlestick": newRollupFuncOneArg(rollupFake),
|
||||||
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
|
"aggr_over_time": newRollupFuncTwoArgs(rollupFake),
|
||||||
|
"hoeffding_bound_upper": newRollupHoeffdingBoundUpper,
|
||||||
|
"hoeffding_bound_lower": newRollupHoeffdingBoundLower,
|
||||||
}
|
}
|
||||||
|
|
||||||
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
|
// rollupAggrFuncs are functions that can be passed to `aggr_over_time()`
|
||||||
|
@ -136,13 +138,15 @@ var rollupFuncsRemoveCounterResets = map[string]bool{
|
||||||
}
|
}
|
||||||
|
|
||||||
var rollupFuncsKeepMetricGroup = map[string]bool{
|
var rollupFuncsKeepMetricGroup = map[string]bool{
|
||||||
"default_rollup": true,
|
"default_rollup": true,
|
||||||
"avg_over_time": true,
|
"avg_over_time": true,
|
||||||
"min_over_time": true,
|
"min_over_time": true,
|
||||||
"max_over_time": true,
|
"max_over_time": true,
|
||||||
"quantile_over_time": true,
|
"quantile_over_time": true,
|
||||||
"rollup": true,
|
"rollup": true,
|
||||||
"geomean_over_time": true,
|
"geomean_over_time": true,
|
||||||
|
"hoeffding_bound_lower": true,
|
||||||
|
"hoeffding_bound_upper": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
|
func getRollupAggrFuncNames(expr metricsql.Expr) ([]string, error) {
|
||||||
|
@ -200,7 +204,8 @@ func getRollupArgIdx(funcName string) int {
|
||||||
logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", funcName)
|
logger.Panicf("BUG: getRollupArgIdx is called for non-rollup func %q", funcName)
|
||||||
}
|
}
|
||||||
switch funcName {
|
switch funcName {
|
||||||
case "quantile_over_time", "aggr_over_time":
|
case "quantile_over_time", "aggr_over_time",
|
||||||
|
"hoeffding_bound_lower", "hoeffding_bound_upper":
|
||||||
return 1
|
return 1
|
||||||
default:
|
default:
|
||||||
return 0
|
return 0
|
||||||
|
@ -828,6 +833,66 @@ func newRollupShareFilter(args []interface{}, countFilter func(values []float64,
|
||||||
return rf, nil
|
return rf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newRollupHoeffdingBoundLower(args []interface{}) (rollupFunc, error) {
|
||||||
|
if err := expectRollupArgsNum(args, 2); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
phis, err := getScalar(args[0], 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rf := func(rfa *rollupFuncArg) float64 {
|
||||||
|
bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
|
||||||
|
return avg - bound
|
||||||
|
}
|
||||||
|
return rf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newRollupHoeffdingBoundUpper(args []interface{}) (rollupFunc, error) {
|
||||||
|
if err := expectRollupArgsNum(args, 2); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
phis, err := getScalar(args[0], 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rf := func(rfa *rollupFuncArg) float64 {
|
||||||
|
bound, avg := rollupHoeffdingBoundInternal(rfa, phis)
|
||||||
|
return avg + bound
|
||||||
|
}
|
||||||
|
return rf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func rollupHoeffdingBoundInternal(rfa *rollupFuncArg, phis []float64) (float64, float64) {
|
||||||
|
// There is no need in handling NaNs here, since they must be cleaned up
|
||||||
|
// before calling rollup funcs.
|
||||||
|
values := rfa.values
|
||||||
|
if len(values) == 0 {
|
||||||
|
return nan, nan
|
||||||
|
}
|
||||||
|
if len(values) == 1 {
|
||||||
|
return 0, values[0]
|
||||||
|
}
|
||||||
|
vMax := rollupMax(rfa)
|
||||||
|
vMin := rollupMin(rfa)
|
||||||
|
vAvg := rollupAvg(rfa)
|
||||||
|
vRange := vMax - vMin
|
||||||
|
if vRange <= 0 {
|
||||||
|
return 0, vAvg
|
||||||
|
}
|
||||||
|
phi := phis[rfa.idx]
|
||||||
|
if phi >= 1 {
|
||||||
|
return inf, vAvg
|
||||||
|
}
|
||||||
|
if phi <= 0 {
|
||||||
|
return 0, vAvg
|
||||||
|
}
|
||||||
|
// See https://en.wikipedia.org/wiki/Hoeffding%27s_inequality
|
||||||
|
// and https://www.youtube.com/watch?v=6UwcqiNsZ8U&feature=youtu.be&t=1237
|
||||||
|
bound := vRange * math.Sqrt(math.Log(1/(1-phi))/(2*float64(len(values))))
|
||||||
|
return bound, vAvg
|
||||||
|
}
|
||||||
|
|
||||||
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
|
func newRollupQuantile(args []interface{}) (rollupFunc, error) {
|
||||||
if err := expectRollupArgsNum(args, 2); err != nil {
|
if err := expectRollupArgsNum(args, 2); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
|
@ -310,6 +310,48 @@ func TestRollupHoltWinters(t *testing.T) {
|
||||||
f(0.9, 0.9, 33.99637566941818)
|
f(0.9, 0.9, 33.99637566941818)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRollupHoeffdingBoundLower(t *testing.T) {
|
||||||
|
f := func(phi, vExpected float64) {
|
||||||
|
t.Helper()
|
||||||
|
phis := []*timeseries{{
|
||||||
|
Values: []float64{phi},
|
||||||
|
Timestamps: []int64{123},
|
||||||
|
}}
|
||||||
|
var me metricsql.MetricExpr
|
||||||
|
args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
|
||||||
|
testRollupFunc(t, "hoeffding_bound_lower", args, &me, vExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
f(0.5, 28.21949401521037)
|
||||||
|
f(-1, 47.083333333333336)
|
||||||
|
f(0, 47.083333333333336)
|
||||||
|
f(1, -inf)
|
||||||
|
f(2, -inf)
|
||||||
|
f(0.1, 39.72878000047643)
|
||||||
|
f(0.9, 12.701803086472331)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRollupHoeffdingBoundUpper(t *testing.T) {
|
||||||
|
f := func(phi, vExpected float64) {
|
||||||
|
t.Helper()
|
||||||
|
phis := []*timeseries{{
|
||||||
|
Values: []float64{phi},
|
||||||
|
Timestamps: []int64{123},
|
||||||
|
}}
|
||||||
|
var me metricsql.MetricExpr
|
||||||
|
args := []interface{}{phis, &metricsql.RollupExpr{Expr: &me}}
|
||||||
|
testRollupFunc(t, "hoeffding_bound_upper", args, &me, vExpected)
|
||||||
|
}
|
||||||
|
|
||||||
|
f(0.5, 65.9471726514563)
|
||||||
|
f(-1, 47.083333333333336)
|
||||||
|
f(0, 47.083333333333336)
|
||||||
|
f(1, inf)
|
||||||
|
f(2, inf)
|
||||||
|
f(0.1, 54.43788666619024)
|
||||||
|
f(0.9, 81.46486358019433)
|
||||||
|
}
|
||||||
|
|
||||||
func TestRollupNewRollupFuncSuccess(t *testing.T) {
|
func TestRollupNewRollupFuncSuccess(t *testing.T) {
|
||||||
f := func(funcName string, vExpected float64) {
|
f := func(funcName string, vExpected float64) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
|
@ -103,3 +103,5 @@ This functionality can be tried at [an editable Grafana dashboard](http://play-g
|
||||||
- `aggr_over_time(("aggr_func1", "aggr_func2", ...), m[d])` - simultaneously calculates all the listed `aggr_func*` for `m` over `d` time range.
|
- `aggr_over_time(("aggr_func1", "aggr_func2", ...), m[d])` - simultaneously calculates all the listed `aggr_func*` for `m` over `d` time range.
|
||||||
`aggr_func*` can contain any functions that accept range vector. For instance, `aggr_over_time(("min_over_time", "max_over_time", "rate"), m[d])`
|
`aggr_func*` can contain any functions that accept range vector. For instance, `aggr_over_time(("min_over_time", "max_over_time", "rate"), m[d])`
|
||||||
would calculate `min_over_time`, `max_over_time` and `rate` for `m[d]`.
|
would calculate `min_over_time`, `max_over_time` and `rate` for `m[d]`.
|
||||||
|
- `hoeffding_bound_upper(phi, m[d])` and `hoeffding_bound_lower(phi, m[d])` - return upper and lower [Hoeffding bounds](https://en.wikipedia.org/wiki/Hoeffding%27s_inequality)
|
||||||
|
for the given `phi` in the range `[0..1]`.
|
||||||
|
|
|
@ -29,31 +29,33 @@ var rollupFuncs = map[string]bool{
|
||||||
"absent_over_time": true,
|
"absent_over_time": true,
|
||||||
|
|
||||||
// Additional rollup funcs.
|
// Additional rollup funcs.
|
||||||
"default_rollup": true,
|
"default_rollup": true,
|
||||||
"sum2_over_time": true,
|
"sum2_over_time": true,
|
||||||
"geomean_over_time": true,
|
"geomean_over_time": true,
|
||||||
"first_over_time": true,
|
"first_over_time": true,
|
||||||
"last_over_time": true,
|
"last_over_time": true,
|
||||||
"distinct_over_time": true,
|
"distinct_over_time": true,
|
||||||
"increases_over_time": true,
|
"increases_over_time": true,
|
||||||
"decreases_over_time": true,
|
"decreases_over_time": true,
|
||||||
"integrate": true,
|
"integrate": true,
|
||||||
"ideriv": true,
|
"ideriv": true,
|
||||||
"lifetime": true,
|
"lifetime": true,
|
||||||
"lag": true,
|
"lag": true,
|
||||||
"scrape_interval": true,
|
"scrape_interval": true,
|
||||||
"tmin_over_time": true,
|
"tmin_over_time": true,
|
||||||
"tmax_over_time": true,
|
"tmax_over_time": true,
|
||||||
"share_le_over_time": true,
|
"share_le_over_time": true,
|
||||||
"share_gt_over_time": true,
|
"share_gt_over_time": true,
|
||||||
"histogram_over_time": true,
|
"histogram_over_time": true,
|
||||||
"rollup": true,
|
"rollup": true,
|
||||||
"rollup_rate": true,
|
"rollup_rate": true,
|
||||||
"rollup_deriv": true,
|
"rollup_deriv": true,
|
||||||
"rollup_delta": true,
|
"rollup_delta": true,
|
||||||
"rollup_increase": true,
|
"rollup_increase": true,
|
||||||
"rollup_candlestick": true,
|
"rollup_candlestick": true,
|
||||||
"aggr_over_time": true,
|
"aggr_over_time": true,
|
||||||
|
"hoeffding_bound_upper": true,
|
||||||
|
"hoeffding_bound_lower": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsRollupFunc returns whether funcName is known rollup function.
|
// IsRollupFunc returns whether funcName is known rollup function.
|
||||||
|
|
Loading…
Reference in a new issue