app/vmselect/promql: add outliers_iqr(q) and outlier_iqr_over_time(m[d]) functions

These functions allow detecting anomalies in series and samples using the Interquartile range method.
See the Outliers section at https://en.wikipedia.org/wiki/Interquartile_range for more details.
Aliaksandr Valialkin 2023-10-31 22:10:29 +01:00
parent fba93dbe0b
commit ea81f6fc36
11 changed files with 173 additions and 9 deletions
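To make the method concrete, here is a minimal, self-contained Go sketch of the Interquartile range outlier rule (an illustration only, not code from this commit: it assumes quartiles are estimated by linear interpolation over the sorted values, while the implementation below relies on the codebase's `quantiles` helper):

```go
package main

import (
    "fmt"
    "math"
    "sort"
)

// quantile estimates the phi-quantile of vs via linear interpolation
// over the sorted values (an assumed estimator, for illustration only).
func quantile(phi float64, vs []float64) float64 {
    s := append([]float64(nil), vs...)
    sort.Float64s(s)
    pos := phi * float64(len(s)-1)
    lo := int(math.Floor(pos))
    hi := int(math.Ceil(pos))
    return s[lo] + (pos-float64(lo))*(s[hi]-s[lo])
}

// iqrOutlierBounds returns the [q25-1.5*iqr, q75+1.5*iqr] fences for vs.
func iqrOutlierBounds(vs []float64) (lower, upper float64) {
    q25 := quantile(0.25, vs)
    q75 := quantile(0.75, vs)
    iqr := q75 - q25
    return q25 - 1.5*iqr, q75 + 1.5*iqr
}

func main() {
    samples := []float64{1, 2, 3, 4, 8}
    lower, upper := iqrOutlierBounds(samples)
    fmt.Printf("bounds: [%g, %g]\n", lower, upper) // bounds: [-1, 7]
    for _, v := range samples {
        if v < lower || v > upper {
            fmt.Println("outlier:", v) // outlier: 8
        }
    }
}
```

`outlier_iqr_over_time(m[d])` applies this check to the last raw sample of each lookbehind window, while `outliers_iqr(q)` applies it per timestamp across the series returned by `q`.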


@@ -38,6 +38,7 @@ var aggrFuncs = map[string]aggrFunc{
    "median": aggrFuncMedian,
    "min": newAggrFunc(aggrFuncMin),
    "mode": newAggrFunc(aggrFuncMode),
    "outliers_iqr": aggrFuncOutliersIQR,
    "outliers_mad": aggrFuncOutliersMAD,
    "outliersk": aggrFuncOutliersK,
    "quantile": aggrFuncQuantile,
@@ -944,6 +945,58 @@ func aggrFuncMAD(tss []*timeseries) []*timeseries {
    return tss[:1]
}

func aggrFuncOutliersIQR(afa *aggrFuncArg) ([]*timeseries, error) {
    args := afa.args
    if err := expectTransformArgsNum(args, 1); err != nil {
        return nil, err
    }
    afe := func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries {
        // Calculate lower and upper bounds for interquartile range per each point across tss
        // according to Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
        lower, upper := getPerPointIQRBounds(tss)
        // Leave only time series with outliers above upper bound or below lower bound
        tssDst := tss[:0]
        for _, ts := range tss {
            values := ts.Values
            for i, v := range values {
                if v > upper[i] || v < lower[i] {
                    tssDst = append(tssDst, ts)
                    break
                }
            }
        }
        return tssDst
    }
    return aggrFuncExt(afe, args[0], &afa.ae.Modifier, afa.ae.Limit, true)
}

func getPerPointIQRBounds(tss []*timeseries) ([]float64, []float64) {
    if len(tss) == 0 {
        return nil, nil
    }
    pointsLen := len(tss[0].Values)
    values := make([]float64, 0, len(tss))
    var qs []float64
    lower := make([]float64, pointsLen)
    upper := make([]float64, pointsLen)
    for i := 0; i < pointsLen; i++ {
        values = values[:0]
        for _, ts := range tss {
            v := ts.Values[i]
            if !math.IsNaN(v) {
                values = append(values, v)
            }
        }
        qs = quantiles(qs[:0], iqrPhis, values)
        iqr := 1.5 * (qs[1] - qs[0])
        lower[i] = qs[0] - iqr
        upper[i] = qs[1] + iqr
    }
    return lower, upper
}

var iqrPhis = []float64{0.25, 0.75}

func aggrFuncOutliersMAD(afa *aggrFuncArg) ([]*timeseries, error) {
    args := afa.args
    if err := expectTransformArgsNum(args, 2); err != nil {
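
For reference, a standalone sketch of the same per-point filtering over plain float64 rows (an illustration only, assuming linear-interpolation quartiles; unlike `getPerPointIQRBounds` above it does not skip NaN values):

```go
package main

import (
    "fmt"
    "math"
    "sort"
)

// quantile estimates the phi-quantile of vs via linear interpolation
// over the sorted values (an assumed estimator, for illustration only).
func quantile(phi float64, vs []float64) float64 {
    s := append([]float64(nil), vs...)
    sort.Float64s(s)
    pos := phi * float64(len(s)-1)
    lo := int(math.Floor(pos))
    hi := int(math.Ceil(pos))
    return s[lo] + (pos-float64(lo))*(s[hi]-s[lo])
}

// iqrBounds computes per-point [q25-1.5*iqr, q75+1.5*iqr] fences across rows,
// where rows[j][i] is the value of series j at point i.
func iqrBounds(rows [][]float64) (lower, upper []float64) {
    pointsLen := len(rows[0])
    lower = make([]float64, pointsLen)
    upper = make([]float64, pointsLen)
    for i := 0; i < pointsLen; i++ {
        column := make([]float64, 0, len(rows))
        for _, row := range rows {
            column = append(column, row[i])
        }
        q25 := quantile(0.25, column)
        q75 := quantile(0.75, column)
        iqr := q75 - q25
        lower[i] = q25 - 1.5*iqr
        upper[i] = q75 + 1.5*iqr
    }
    return lower, upper
}

func main() {
    rows := [][]float64{
        {1000, 1200},   // m1
        {1500, 1800},   // m2
        {10000, 12000}, // m3: far above the rest
        {1200, 1440},   // m4
        {100, 120},     // m5: far below the rest
    }
    lower, upper := iqrBounds(rows)
    for j, row := range rows {
        for i, v := range row {
            if v < lower[i] || v > upper[i] {
                fmt.Printf("series m%d is an outlier at point %d: %g\n", j+1, i, v)
                break
            }
        }
    }
    // Prints:
    // series m3 is an outlier at point 0: 10000
    // series m5 is an outlier at point 0: 100
}
```

Only the series that stray outside the per-point bounds are kept, which matches what the `outliers_iqr()` test below expects for `m3` and `m5`.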


@@ -6910,6 +6910,30 @@ func TestExecSuccess(t *testing.T) {
        resultExpected := []netstorage.Result{r}
        f(q, resultExpected)
    })
    t.Run(`outliers_iqr()`, func(t *testing.T) {
        t.Parallel()
        q := `sort(outliers_iqr((
            alias(time(), "m1"),
            alias(time()*1.5, "m2"),
            alias(time()*10, "m3"),
            alias(time()*1.2, "m4"),
            alias(time()*0.1, "m5"),
        )))`
        r1 := netstorage.Result{
            MetricName: metricNameExpected,
            Values:     []float64{100, 120, 140, 160, 180, 200},
            Timestamps: timestampsExpected,
        }
        r1.MetricName.MetricGroup = []byte("m5")
        r2 := netstorage.Result{
            MetricName: metricNameExpected,
            Values:     []float64{10000, 12000, 14000, 16000, 18000, 20000},
            Timestamps: timestampsExpected,
        }
        r2.MetricName.MetricGroup = []byte("m3")
        resultExpected := []netstorage.Result{r1, r2}
        f(q, resultExpected)
    })
    t.Run(`outliers_mad(1)`, func(t *testing.T) {
        t.Parallel()
        q := `outliers_mad(1, (


@@ -62,6 +62,7 @@ var rollupFuncs = map[string]newRollupFunc{
    "median_over_time": newRollupFuncOneArg(rollupMedian),
    "min_over_time": newRollupFuncOneArg(rollupMin),
    "mode_over_time": newRollupFuncOneArg(rollupModeOverTime),
    "outlier_iqr_over_time": newRollupFuncOneArg(rollupOutlierIQR),
    "predict_linear": newRollupPredictLinear,
    "present_over_time": newRollupFuncOneArg(rollupPresent),
    "quantile_over_time": newRollupQuantile,
@@ -122,6 +123,7 @@ var rollupAggrFuncs = map[string]rollupFunc{
    "increases_over_time": rollupIncreases,
    "integrate": rollupIntegrate,
    "irate": rollupIderiv,
    "iqr_over_time": rollupOutlierIQR,
    "lag": rollupLag,
    "last_over_time": rollupLast,
    "lifetime": rollupLifetime,
@@ -225,6 +227,7 @@ var rollupFuncsKeepMetricName = map[string]bool{
    "hoeffding_bound_lower": true,
    "hoeffding_bound_upper": true,
    "holt_winters": true,
    "iqr_over_time": true,
    "last_over_time": true,
    "max_over_time": true,
    "median_over_time": true,
@@ -1287,6 +1290,29 @@ func newRollupQuantiles(args []interface{}) (rollupFunc, error) {
    return rf, nil
}

func rollupOutlierIQR(rfa *rollupFuncArg) float64 {
    // There is no need to handle NaNs here, since they must be cleaned up
    // before calling rollup funcs.
    // See Outliers section at https://en.wikipedia.org/wiki/Interquartile_range
    values := rfa.values
    if len(values) < 2 {
        return nan
    }
    qs := getFloat64s()
    qs.A = quantiles(qs.A[:0], iqrPhis, values)
    q25 := qs.A[0]
    q75 := qs.A[1]
    iqr := 1.5 * (q75 - q25)
    putFloat64s(qs)
    v := values[len(values)-1]
    if v > q75+iqr || v < q25-iqr {
        return v
    }
    return nan
}

func newRollupQuantile(args []interface{}) (rollupFunc, error) {
    if err := expectRollupArgsNum(args, 2); err != nil {
        return nil, err


@@ -12,6 +12,35 @@ var (
    testTimestamps = []int64{5, 15, 24, 36, 49, 60, 78, 80, 97, 115, 120, 130}
)

func TestRollupOutlierIQR(t *testing.T) {
    f := func(values []float64, resultExpected float64) {
        t.Helper()
        rfa := &rollupFuncArg{
            values:     values,
            timestamps: nil,
        }
        result := rollupOutlierIQR(rfa)
        if math.IsNaN(result) {
            if !math.IsNaN(resultExpected) {
                t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
            }
        } else {
            if math.IsNaN(resultExpected) {
                t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
            }
            if result != resultExpected {
                t.Fatalf("unexpected value; got %v; want %v", result, resultExpected)
            }
        }
    }
    f([]float64{1, 2, 3, 4, 5}, nan)
    f([]float64{1, 2, 3, 4, 7}, nan)
    f([]float64{1, 2, 3, 4, 8}, 8)
    f([]float64{1, 2, 3, 4, -2}, nan)
    f([]float64{1, 2, 3, 4, -3}, -3)
}

func TestRollupIderivDuplicateTimestamps(t *testing.T) {
    rfa := &rollupFuncArg{
        values: []float64{1, 2, 3, 4, 5},
@@ -186,6 +215,9 @@ func testRollupFunc(t *testing.T, funcName string, args []interface{}, vExpected
            t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
        }
    } else {
        if math.IsNaN(v) {
            t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
        }
        eps := math.Abs(v - vExpected)
        if eps > 1e-14 {
            t.Fatalf("unexpected value; got %v; want %v", v, vExpected)
@@ -514,6 +546,7 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
    f("increase", 398)
    f("increase_prometheus", 275)
    f("irate", 0)
    f("outlier_iqr_over_time", nan)
    f("rate", 2200)
    f("resets", 5)
    f("range_over_time", 111)


@@ -44,6 +44,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: `vmselect`: expose `vm_memory_intensive_queries_total` counter metric which gets increased each time `-search.logQueryMemoryUsage` memory limit is exceeded by a query. This metric should help to identify expensive and heavy queries without inspecting the logs.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [drop_empty_series()](https://docs.victoriametrics.com/MetricsQL.html#drop_empty_series) function, which can be used for filtering out empty series before performing additional calculations as shown in [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5071).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [labels_equal()](https://docs.victoriametrics.com/MetricsQL.html#labels_equal) function, which can be used for searching series with identical values for the given labels. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5148).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [`outlier_iqr_over_time(m[d])`](https://docs.victoriametrics.com/MetricsQL.html#outlier_iqr_over_time) and [`outliers_iqr(q)`](https://docs.victoriametrics.com/MetricsQL.html#outliers_iqr) functions, which allow detecting anomalies in samples and series using the [Interquartile range method](https://en.wikipedia.org/wiki/Interquartile_range).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `eval_alignment` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups). It aligns group query request timestamps with the group's interval, as `datasource.queryTimeAlignment` did.
  This also means that the `datasource.queryTimeAlignment` command-line flag is now deprecated and has no effect if configured. If `datasource.queryTimeAlignment` was previously set to `false`, then `eval_alignment` has to be set to `false` explicitly under the group.
  See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049).


@@ -532,7 +532,7 @@ See also [duration_over_time](#duration_over_time) and [lag](#lag).
`mad_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation)
over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).

-See also [mad](#mad) and [range_mad](#range_mad).
+See also [mad](#mad), [range_mad](#range_mad) and [outlier_iqr_over_time](#outlier_iqr_over_time).

#### max_over_time
@@ -562,6 +562,18 @@ This function is supported by PromQL. See also [tmin_over_time](#tmin_over_time)
for raw samples on the given lookbehind window `d`. It is calculated individually per each time series returned
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). It is expected that raw sample values are discrete.

#### outlier_iqr_over_time

`outlier_iqr_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns the last sample on the given lookbehind window `d`
if its value is either smaller than `q25-1.5*iqr` or bigger than `q75+1.5*iqr` where:

- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) over raw samples on the lookbehind window `d`
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) over raw samples on the lookbehind window `d`.

`outlier_iqr_over_time()` is useful for detecting anomalies in gauge values based on the previous history of values.
For example, `outlier_iqr_over_time(memory_usage_bytes[1h])` triggers when `memory_usage_bytes` suddenly goes outside the usual value range for the last hour.

See also [outliers_iqr](#outliers_iqr).
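
As an illustration (assuming quartiles are estimated with linear interpolation over the sorted samples, matching the commit's tests): given raw samples `1, 2, 3, 4, 8` on the lookbehind window, `q25=2`, `q75=4` and `iqr=4-2=2`, so the acceptable range is `[2-3 .. 4+3] = [-1 .. 7]`; the last sample `8` falls above it and is returned, while a last sample of `5` would yield no result.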
#### predict_linear
`predict_linear(series_selector[d], t)` is a [rollup function](#rollup-functions), which calculates the value `t` seconds in the future using
@@ -866,7 +878,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.

-See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
+See also [zscore](#zscore), [range_trim_zscore](#range_trim_zscore) and [outlier_iqr_over_time](#outlier_iqr_over_time).

### Transform functions
@@ -1858,20 +1870,33 @@ This function is supported by PromQL.
`mode(q) by (group_labels)` is an [aggregate function](#aggregate-functions), which returns [mode](https://en.wikipedia.org/wiki/Mode_(statistics))
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.

#### outliers_iqr

`outliers_iqr(q)` is an [aggregate function](#aggregate-functions), which returns time series from `q` with at least a single point
outside the [Interquartile range outlier bounds](https://en.wikipedia.org/wiki/Interquartile_range) `[q25-1.5*iqr .. q75+1.5*iqr]`
when compared to other time series at the given point, where:

- `iqr` is an [Interquartile range](https://en.wikipedia.org/wiki/Interquartile_range) calculated independently per each point on the graph across `q` series.
- `q25` and `q75` are 25th and 75th [percentiles](https://en.wikipedia.org/wiki/Percentile) calculated independently per each point on the graph across `q` series.

`outliers_iqr()` is useful for detecting anomalous series in a group of series. For example, `outliers_iqr(temperature) by (country)` returns
per-country series with anomalous outlier values compared to the rest of the per-country series.

See also [outliers_mad](#outliers_mad), [outliersk](#outliersk) and [outlier_iqr_over_time](#outlier_iqr_over_time).
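
As an illustration (again assuming linear-interpolation quartiles): if five series have the values `100`, `1000`, `1200`, `1500` and `10000` at some timestamp, then `q25=1000`, `q75=1500` and `iqr=1500-1000=500`, so the per-point bounds are `[250 .. 2250]` and the series with the values `100` and `10000` are reported as outliers at that point.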

#### outliers_mad

`outliers_mad(tolerance, q)` is an [aggregate function](#aggregate-functions), which returns time series from `q` with at least
a single point outside [Median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) (aka MAD) multiplied by `tolerance`.
E.g. it returns time series with at least a single point below `median(q) - mad(q)` or a single point above `median(q) + mad(q)`.

-See also [outliersk](#outliersk) and [mad](#mad).
+See also [outliers_iqr](#outliers_iqr), [outliersk](#outliersk) and [mad](#mad).

#### outliersk

`outliersk(k, q)` is an [aggregate function](#aggregate-functions), which returns up to `k` time series with the biggest standard deviation (aka outliers)
out of time series returned by `q`.

-See also [outliers_mad](#outliers_mad).
+See also [outliers_iqr](#outliers_iqr) and [outliers_mad](#outliers_mad).

#### quantile

@@ -1991,7 +2016,7 @@ See also [bottomk_min](#bottomk_min).
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
This function is useful for detecting anomalies in the group of related time series.

-See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
+See also [zscore_over_time](#zscore_over_time), [range_trim_zscore](#range_trim_zscore) and [outliers_iqr](#outliers_iqr).

## Subqueries

go.mod

@@ -12,7 +12,7 @@ require (
    // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
    github.com/VictoriaMetrics/fasthttp v1.2.0
    github.com/VictoriaMetrics/metrics v1.24.0
-   github.com/VictoriaMetrics/metricsql v0.68.0
+   github.com/VictoriaMetrics/metricsql v0.69.0
    github.com/aws/aws-sdk-go-v2 v1.22.0
    github.com/aws/aws-sdk-go-v2/config v1.20.0
    github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.12.0

go.sum

@@ -70,8 +70,8 @@ github.com/VictoriaMetrics/fasthttp v1.2.0 h1:nd9Wng4DlNtaI27WlYh5mGXCJOmee/2c2b
github.com/VictoriaMetrics/fasthttp v1.2.0/go.mod h1:zv5YSmasAoSyv8sBVexfArzFDIGGTN4TfCKAtAw7IfE=
github.com/VictoriaMetrics/metrics v1.24.0 h1:ILavebReOjYctAGY5QU2F9X0MYvkcrG3aEn2RKa1Zkw=
github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys=
-github.com/VictoriaMetrics/metricsql v0.68.0 h1:fAzYPjYkEipM/L/+WYbAK/gYuqt5rQHnb3cTY2cN628=
-github.com/VictoriaMetrics/metricsql v0.68.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
+github.com/VictoriaMetrics/metricsql v0.69.0 h1:6np68zGOnMiGEJR/rCvywS1gbLGXVrmQC3BKydsbWHw=
+github.com/VictoriaMetrics/metricsql v0.69.0/go.mod h1:k4UaP/+CjuZslIjd+kCigNG9TQmUqh5v0TP/nMEy90I=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=


@@ -25,6 +25,7 @@ var aggrFuncs = map[string]bool{
    "median": true,
    "min": true,
    "mode": true,
    "outliers_iqr": true,
    "outliers_mad": true,
    "outliersk": true,
    "quantile": true,


@@ -47,6 +47,7 @@ var rollupFuncs = map[string]bool{
    "median_over_time": true,
    "min_over_time": true,
    "mode_over_time": true,
    "outlier_iqr_over_time": true,
    "predict_linear": true,
    "present_over_time": true,
    "quantile_over_time": true,

vendor/modules.txt

@@ -99,7 +99,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.24.0
## explicit; go 1.20
github.com/VictoriaMetrics/metrics
-# github.com/VictoriaMetrics/metricsql v0.68.0
+# github.com/VictoriaMetrics/metricsql v0.69.0
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop