mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmselect/promql: add range_zscore(q) and range_trim_zscore(z, q) functions
These functions may be useful for dropping outliers at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759
This commit is contained in:
parent
94334ed0bb
commit
84b5532bc1
9 changed files with 116 additions and 14 deletions
|
@ -6535,6 +6535,28 @@ func TestExecSuccess(t *testing.T) {
|
|||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`range_trim_zscore()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `range_trim_zscore(0.9, time())`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{nan, 1200, 1400, 1600, 1800, nan},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`range_zscore()`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `round(range_zscore(time()), 0.1)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{-1.5, -0.9, -0.3, 0.3, 0.9, 1.5},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`range_quantile(0.5)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `range_quantile(0.5, time())`
|
||||
|
@ -8355,8 +8377,10 @@ func TestExecError(t *testing.T) {
|
|||
f(`running_sum(1, 2)`)
|
||||
f(`range_mad()`)
|
||||
f(`range_sum(1, 2)`)
|
||||
f(`range_trim_spikes()`)
|
||||
f(`range_trim_outliers()`)
|
||||
f(`range_trim_spikes()`)
|
||||
f(`range_trim_zscore()`)
|
||||
f(`range_zscore()`)
|
||||
f(`range_first(1, 2)`)
|
||||
f(`range_last(1, 2)`)
|
||||
f(`range_linear_regression(1, 2)`)
|
||||
|
|
|
@ -99,6 +99,8 @@ var transformFuncs = map[string]transformFunc{
|
|||
"range_sum": newTransformFuncRange(runningSum),
|
||||
"range_trim_outliers": transformRangeTrimOutliers,
|
||||
"range_trim_spikes": transformRangeTrimSpikes,
|
||||
"range_trim_zscore": transformRangeTrimZscore,
|
||||
"range_zscore": transformRangeZscore,
|
||||
"remove_resets": transformRemoveResets,
|
||||
"round": transformRound,
|
||||
"running_avg": newTransformFuncRunning(runningAvg),
|
||||
|
@ -1277,6 +1279,64 @@ func transformRangeNormalize(tfa *transformFuncArg) ([]*timeseries, error) {
|
|||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformRangeTrimZscore(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
zs, err := getScalar(args[0], 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
z := float64(0)
|
||||
if len(zs) > 0 {
|
||||
z = math.Abs(zs[0])
|
||||
}
|
||||
// Trim samples with z-score above z.
|
||||
rvs := args[1]
|
||||
for _, ts := range rvs {
|
||||
values := ts.Values
|
||||
qStddev := stddev(values)
|
||||
avg := mean(values)
|
||||
for i, v := range values {
|
||||
zCurr := math.Abs(v-avg) / qStddev
|
||||
if zCurr > z {
|
||||
values[i] = nan
|
||||
}
|
||||
}
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
func transformRangeZscore(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rvs := args[0]
|
||||
for _, ts := range rvs {
|
||||
values := ts.Values
|
||||
qStddev := stddev(values)
|
||||
avg := mean(values)
|
||||
for i, v := range values {
|
||||
values[i] = (v - avg) / qStddev
|
||||
}
|
||||
}
|
||||
return rvs, nil
|
||||
}
|
||||
|
||||
// mean returns the arithmetic mean of the non-NaN items in values.
//
// NaN items are skipped. If values contain no non-NaN items,
// then the result is NaN, since it is calculated as 0/0.
func mean(values []float64) float64 {
	total := 0.0
	count := 0
	for i := range values {
		if math.IsNaN(values[i]) {
			continue
		}
		total += values[i]
		count++
	}
	return total / float64(count)
}
|
||||
|
||||
func transformRangeTrimOutliers(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
args := tfa.args
|
||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||
|
@ -1290,7 +1350,7 @@ func transformRangeTrimOutliers(tfa *transformFuncArg) ([]*timeseries, error) {
|
|||
if len(ks) > 0 {
|
||||
k = ks[0]
|
||||
}
|
||||
// Trim samples v satisfying the `abs(v - range_median(q)) > k*range_mad(q)`
|
||||
// Trim samples satisfying the `abs(v - range_median(q)) > k*range_mad(q)`
|
||||
rvs := args[1]
|
||||
for _, ts := range rvs {
|
||||
values := ts.Values
|
||||
|
|
|
@ -24,7 +24,9 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
|||
* FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage).
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `mad_over_time(m[d])` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over raw samples on the lookbehind window `d`. See [this feature request](https://github.com/prometheus/prometheus/issues/5514).
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_mad(q)` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over points per each time series returned by `q`.
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_outliers(k, q)` function for dropping outliers farther than `k*range_mad(q)` from the `range_median(q)`. This should removing outliers at query time at [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759).
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_zscore(q)` function for calculating [z-score](https://en.wikipedia.org/wiki/Standard_score) over points per each time series returned from `q`.
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_outliers(k, q)` function for dropping outliers located farther than `k*range_mad(q)` from the `range_median(q)`. This should help removing outliers during query time at [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759).
|
||||
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_zscore(z, q)` function for dropping outliers located farther than `z*range_stddev(q)` from `range_avg(q)`. This should help to remove outliers at query time. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show `median` instead of `avg` in graph tooltip and line legend, since `median` is more tolerant against spikes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3706).
|
||||
|
||||
* BUGFIX: prevent from possible data ingestion slowdown and query performance slowdown during [background merges of big parts](https://docs.victoriametrics.com/#storage) on systems with small number of CPU cores (1 or 2 CPU cores). The issue has been introduced in [v1.85.0](https://docs.victoriametrics.com/CHANGELOG.html#v1850) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337). See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3790).
|
||||
|
|
|
@ -809,12 +809,14 @@ See also [min_over_time](#min_over_time).
|
|||
|
||||
#### zscore_over_time
|
||||
|
||||
`zscore_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates returns [z-score](https://en.wikipedia.org/wiki/Standard_score)
|
||||
`zscore_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns [z-score](https://en.wikipedia.org/wiki/Standard_score)
|
||||
for raw samples on the given lookbehind window `d`. It is calculated independently per each time series returned
|
||||
from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
||||
|
||||
Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
|
||||
### Transform functions
|
||||
|
||||
|
@ -1267,18 +1269,28 @@ per each time series returned by `q` on the selected time range.
|
|||
#### range_trim_outliers
|
||||
|
||||
`range_trim_outliers(k, q)` is a [transform function](#transform-functions), which drops points located farther than `k*range_mad(q)`
|
||||
from the `range_median(q)`. E.g., it is equivalent to the following query: `q ifnot (abs(q - range_median(q)) > k*range_mad(q))`.
|
||||
from the `range_median(q)`. E.g. it is equivalent to the following query: `q ifnot (abs(q - range_median(q)) > k*range_mad(q))`.
|
||||
|
||||
The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`.
|
||||
|
||||
See also [range_trim_outliers](#range_trim_outliers).
|
||||
See also [range_trim_spikes](#range_trim_spikes) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
#### range_trim_spikes
|
||||
|
||||
`range_trim_spikes(phi, q)` is a [transform function](#transform-functions), which drops `phi` percent of biggest spikes from time series returned by `q`.
|
||||
The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`.
|
||||
|
||||
See also [range_trim_outliers](#range_trim_outliers).
|
||||
See also [range_trim_outliers](#range_trim_outliers) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
#### range_trim_zscore
|
||||
|
||||
`range_trim_zscore(z, q)` is a [transform function](#transform-functions), which drops points located farther than `z*range_stddev(q)`
|
||||
from the `range_avg(q)`. E.g. it is equivalent to the following query: `q ifnot (abs(q - range_avg(q)) > z*range_stddev(q))`.
|
||||
|
||||
See also [range_trim_outliers](#range_trim_outliers) and [range_trim_spikes](#range_trim_spikes).
|
||||
|
||||
#### range_zscore
|
||||
|
||||
`range_zscore(q)` is a [transform function](#transform-functions), which calculates [z-score](https://en.wikipedia.org/wiki/Standard_score)
|
||||
for points returned by `q`, e.g. it is equivalent to the following query: `(q - range_avg(q)) / range_stddev(q)`.
|
||||
|
||||
#### remove_resets
|
||||
|
||||
|
@ -1890,6 +1902,8 @@ See also [bottomk_min](#bottomk_min).
|
|||
per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp.
|
||||
This function is useful for detecting anomalies in the group of related time series.
|
||||
|
||||
See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore).
|
||||
|
||||
## Subqueries
|
||||
|
||||
MetricsQL supports and extends PromQL subqueries. See [this article](https://valyala.medium.com/prometheus-subqueries-in-victoriametrics-9b1492b720b3) for details.
|
||||
|
|
2
go.mod
2
go.mod
|
@ -12,7 +12,7 @@ require (
|
|||
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
||||
github.com/VictoriaMetrics/fasthttp v1.1.0
|
||||
github.com/VictoriaMetrics/metrics v1.23.1
|
||||
github.com/VictoriaMetrics/metricsql v0.54.0
|
||||
github.com/VictoriaMetrics/metricsql v0.55.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.17.4
|
||||
github.com/aws/aws-sdk-go-v2/config v1.18.13
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.53
|
||||
|
|
4
go.sum
4
go.sum
|
@ -69,8 +69,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
|
|||
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
|
||||
github.com/VictoriaMetrics/metrics v1.23.1 h1:/j8DzeJBxSpL2qSIdqnRFLvQQhbJyJbbEi22yMm7oL0=
|
||||
github.com/VictoriaMetrics/metrics v1.23.1/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
|
||||
github.com/VictoriaMetrics/metricsql v0.54.0 h1:dKAIJtWcSPKnMNhRY5MYpqC77ZyHtA1xuDRr1pJuN5Q=
|
||||
github.com/VictoriaMetrics/metricsql v0.54.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
|
||||
github.com/VictoriaMetrics/metricsql v0.55.0 h1:GZMZ1dUKPMhKsSPtVTRHfMChwRZ4KrXBxnSQgr3mjSg=
|
||||
github.com/VictoriaMetrics/metricsql v0.55.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
|
||||
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||
|
|
2
vendor/github.com/VictoriaMetrics/metricsql/optimizer.go
generated
vendored
2
vendor/github.com/VictoriaMetrics/metricsql/optimizer.go
generated
vendored
|
@ -393,7 +393,7 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int {
|
|||
case "limit_offset":
|
||||
return 2
|
||||
case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile",
|
||||
"range_trim_outliers", "range_trim_spikes":
|
||||
"range_trim_outliers", "range_trim_spikes", "range_trim_zscore":
|
||||
return 1
|
||||
case "histogram_quantiles":
|
||||
return len(args) - 1
|
||||
|
|
2
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
2
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
|
@ -84,6 +84,8 @@ var transformFuncs = map[string]bool{
|
|||
"range_sum": true,
|
||||
"range_trim_outliers": true,
|
||||
"range_trim_spikes": true,
|
||||
"range_trim_zscore": true,
|
||||
"range_zscore": true,
|
||||
"remove_resets": true,
|
||||
"round": true,
|
||||
"running_avg": true,
|
||||
|
|
2
vendor/modules.txt
vendored
2
vendor/modules.txt
vendored
|
@ -71,7 +71,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
|
|||
# github.com/VictoriaMetrics/metrics v1.23.1
|
||||
## explicit; go 1.15
|
||||
github.com/VictoriaMetrics/metrics
|
||||
# github.com/VictoriaMetrics/metricsql v0.54.0
|
||||
# github.com/VictoriaMetrics/metricsql v0.55.0
|
||||
## explicit; go 1.13
|
||||
github.com/VictoriaMetrics/metricsql
|
||||
github.com/VictoriaMetrics/metricsql/binaryop
|
||||
|
|
Loading…
Reference in a new issue