diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index a5819aa64..55d2bcb86 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6525,6 +6525,28 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`range_trim_zscore()`, func(t *testing.T) { + t.Parallel() + q := `range_trim_zscore(0.9, time())` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{nan, 1200, 1400, 1600, 1800, nan}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) + t.Run(`range_zscore()`, func(t *testing.T) { + t.Parallel() + q := `round(range_zscore(time()), 0.1)` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{-1.5, -0.9, -0.3, 0.3, 0.9, 1.5}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`range_quantile(0.5)`, func(t *testing.T) { t.Parallel() q := `range_quantile(0.5, time())` @@ -8341,8 +8363,10 @@ func TestExecError(t *testing.T) { f(`running_sum(1, 2)`) f(`range_mad()`) f(`range_sum(1, 2)`) - f(`range_trim_spikes()`) f(`range_trim_outliers()`) + f(`range_trim_spikes()`) + f(`range_trim_zscore()`) + f(`range_zscore()`) f(`range_first(1, 2)`) f(`range_last(1, 2)`) f(`range_linear_regression(1, 2)`) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index 5d71d2f08..2bc506d5f 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -99,6 +99,8 @@ var transformFuncs = map[string]transformFunc{ "range_sum": newTransformFuncRange(runningSum), "range_trim_outliers": transformRangeTrimOutliers, "range_trim_spikes": transformRangeTrimSpikes, + "range_trim_zscore": transformRangeTrimZscore, + "range_zscore": transformRangeZscore, "remove_resets": transformRemoveResets, "round": transformRound, "running_avg": 
newTransformFuncRunning(runningAvg), @@ -1277,6 +1279,64 @@ func transformRangeNormalize(tfa *transformFuncArg) ([]*timeseries, error) { return rvs, nil } +func transformRangeTrimZscore(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 2); err != nil { + return nil, err + } + zs, err := getScalar(args[0], 0) + if err != nil { + return nil, err + } + z := float64(0) + if len(zs) > 0 { + z = math.Abs(zs[0]) + } + // Trim samples with z-score above z. + rvs := args[1] + for _, ts := range rvs { + values := ts.Values + qStddev := stddev(values) + avg := mean(values) + for i, v := range values { + zCurr := math.Abs(v-avg) / qStddev + if zCurr > z { + values[i] = nan + } + } + } + return rvs, nil +} + +func transformRangeZscore(tfa *transformFuncArg) ([]*timeseries, error) { + args := tfa.args + if err := expectTransformArgsNum(args, 1); err != nil { + return nil, err + } + rvs := args[0] + for _, ts := range rvs { + values := ts.Values + qStddev := stddev(values) + avg := mean(values) + for i, v := range values { + values[i] = (v - avg) / qStddev + } + } + return rvs, nil +} + +func mean(values []float64) float64 { + var sum float64 + var n int + for _, v := range values { + if !math.IsNaN(v) { + sum += v + n++ + } + } + return sum / float64(n) +} + func transformRangeTrimOutliers(tfa *transformFuncArg) ([]*timeseries, error) { args := tfa.args if err := expectTransformArgsNum(args, 2); err != nil { @@ -1290,7 +1350,7 @@ func transformRangeTrimOutliers(tfa *transformFuncArg) ([]*timeseries, error) { if len(ks) > 0 { k = ks[0] } - // Trim samples v satisfying the `abs(v - range_median(q)) > k*range_mad(q)` + // Trim samples satisfying the `abs(v - range_median(q)) > k*range_mad(q)` rvs := args[1] for _, ts := range rvs { values := ts.Values diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e48bf02e5..4b8b0bf24 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -24,7 +24,9 @@ The following tip changes 
can be tested by building VictoriaMetrics components f * FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `mad_over_time(m[d])` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over raw samples on the lookbehind window `d`. See [this feature request](https://github.com/prometheus/prometheus/issues/5514). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_mad(q)` function for calculating the [median absolute deviation](https://en.wikipedia.org/wiki/Median_absolute_deviation) over points per each time series returned by `q`. -* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_outliers(k, q)` function for dropping outliers farther than `k*range_mad(q)` from the `range_median(q)`. This should removing outliers at query time at [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759). +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_zscore(q)` function for calculating [z-score](https://en.wikipedia.org/wiki/Standard_score) over points per each time series returned from `q`. +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_outliers(k, q)` function for dropping outliers located farther than `k*range_mad(q)` from the `range_median(q)`. This should help remove outliers at query time, as requested in [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759). 
+* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_zscore(z, q)` function for dropping outliers located farther than `z*range_stddev(q)` from `range_avg(q)`. This should help remove outliers at query time, as requested in [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show `median` instead of `avg` in graph tooltip and line legend, since `median` is more tolerant against spikes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3706). * BUGFIX: prevent from possible data ingestion slowdown and query performance slowdown during [background merges of big parts](https://docs.victoriametrics.com/#storage) on systems with small number of CPU cores (1 or 2 CPU cores). The issue has been introduced in [v1.85.0](https://docs.victoriametrics.com/CHANGELOG.html#v1850) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337). See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3790). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 95860e83d..34e82d5d5 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -809,12 +809,14 @@ See also [min_over_time](#min_over_time). #### zscore_over_time -`zscore_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which calculates returns [z-score](https://en.wikipedia.org/wiki/Standard_score) +`zscore_over_time(series_selector[d])` is a [rollup function](#rollup-functions), which returns [z-score](https://en.wikipedia.org/wiki/Standard_score) for raw samples on the given lookbehind window `d`. It is calculated independently per each time series returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering). Metric names are stripped from the resulting rollups. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names. 
+See also [zscore](#zscore) and [range_trim_zscore](#range_trim_zscore). + ### Transform functions @@ -1267,18 +1269,28 @@ per each time series returned by `q` on the selected time range. #### range_trim_outliers `range_trim_outliers(k, q)` is a [transform function](#transform-functions), which drops points located farther than `k*range_mad(q)` -from the `range_median(q)`. E.g., it is equivalent to the following query: `q ifnot (abs(q - range_median(q)) > k*range_mad(q))`. +from the `range_median(q)`. E.g. it is equivalent to the following query: `q ifnot (abs(q - range_median(q)) > k*range_mad(q))`. -The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`. - -See also [range_trim_outliers](#range_trim_outliers). +See also [range_trim_spikes](#range_trim_spikes) and [range_trim_zscore](#range_trim_zscore). #### range_trim_spikes `range_trim_spikes(phi, q)` is a [transform function](#transform-functions), which drops `phi` percent of biggest spikes from time series returned by `q`. The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`. -See also [range_trim_outliers](#range_trim_outliers). +See also [range_trim_outliers](#range_trim_outliers) and [range_trim_zscore](#range_trim_zscore). + +#### range_trim_zscore + +`range_trim_zscore(z, q)` is a [transform function](#transform-functions), which drops points located farther than `z*range_stddev(q)` +from the `range_avg(q)`. E.g. it is equivalent to the following query: `q ifnot (abs(q - range_avg(q)) > z*range_stddev(q))`. + +See also [range_trim_outliers](#range_trim_outliers) and [range_trim_spikes](#range_trim_spikes). + +#### range_zscore + +`range_zscore(q)` is a [transform function](#transform-functions), which calculates [z-score](https://en.wikipedia.org/wiki/Standard_score) +for points returned by `q`, e.g. it is equivalent to the following query: `(q - range_avg(q)) / range_stddev(q)`. 
#### remove_resets @@ -1890,6 +1902,8 @@ See also [bottomk_min](#bottomk_min). per each `group_labels` for all the time series returned by `q`. The aggregate is calculated individually per each group of points with the same timestamp. This function is useful for detecting anomalies in the group of related time series. +See also [zscore_over_time](#zscore_over_time) and [range_trim_zscore](#range_trim_zscore). + ## Subqueries MetricsQL supports and extends PromQL subqueries. See [this article](https://valyala.medium.com/prometheus-subqueries-in-victoriametrics-9b1492b720b3) for details. diff --git a/go.mod b/go.mod index d0141dce7..340510374 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.23.1 - github.com/VictoriaMetrics/metricsql v0.54.0 + github.com/VictoriaMetrics/metricsql v0.55.0 github.com/aws/aws-sdk-go-v2 v1.17.4 github.com/aws/aws-sdk-go-v2/config v1.18.13 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.53 diff --git a/go.sum b/go.sum index aad3891ea..a3fc01392 100644 --- a/go.sum +++ b/go.sum @@ -69,8 +69,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.23.1 h1:/j8DzeJBxSpL2qSIdqnRFLvQQhbJyJbbEi22yMm7oL0= github.com/VictoriaMetrics/metrics v1.23.1/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= -github.com/VictoriaMetrics/metricsql v0.54.0 h1:dKAIJtWcSPKnMNhRY5MYpqC77ZyHtA1xuDRr1pJuN5Q= -github.com/VictoriaMetrics/metricsql v0.54.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.55.0 h1:GZMZ1dUKPMhKsSPtVTRHfMChwRZ4KrXBxnSQgr3mjSg= +github.com/VictoriaMetrics/metricsql v0.55.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= 
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go index 1ae86f027..ff5ad3524 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go @@ -393,7 +393,7 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int { case "limit_offset": return 2 case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile", - "range_trim_outliers", "range_trim_spikes": + "range_trim_outliers", "range_trim_spikes", "range_trim_zscore": return 1 case "histogram_quantiles": return len(args) - 1 diff --git a/vendor/github.com/VictoriaMetrics/metricsql/transform.go b/vendor/github.com/VictoriaMetrics/metricsql/transform.go index d80cefeb6..22769604b 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/transform.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/transform.go @@ -84,6 +84,8 @@ var transformFuncs = map[string]bool{ "range_sum": true, "range_trim_outliers": true, "range_trim_spikes": true, + "range_trim_zscore": true, + "range_zscore": true, "remove_resets": true, "round": true, "running_avg": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index cbc19a9dc..a4823bf82 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -71,7 +71,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.23.1 ## explicit; go 1.15 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.54.0 +# github.com/VictoriaMetrics/metricsql v0.55.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop