From 5eae9a9914d4272582514ba69a9dc2196a3ecd29 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 5 Dec 2022 21:55:01 -0800 Subject: [PATCH] app/vmselect/promql: add range_trim_spikes(phi, q) function for trimming phi percent of largest spikes per each time series returned by q --- app/vmselect/promql/exec_test.go | 12 +++++ app/vmselect/promql/transform.go | 49 +++++++++++++++++++ docs/CHANGELOG.md | 3 +- docs/MetricsQL.md | 5 ++ go.mod | 2 +- go.sum | 4 +- .../VictoriaMetrics/metricsql/optimizer.go | 2 +- .../VictoriaMetrics/metricsql/transform.go | 1 + vendor/modules.txt | 2 +- 9 files changed, 74 insertions(+), 6 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 49eb8d540..0ca7c6224 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -6385,6 +6385,17 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r1, r2} f(q, resultExpected) }) + t.Run(`range_trim_spikes()`, func(t *testing.T) { + t.Parallel() + q := `range_trim_spikes(0.2, time())` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{nan, 1200, 1400, 1600, 1800, nan}, + Timestamps: timestampsExpected, + } + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`range_quantile(0.5)`, func(t *testing.T) { t.Parallel() q := `range_quantile(0.5, time())` @@ -8189,6 +8200,7 @@ func TestExecError(t *testing.T) { f(`step(1)`) f(`running_sum(1, 2)`) f(`range_sum(1, 2)`) + f(`range_trim_spikes()`) f(`range_first(1, 2)`) f(`range_last(1, 2)`) f(`range_linear_regression(1, 2)`) diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index f4d151295..752775b4e 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -96,6 +96,7 @@ var transformFuncs = map[string]transformFunc{ "range_stddev": transformRangeStddev, "range_stdvar": transformRangeStdvar, "range_sum": 
newTransformFuncRange(runningSum),
+	"range_trim_spikes": transformRangeTrimSpikes,
 	"remove_resets": transformRemoveResets,
 	"round": transformRound,
 	"running_avg": newTransformFuncRunning(runningAvg),
@@ -1274,6 +1275,54 @@ func transformRangeNormalize(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }
 
+// transformRangeTrimSpikes implements range_trim_spikes(phi, q). For every series of q it sets
+// to NaN, in place, the samples lying above quantileSorted(1-phi/2) or below quantileSorted(phi/2)
+// of the series' non-NaN values. It fails on a wrong number of args or a getScalar error for phi.
+func transformRangeTrimSpikes(tfa *transformFuncArg) ([]*timeseries, error) {
+	args := tfa.args
+	if err := expectTransformArgsNum(args, 2); err != nil {
+		return nil, err
+	}
+	phis, err := getScalar(args[0], 0)
+	if err != nil {
+		return nil, err
+	}
+	phi := float64(0)
+	if len(phis) > 0 {
+		phi = phis[0]
+	}
+	// Trim 100% * (phi / 2) samples with the lowest / highest values per each time series
+	phi /= 2
+	phiUpper := 1 - phi
+	phiLower := phi
+	rvs := args[1]
+	a := getFloat64s()
+	// Plain assignment in the loop (not `:=`) keeps one scratch slice for all the series,
+	// so a buffer grown by append is reused and handed back through putFloat64s.
+	values := a.A[:0]
+	for _, ts := range rvs {
+		values = values[:0]
+		originValues := ts.Values
+		for _, v := range originValues {
+			if !math.IsNaN(v) {
+				values = append(values, v)
+			}
+		}
+		sort.Float64s(values)
+		vMax := quantileSorted(phiUpper, values)
+		vMin := quantileSorted(phiLower, values)
+		for i, v := range originValues {
+			// Comparisons with NaN are always false, so NaN samples are left as is.
+			if v > vMax || v < vMin {
+				originValues[i] = nan
+			}
+		}
+	}
+	a.A = values
+	putFloat64s(a)
+	return rvs, nil
+}
+
 func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error) {
 	args := tfa.args
 	if err := expectTransformArgsNum(args, 1); err != nil {
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index a13a6273c..af033101d 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -48,9 +48,10 @@ The following tip changes can be tested by building VictoriaMetrics components f
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): improve [service discovery](https://docs.victoriametrics.com/sd_configs.html) performance when discovering big number of targets (10K and more).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `exported_` prefix to metric names exported by scrape targets if these metric names clash with [automatically generated metrics](https://docs.victoriametrics.com/vmagent.html#automatically-generated-metrics) such as `up`, `scrape_samples_scraped`, etc. This prevents from corruption of automatically generated metrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3406). * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve error message when the requested path cannot be properly parsed, so users could identify the issue and properly fix the path. Now the error message links to [url format docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3402). -* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to copy data from sources via Prometheus `remote_read` protocol. See [these docs](https://docs.victoriametrics.com/vmctl.html#migrating-data-by-remote-read-protocol). The related issues: [one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3132) and [two](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1101). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `-remoteWrite.sendTimeout` command-line flag, which allows configuring timeout for sending data to `-remoteWrite.url`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3408). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to migrate data between VictoriaMetrics clusters with automatic tenants discovery. 
See [these docs](https://docs.victoriametrics.com/vmctl.html#cluster-to-cluster-migration-mode) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2930) +* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to copy data from sources via Prometheus `remote_read` protocol. See [these docs](https://docs.victoriametrics.com/vmctl.html#migrating-data-by-remote-read-protocol). The related issues: [one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3132) and [two](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1101). +* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_spikes(phi, q)` function for trimming `phi` percent of the largest spikes per each time series returned by `q`. See [these docs](https://docs.victoriametrics.com/MetricsQL.html#range_trim_spikes). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly pass HTTP headers during the alert state restore procedure. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3418). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly specify rule evaluation step during the [replay mode](https://docs.victoriametrics.com/vmalert.html#rules-backfilling). The `step` value was previously overriden by `-datasource.queryStep` command-line flag. diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 67bd9aa61..9b0a8c447 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -1247,6 +1247,11 @@ per each time series returned by `q` on the selected time range. `range_sum(q)` is a [transform function](#transform-functions), which calculates the sum of points per each time series returned by `q`. +#### range_trim_spikes + +`range_trim_spikes(phi, q)` is a [transform function](#transform-functions), which drops `phi` percent of the biggest spikes from each time series returned by `q`: the samples with the lowest and the highest values are dropped in equal shares of `phi / 2`. 
+The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`. + #### remove_resets `remove_resets(q)` is a [transform function](#transform-functions), which removes counter resets from time series returned by `q`. diff --git a/go.mod b/go.mod index 164b550bd..35f09f6f9 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( // like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b github.com/VictoriaMetrics/fasthttp v1.1.0 github.com/VictoriaMetrics/metrics v1.23.0 - github.com/VictoriaMetrics/metricsql v0.49.1 + github.com/VictoriaMetrics/metricsql v0.50.0 github.com/aws/aws-sdk-go-v2 v1.17.2 github.com/aws/aws-sdk-go-v2/config v1.18.4 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.43 diff --git a/go.sum b/go.sum index a942cb491..44d818e92 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA= github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA= github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc= -github.com/VictoriaMetrics/metricsql v0.49.1 h1:9JAbpiZhlQnylclcf5xNtYRaBd5dr2CTPQ85RIoruuk= -github.com/VictoriaMetrics/metricsql v0.49.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= +github.com/VictoriaMetrics/metricsql v0.50.0 h1:MCBhjn1qlfMqPGP6HiR9JgmEw7oTRGm/O8YwSeoaI1E= +github.com/VictoriaMetrics/metricsql v0.50.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= diff --git a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go 
b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go index 3a432e63e..341528586 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/optimizer.go @@ -392,7 +392,7 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int { return -1 case "limit_offset": return 2 - case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile": + case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile", "range_trim_spikes": return 1 case "histogram_quantiles": return len(args) - 1 diff --git a/vendor/github.com/VictoriaMetrics/metricsql/transform.go b/vendor/github.com/VictoriaMetrics/metricsql/transform.go index 5876c8290..31029f2c3 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/transform.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/transform.go @@ -81,6 +81,7 @@ var transformFuncs = map[string]bool{ "range_stddev": true, "range_stdvar": true, "range_sum": true, + "range_trim_spikes": true, "remove_resets": true, "round": true, "running_avg": true, diff --git a/vendor/modules.txt b/vendor/modules.txt index dfb5e08c6..4bc16ef08 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless # github.com/VictoriaMetrics/metrics v1.23.0 ## explicit; go 1.15 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.49.1 +# github.com/VictoriaMetrics/metricsql v0.50.0 ## explicit; go 1.13 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop