app/vmselect/promql: add range_trim_spikes(phi, q) function for trimming phi percent of largest spikes per each time series returned by q

This commit is contained in:
Aliaksandr Valialkin 2022-12-05 21:55:01 -08:00
parent d99d222f0a
commit 5eae9a9914
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
9 changed files with 74 additions and 6 deletions

View file

@ -6385,6 +6385,17 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`range_trim_spikes()`, func(t *testing.T) {
t.Parallel()
q := `range_trim_spikes(0.2, time())`
r := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{nan, 1200, 1400, 1600, 1800, nan},
Timestamps: timestampsExpected,
}
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
t.Run(`range_quantile(0.5)`, func(t *testing.T) {
t.Parallel()
q := `range_quantile(0.5, time())`
@ -8189,6 +8200,7 @@ func TestExecError(t *testing.T) {
f(`step(1)`)
f(`running_sum(1, 2)`)
f(`range_sum(1, 2)`)
f(`range_trim_spikes()`)
f(`range_first(1, 2)`)
f(`range_last(1, 2)`)
f(`range_linear_regression(1, 2)`)

View file

@ -96,6 +96,7 @@ var transformFuncs = map[string]transformFunc{
"range_stddev": transformRangeStddev,
"range_stdvar": transformRangeStdvar,
"range_sum": newTransformFuncRange(runningSum),
"range_trim_spikes": transformRangeTrimSpikes,
"remove_resets": transformRemoveResets,
"round": transformRound,
"running_avg": newTransformFuncRunning(runningAvg),
@ -1274,6 +1275,54 @@ func transformRangeNormalize(tfa *transformFuncArg) ([]*timeseries, error) {
return rvs, nil
}
func transformRangeTrimSpikes(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 2); err != nil {
return nil, err
}
phis, err := getScalar(args[0], 0)
if err != nil {
return nil, err
}
phi := float64(0)
if len(phis) > 0 {
phi = phis[0]
}
// Trim 100% * (phi / 2) samples with the lowest / highest values per each time series
phi /= 2
phiUpper := 1 - phi
phiLower := phi
rvs := args[1]
a := getFloat64s()
values := a.A[:0]
for _, ts := range rvs {
values := values[:0]
originValues := ts.Values
for _, v := range originValues {
if math.IsNaN(v) {
continue
}
values = append(values, v)
}
sort.Float64s(values)
vMax := quantileSorted(phiUpper, values)
vMin := quantileSorted(phiLower, values)
for i, v := range originValues {
if math.IsNaN(v) {
continue
}
if v > vMax {
originValues[i] = nan
} else if v < vMin {
originValues[i] = nan
}
}
}
a.A = values
putFloat64s(a)
return rvs, nil
}
func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {

View file

@ -48,9 +48,10 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): improve [service discovery](https://docs.victoriametrics.com/sd_configs.html) performance when discovering big number of targets (10K and more).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `exported_` prefix to metric names exported by scrape targets if these metric names clash with [automatically generated metrics](https://docs.victoriametrics.com/vmagent.html#automatically-generated-metrics) such as `up`, `scrape_samples_scraped`, etc. This prevents from corruption of automatically generated metrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3406).
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve error message when the requested path cannot be properly parsed, so users could identify the issue and properly fix the path. Now the error message links to [url format docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3402).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to copy data from sources via Prometheus `remote_read` protocol. See [these docs](https://docs.victoriametrics.com/vmctl.html#migrating-data-by-remote-read-protocol). The related issues: [one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3132) and [two](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1101).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `-remoteWrite.sendTimeout` command-line flag, which allows configuring timeout for sending data to `-remoteWrite.url`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3408).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to migrate data between VictoriaMetrics clusters with automatic tenants discovery. See [these docs](https://docs.victoriametrics.com/vmctl.html#cluster-to-cluster-migration-mode) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2930)
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): add ability to copy data from sources via Prometheus `remote_read` protocol. See [these docs](https://docs.victoriametrics.com/vmctl.html#migrating-data-by-remote-read-protocol). The related issues: [one](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3132) and [two](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1101).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_spikes(phi, q)` function for trimming `phi` percent of the largest spikes per each time series returned by `q`. See [these docs](https://docs.victoriametrics.com/MetricsQL.html#range_trim_spikes).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly pass HTTP headers during the alert state restore procedure. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3418).
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly specify rule evaluation step during the [replay mode](https://docs.victoriametrics.com/vmalert.html#rules-backfilling). The `step` value was previously overriden by `-datasource.queryStep` command-line flag.

View file

@ -1247,6 +1247,11 @@ per each time series returned by `q` on the selected time range.
`range_sum(q)` is a [transform function](#transform-functions), which calculates the sum of points per each time series returned by `q`.
#### range_trim_spikes
`range_trim_spikes(phi, q)` is a [transform function](#transform-functions), which drops `phi` percent of biggest spikes from time series returned by `q`.
The `phi` must be in the range `[0..1]`, where `0` means `0%` and `1` means `100%`.
#### remove_resets
`remove_resets(q)` is a [transform function](#transform-functions), which removes counter resets from time series returned by `q`.

2
go.mod
View file

@ -12,7 +12,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.23.0
github.com/VictoriaMetrics/metricsql v0.49.1
github.com/VictoriaMetrics/metricsql v0.50.0
github.com/aws/aws-sdk-go-v2 v1.17.2
github.com/aws/aws-sdk-go-v2/config v1.18.4
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.43

4
go.sum
View file

@ -71,8 +71,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA=
github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
github.com/VictoriaMetrics/metricsql v0.49.1 h1:9JAbpiZhlQnylclcf5xNtYRaBd5dr2CTPQ85RIoruuk=
github.com/VictoriaMetrics/metricsql v0.49.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VictoriaMetrics/metricsql v0.50.0 h1:MCBhjn1qlfMqPGP6HiR9JgmEw7oTRGm/O8YwSeoaI1E=
github.com/VictoriaMetrics/metricsql v0.50.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=

View file

@ -392,7 +392,7 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int {
return -1
case "limit_offset":
return 2
case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile":
case "buckets_limit", "histogram_quantile", "histogram_share", "range_quantile", "range_trim_spikes":
return 1
case "histogram_quantiles":
return len(args) - 1

View file

@ -81,6 +81,7 @@ var transformFuncs = map[string]bool{
"range_stddev": true,
"range_stdvar": true,
"range_sum": true,
"range_trim_spikes": true,
"remove_resets": true,
"round": true,
"running_avg": true,

2
vendor/modules.txt vendored
View file

@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.23.0
## explicit; go 1.15
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.49.1
# github.com/VictoriaMetrics/metricsql v0.50.0
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop