mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
app/vmselect/promql: add range_linear_regression(q)
function for calculating simple linear regression for the selected time series on the selected time range
This commit is contained in:
parent
5955d23232
commit
c1a3192d8b
10 changed files with 98 additions and 25 deletions
|
@ -6905,6 +6905,51 @@ func TestExecSuccess(t *testing.T) {
|
||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
f(q, resultExpected)
|
f(q, resultExpected)
|
||||||
})
|
})
|
||||||
|
t.Run(`range_linear_regression(time())`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `range_linear_regression(time())`
|
||||||
|
r := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{1000, 1200, 1400, 1600, 1800, 2000},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
resultExpected := []netstorage.Result{r}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
|
t.Run(`range_linear_regression(-time())`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `range_linear_regression(-time())`
|
||||||
|
r := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{-1000, -1200, -1400, -1600, -1800, -2000},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
resultExpected := []netstorage.Result{r}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
|
t.Run(`range_linear_regression(100/time())`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `sort_desc(round((
|
||||||
|
alias(range_linear_regression(100/time()), "regress"),
|
||||||
|
alias(100/time(), "orig"),
|
||||||
|
),
|
||||||
|
0.001
|
||||||
|
))`
|
||||||
|
r1 := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{0.1, 0.083, 0.071, 0.062, 0.056, 0.05},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
r1.MetricName.MetricGroup = []byte("orig")
|
||||||
|
r2 := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{0.095, 0.085, 0.075, 0.066, 0.056, 0.046},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
r2.MetricName.MetricGroup = []byte("regress")
|
||||||
|
resultExpected := []netstorage.Result{r1, r2}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
t.Run(`deriv(N)`, func(t *testing.T) {
|
t.Run(`deriv(N)`, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
q := `deriv(1000)`
|
q := `deriv(1000)`
|
||||||
|
@ -8097,6 +8142,7 @@ func TestExecError(t *testing.T) {
|
||||||
f(`range_sum(1, 2)`)
|
f(`range_sum(1, 2)`)
|
||||||
f(`range_first(1, 2)`)
|
f(`range_first(1, 2)`)
|
||||||
f(`range_last(1, 2)`)
|
f(`range_last(1, 2)`)
|
||||||
|
f(`range_linear_regression(1, 2)`)
|
||||||
f(`smooth_exponential()`)
|
f(`smooth_exponential()`)
|
||||||
f(`smooth_exponential(1)`)
|
f(`smooth_exponential(1)`)
|
||||||
f(`remove_resets()`)
|
f(`remove_resets()`)
|
||||||
|
|
|
@ -894,7 +894,7 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
rf := func(rfa *rollupFuncArg) float64 {
|
rf := func(rfa *rollupFuncArg) float64 {
|
||||||
v, k := linearRegression(rfa)
|
v, k := linearRegression(rfa.values, rfa.timestamps, rfa.currTimestamp)
|
||||||
if math.IsNaN(v) {
|
if math.IsNaN(v) {
|
||||||
return nan
|
return nan
|
||||||
}
|
}
|
||||||
|
@ -904,13 +904,8 @@ func newRollupPredictLinear(args []interface{}) (rollupFunc, error) {
|
||||||
return rf, nil
|
return rf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func linearRegression(rfa *rollupFuncArg) (float64, float64) {
|
func linearRegression(values []float64, timestamps []int64, interceptTime int64) (float64, float64) {
|
||||||
// There is no need in handling NaNs here, since they must be cleaned up
|
if len(values) == 0 {
|
||||||
// before calling rollup funcs.
|
|
||||||
values := rfa.values
|
|
||||||
timestamps := rfa.timestamps
|
|
||||||
n := float64(len(values))
|
|
||||||
if n == 0 {
|
|
||||||
return nan, nan
|
return nan, nan
|
||||||
}
|
}
|
||||||
if areConstValues(values) {
|
if areConstValues(values) {
|
||||||
|
@ -918,25 +913,32 @@ func linearRegression(rfa *rollupFuncArg) (float64, float64) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
|
// See https://en.wikipedia.org/wiki/Simple_linear_regression#Numerical_example
|
||||||
interceptTime := rfa.currTimestamp
|
|
||||||
vSum := float64(0)
|
vSum := float64(0)
|
||||||
tSum := float64(0)
|
tSum := float64(0)
|
||||||
tvSum := float64(0)
|
tvSum := float64(0)
|
||||||
ttSum := float64(0)
|
ttSum := float64(0)
|
||||||
|
n := 0
|
||||||
for i, v := range values {
|
for i, v := range values {
|
||||||
|
if math.IsNaN(v) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
dt := float64(timestamps[i]-interceptTime) / 1e3
|
dt := float64(timestamps[i]-interceptTime) / 1e3
|
||||||
vSum += v
|
vSum += v
|
||||||
tSum += dt
|
tSum += dt
|
||||||
tvSum += dt * v
|
tvSum += dt * v
|
||||||
ttSum += dt * dt
|
ttSum += dt * dt
|
||||||
|
n++
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return nan, nan
|
||||||
}
|
}
|
||||||
k := float64(0)
|
k := float64(0)
|
||||||
tDiff := ttSum - tSum*tSum/n
|
tDiff := ttSum - tSum*tSum/float64(n)
|
||||||
if math.Abs(tDiff) >= 1e-6 {
|
if math.Abs(tDiff) >= 1e-6 {
|
||||||
// Prevent from incorrect division for too small tDiff values.
|
// Prevent from incorrect division for too small tDiff values.
|
||||||
k = (tvSum - tSum*vSum/n) / tDiff
|
k = (tvSum - tSum*vSum/float64(n)) / tDiff
|
||||||
}
|
}
|
||||||
v := vSum/n - k*tSum/n
|
v := vSum/float64(n) - k*tSum/float64(n)
|
||||||
return v, k
|
return v, k
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1605,7 +1607,7 @@ func rollupIdelta(rfa *rollupFuncArg) float64 {
|
||||||
func rollupDerivSlow(rfa *rollupFuncArg) float64 {
|
func rollupDerivSlow(rfa *rollupFuncArg) float64 {
|
||||||
// Use linear regression like Prometheus does.
|
// Use linear regression like Prometheus does.
|
||||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/73
|
||||||
_, k := linearRegression(rfa)
|
_, k := linearRegression(rfa.values, rfa.timestamps, rfa.currTimestamp)
|
||||||
return k
|
return k
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -388,12 +388,7 @@ func TestRollupPredictLinear(t *testing.T) {
|
||||||
func TestLinearRegression(t *testing.T) {
|
func TestLinearRegression(t *testing.T) {
|
||||||
f := func(values []float64, timestamps []int64, expV, expK float64) {
|
f := func(values []float64, timestamps []int64, expV, expK float64) {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
rfa := &rollupFuncArg{
|
v, k := linearRegression(values, timestamps, timestamps[0] + 100)
|
||||||
values: values,
|
|
||||||
timestamps: timestamps,
|
|
||||||
currTimestamp: timestamps[0] + 100,
|
|
||||||
}
|
|
||||||
v, k := linearRegression(rfa)
|
|
||||||
if err := compareValues([]float64{v}, []float64{expV}); err != nil {
|
if err := compareValues([]float64{v}, []float64{expV}); err != nil {
|
||||||
t.Fatalf("unexpected v err: %s", err)
|
t.Fatalf("unexpected v err: %s", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,7 @@ var transformFuncs = map[string]transformFunc{
|
||||||
"range_avg": newTransformFuncRange(runningAvg),
|
"range_avg": newTransformFuncRange(runningAvg),
|
||||||
"range_first": transformRangeFirst,
|
"range_first": transformRangeFirst,
|
||||||
"range_last": transformRangeLast,
|
"range_last": transformRangeLast,
|
||||||
|
"range_linear_regression": transformRangeLinearRegression,
|
||||||
"range_max": newTransformFuncRange(runningMax),
|
"range_max": newTransformFuncRange(runningMax),
|
||||||
"range_min": newTransformFuncRange(runningMin),
|
"range_min": newTransformFuncRange(runningMin),
|
||||||
"range_quantile": transformRangeQuantile,
|
"range_quantile": transformRangeQuantile,
|
||||||
|
@ -136,6 +137,7 @@ var transformFuncsKeepMetricName = map[string]bool{
|
||||||
"range_avg": true,
|
"range_avg": true,
|
||||||
"range_first": true,
|
"range_first": true,
|
||||||
"range_last": true,
|
"range_last": true,
|
||||||
|
"range_linear_regression": true,
|
||||||
"range_max": true,
|
"range_max": true,
|
||||||
"range_min": true,
|
"range_min": true,
|
||||||
"range_quantile": true,
|
"range_quantile": true,
|
||||||
|
@ -1234,6 +1236,27 @@ func newTransformFuncRange(rf func(a, b float64, idx int) float64) transformFunc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||||
|
args := tfa.args
|
||||||
|
if err := expectTransformArgsNum(args, 1); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
rvs := args[0]
|
||||||
|
for _, ts := range rvs {
|
||||||
|
values := ts.Values
|
||||||
|
timestamps := ts.Timestamps
|
||||||
|
if len(timestamps) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
interceptTimestamp := timestamps[0]
|
||||||
|
v, k := linearRegression(values, timestamps, interceptTimestamp)
|
||||||
|
for i, t := range timestamps {
|
||||||
|
values[i] = v + k*float64(t-interceptTimestamp)/1e3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rvs, nil
|
||||||
|
}
|
||||||
|
|
||||||
func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
|
func transformRangeQuantile(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||||
args := tfa.args
|
args := tfa.args
|
||||||
if err := expectTransformArgsNum(args, 2); err != nil {
|
if err := expectTransformArgsNum(args, 2); err != nil {
|
||||||
|
|
|
@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
||||||
|
|
||||||
## tip
|
## tip
|
||||||
|
|
||||||
|
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_linear_regression](https://docs.victoriametrics.com/MetricsQL.html#range_linear_regression) function for calculating [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression) over the input time series on the selected time range. This function is useful for predictions and capacity planning. For example, `range_linear_regression(process_resident_memory_bytes)` can predict future memory usage based on the past memory usage.
|
||||||
|
|
||||||
## [v1.83.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.83.1)
|
## [v1.83.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.83.1)
|
||||||
|
|
||||||
|
|
|
@ -395,7 +395,7 @@ over the given lookbehind window `d` using the given smoothing factor `sf` and t
|
||||||
Both `sf` and `tf` must be in the range `[0...1]`. It is expected that the [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering)
|
Both `sf` and `tf` must be in the range `[0...1]`. It is expected that the [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering)
|
||||||
returns time series of [gauge type](https://docs.victoriametrics.com/keyConcepts.html#gauge).
|
returns time series of [gauge type](https://docs.victoriametrics.com/keyConcepts.html#gauge).
|
||||||
|
|
||||||
This function is supported by PromQL.
|
This function is supported by PromQL. See also [range_linear_regression](#range_linear_regression).
|
||||||
|
|
||||||
#### idelta
|
#### idelta
|
||||||
|
|
||||||
|
@ -533,7 +533,7 @@ from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.ht
|
||||||
linear interpolation over raw samples on the given lookbehind window `d`. The predicted value is calculated individually per each time series
|
linear interpolation over raw samples on the given lookbehind window `d`. The predicted value is calculated individually per each time series
|
||||||
returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
returned from the given [series_selector](https://docs.victoriametrics.com/keyConcepts.html#filtering).
|
||||||
|
|
||||||
This function is supported by PromQL.
|
This function is supported by PromQL. See also [range_linear_regression](#range_linear_regression).
|
||||||
|
|
||||||
#### present_over_time
|
#### present_over_time
|
||||||
|
|
||||||
|
@ -1203,6 +1203,11 @@ See also [rand](#rand) and [rand_exponential](#rand_exponential).
|
||||||
|
|
||||||
`range_last(q)` is a [transform function](#transform-functions), which returns the value for the last point per each time series returned by `q`.
|
`range_last(q)` is a [transform function](#transform-functions), which returns the value for the last point per each time series returned by `q`.
|
||||||
|
|
||||||
|
#### range_linear_regression
|
||||||
|
|
||||||
|
`range_linear_regression(q)` is a [transform function](#transform-functions), which calculates [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression)
|
||||||
|
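over the selected time range per each time series returned by `q`. Gaps (`NaN` points) in the series are skipped when fitting the line, and every point of the result, including the gaps, is set to the fitted value. This function is useful for capacity planning and predictions. See also [predict_linear](#predict_linear).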
|
||||||
|
|
||||||
#### range_max
|
#### range_max
|
||||||
|
|
||||||
`range_max(q)` is a [transform function](#transform-functions), which calculates the max value across points per each time series returned by `q`.
|
`range_max(q)` is a [transform function](#transform-functions), which calculates the max value across points per each time series returned by `q`.
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -12,7 +12,7 @@ require (
|
||||||
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
|
||||||
github.com/VictoriaMetrics/fasthttp v1.1.0
|
github.com/VictoriaMetrics/fasthttp v1.1.0
|
||||||
github.com/VictoriaMetrics/metrics v1.23.0
|
github.com/VictoriaMetrics/metrics v1.23.0
|
||||||
github.com/VictoriaMetrics/metricsql v0.45.0
|
github.com/VictoriaMetrics/metricsql v0.46.0
|
||||||
github.com/aws/aws-sdk-go-v2 v1.17.1
|
github.com/aws/aws-sdk-go-v2 v1.17.1
|
||||||
github.com/aws/aws-sdk-go-v2/config v1.17.10
|
github.com/aws/aws-sdk-go-v2/config v1.17.10
|
||||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.37
|
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.37
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -100,8 +100,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
|
||||||
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
|
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
|
||||||
github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA=
|
github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA=
|
||||||
github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
|
github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
|
||||||
github.com/VictoriaMetrics/metricsql v0.45.0 h1:kVQHnkDJm4qyJ8f5msTclmwqAtlUdPbbEJ7zoa/FTNs=
|
github.com/VictoriaMetrics/metricsql v0.46.0 h1:UeY+3vykSflhShmBmMemYvDVlqISraiCc8uMtyAc+PI=
|
||||||
github.com/VictoriaMetrics/metricsql v0.45.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
|
github.com/VictoriaMetrics/metricsql v0.46.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
|
||||||
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
|
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
|
||||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||||
|
|
1
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
1
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
|
@ -73,6 +73,7 @@ var transformFuncs = map[string]bool{
|
||||||
"range_avg": true,
|
"range_avg": true,
|
||||||
"range_first": true,
|
"range_first": true,
|
||||||
"range_last": true,
|
"range_last": true,
|
||||||
|
"range_linear_regression": true,
|
||||||
"range_max": true,
|
"range_max": true,
|
||||||
"range_min": true,
|
"range_min": true,
|
||||||
"range_quantile": true,
|
"range_quantile": true,
|
||||||
|
|
2
vendor/modules.txt
vendored
2
vendor/modules.txt
vendored
|
@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
|
||||||
# github.com/VictoriaMetrics/metrics v1.23.0
|
# github.com/VictoriaMetrics/metrics v1.23.0
|
||||||
## explicit; go 1.15
|
## explicit; go 1.15
|
||||||
github.com/VictoriaMetrics/metrics
|
github.com/VictoriaMetrics/metrics
|
||||||
# github.com/VictoriaMetrics/metricsql v0.45.0
|
# github.com/VictoriaMetrics/metricsql v0.46.0
|
||||||
## explicit; go 1.13
|
## explicit; go 1.13
|
||||||
github.com/VictoriaMetrics/metricsql
|
github.com/VictoriaMetrics/metricsql
|
||||||
github.com/VictoriaMetrics/metricsql/binaryop
|
github.com/VictoriaMetrics/metricsql/binaryop
|
||||||
|
|
Loading…
Reference in a new issue