app/vmselect/promql: add stale_samples_over_time() function

This commit is contained in:
Aliaksandr Valialkin 2022-01-14 01:48:04 +02:00
parent 96707223db
commit f41846d002
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
9 changed files with 200 additions and 178 deletions

View file

@ -910,9 +910,11 @@ func mulNoOverflow(a, b int64) int64 {
}
func dropStaleNaNs(funcName string, values []float64, timestamps []int64) ([]float64, []int64) {
if *noStaleMarkers || funcName == "default_rollup" {
if *noStaleMarkers || funcName == "default_rollup" || funcName == "stale_samples_over_time" {
// Do not drop Prometheus staleness marks (aka stale NaNs) for default_rollup() function,
// since it uses them for Prometheus-style staleness detection.
// Do not drop staleness marks for stale_samples_over_time() function, since it needs
// to calculate the number of staleness markers.
return values, timestamps
}
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.

View file

@ -77,6 +77,7 @@ var rollupFuncs = map[string]newRollupFunc{
"scrape_interval": newRollupFuncOneArg(rollupScrapeInterval),
"share_gt_over_time": newRollupShareGT,
"share_le_over_time": newRollupShareLE,
"stale_samples_over_time": newRollupFuncOneArg(rollupStaleSamples),
"stddev_over_time": newRollupFuncOneArg(rollupStddev),
"stdvar_over_time": newRollupFuncOneArg(rollupStdvar),
"sum_over_time": newRollupFuncOneArg(rollupSum),
@ -128,6 +129,7 @@ var rollupAggrFuncs = map[string]rollupFunc{
"rate_over_sum": rollupRateOverSum,
"resets": rollupResets,
"scrape_interval": rollupScrapeInterval,
"stale_samples_over_time": rollupStaleSamples,
"stddev_over_time": rollupStddev,
"stdvar_over_time": rollupStdvar,
"sum_over_time": rollupSum,
@ -1373,6 +1375,20 @@ func rollupCount(rfa *rollupFuncArg) float64 {
return float64(len(values))
}
func rollupStaleSamples(rfa *rollupFuncArg) float64 {
values := rfa.values
if len(values) == 0 {
return nan
}
n := 0
for _, v := range rfa.values {
if decimal.IsStaleNaN(v) {
n++
}
}
return float64(n)
}
func rollupStddev(rfa *rollupFuncArg) float64 {
stdvar := rollupStdvar(rfa)
return math.Sqrt(stdvar)

View file

@ -513,6 +513,7 @@ func TestRollupNewRollupFuncSuccess(t *testing.T) {
f("sum2_over_time", 37951)
f("geomean_over_time", 39.33466603189148)
f("count_over_time", 12)
f("stale_samples_over_time", 0)
f("stddev_over_time", 30.752935722554287)
f("stdvar_over_time", 945.7430555555555)
f("first_over_time", 123)

View file

@ -28,6 +28,7 @@ sort: 15
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): expose `vmalert_remotewrite_total` metric at `/metrics` page. This makes possible calculating SLOs for error rate during writing recording rules and alert state to `-remoteWrite.url` with the query `vmalert_remotewrite_errors_total / vmalert_remotewrite_total`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2040). Thanks to @afoninsky .
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `stripPort` template function in the same way as [Prometheus does](https://github.com/prometheus/prometheus/pull/10002).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `parseDuration` template function in the same way as [Prometheus does](https://github.com/prometheus/prometheus/pull/8817).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `stale_samples_over_time(m[d])` function for calculating the number of [staleness marks](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) for time series `m` over the duration `d`. This function may be useful for detecting flapping metrics at scrape targets, which periodically disappear and then appear again.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that `vmagent` replicas scrape the same targets at different time offsets when [replication is enabled in vmagent clustering mode](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets). This guarantees that the [deduplication](https://docs.victoriametrics.com/#deduplication) consistently leaves samples from the same `vmagent` replica.
* BUGFIX: return the proper response stub from `/api/v1/query_exemplars` handler, which is needed for Grafana v8+. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1999).

View file

@ -311,6 +311,10 @@ See also [implicit query conversions](#implicit-query-conversions).
`share_le_over_time(series_selector[d], le)` returns share (in the range `[0...1]`) of raw samples on the given lookbehind window `d`, which are smaller or equal to `le`. It is calculated independently per each time series returned from the given [series_selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors). Metric names are stripped from the resulting rollups. Useful for calculating SLI and SLO. Example: `share_le_over_time(memory_usage_bytes[24h], 100*1024*1024)` returns the share of time series values for the last 24 hours when memory usage was below or equal to 100MB. See also [share_gt_over_time](#share_gt_over_time).
#### stale_samples_over_time
`stale_samples_over_time(series_selector[d])` calculates the number of [staleness markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) on the given lookbehind window `d` per each time series matching the given [series_selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors). Metric names are stripped from the resulting rollups.
#### stddev_over_time
`stddev_over_time(series_selector[d])` calculates standard deviation over raw samples on the given lookbehind window `d` per each time series returned from the given [series_selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors). Metric names are stripped from the resulting rollups. This function is supported by PromQL. See also [stdvar_over_time](#stdvar_over_time).

2
go.mod
View file

@ -10,7 +10,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.18.1
github.com/VictoriaMetrics/metricsql v0.35.0
github.com/VictoriaMetrics/metricsql v0.36.1
github.com/aws/aws-sdk-go v1.42.31
github.com/cespare/xxhash/v2 v2.1.2
github.com/cheggaaa/pb/v3 v3.0.8

7
go.sum
View file

@ -112,11 +112,10 @@ github.com/VictoriaMetrics/fastcache v1.8.0 h1:ybZqS7kRy8YVzYsI09GLzQhs7iqS6cOEH
github.com/VictoriaMetrics/fastcache v1.8.0/go.mod h1:n7Sl+ioh/HlWeYHLSIBIE8TcZFHg/+xgvomWSS5xuEE=
github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a/ueoLdLL0=
github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ=
github.com/VictoriaMetrics/metrics v1.12.2/go.mod h1:Z1tSfPfngDn12bTfZSCqArT3OPY3u88J12hSoOhuiRE=
github.com/VictoriaMetrics/metrics v1.18.1 h1:OZ0+kTTto8oPfHnVAnTOoyl0XlRhRkoQrD2n2cOuRw0=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metricsql v0.35.0 h1:jK/+UhPb5o2OESZcqifp9hl0rURPgCwY6coy8OlbIss=
github.com/VictoriaMetrics/metricsql v0.35.0/go.mod h1:ylO7YITho/Iw6P71oEaGyHbO94bGoGtzWfLGqFhMIg8=
github.com/VictoriaMetrics/metricsql v0.36.1 h1:pT1OAt7AFiNEj8rmXCqtggkA7XGQJNi4vafaw7JcVD4=
github.com/VictoriaMetrics/metricsql v0.36.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
@ -963,7 +962,6 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC
github.com/valyala/fasthttp v1.30.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus=
github.com/valyala/fastjson v1.6.3 h1:tAKFnnwmeMGPbwJ7IwxcTPCNr3uIzoIj3/Fh90ra4xc=
github.com/valyala/fastjson v1.6.3/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8=
github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
@ -971,7 +969,6 @@ github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/valyala/gozstd v1.15.1 h1:hpmJYWOpNs0rjDOMdbwWZplfNlfh1tOy0v7XiR9+iGQ=
github.com/valyala/gozstd v1.15.1/go.mod h1:y5Ew47GLlP37EkTB+B4s7r6A5rdaeB7ftbl9zoYiIPQ=
github.com/valyala/histogram v1.1.2/go.mod h1:CZAr6gK9dbD7hYx2s8WSPh0p5x5wETjC+2b3PJVtEdg=
github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ=
github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY=
github.com/valyala/quicktemplate v1.7.0 h1:LUPTJmlVcb46OOUY3IeD9DojFpAVbsG+5WFTcjMJzCM=

View file

@ -63,6 +63,7 @@ var rollupFuncs = map[string]bool{
"scrape_interval": true,
"share_gt_over_time": true,
"share_le_over_time": true,
"stale_samples_over_time": true,
"stddev_over_time": true,
"stdvar_over_time": true,
"sum_over_time": true,

2
vendor/modules.txt vendored
View file

@ -26,7 +26,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.18.1
## explicit; go 1.12
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.35.0
# github.com/VictoriaMetrics/metricsql v0.36.1
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop