app/{vmagent,vminsert}: add support for streaming aggregation
See https://docs.victoriametrics.com/stream-aggregation.html
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460
This commit is contained in:
parent add2c4bf07
commit fa13bbc48a
29 changed files with 3142 additions and 22 deletions
|
@ -82,6 +82,7 @@ VictoriaMetrics has the following prominent features:
|
|||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
|
|
|
@ -24,8 +24,8 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
|||
see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Can accept data via all the ingestion protocols supported by VictoriaMetrics - see [these docs](#how-to-push-data-to-vmagent).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems -
|
||||
see [these docs](#replication-and-high-availability).
|
||||
* Can aggregate incoming samples by time and by labels before sending them to remote storage - see [these docs](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems - see [these docs](#replication-and-high-availability).
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
@ -126,6 +126,12 @@ If you use Prometheus only for scraping metrics from various targets and forward
|
|||
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
### Statsd alternative
|
||||
|
||||
`vmagent` can be used as an alternative to [statsd](https://github.com/statsd/statsd)
|
||||
when [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) is enabled.
|
||||
See [these docs](https://docs.victoriametrics.com/stream-aggregation.html#statsd-alternative) for details.
|
||||
|
||||
### Flexible metrics relay
|
||||
|
||||
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](#relabeling)
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
@ -58,6 +59,13 @@ var (
|
|||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter")
|
||||
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html ."+
|
||||
"See also -remoteWrite.streamAggr.keepInput")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples after the aggregation with -remoteWrite.streamAggr.config ."+
|
||||
"By default the input is dropped after the aggregation, so only the aggregate data is sent to the -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -140,6 +148,7 @@ func Init() {
|
|||
logger.Fatalf("cannot load relabel configs: %s", err)
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
configSuccess.Set(1)
|
||||
configTimestamp.Set(fasttime.UnixTimestamp())
|
||||
|
||||
|
@ -435,9 +444,13 @@ var (
|
|||
)
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
sas *streamaggr.Aggregators
|
||||
streamAggrKeepInput bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx uint64
|
||||
|
||||
|
@ -469,6 +482,7 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
|||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArgOrDefault(argIdx, 0)
|
||||
rd := roundDigits.GetOptionalArgOrDefault(argIdx, 100)
|
||||
pssLen := *queues
|
||||
|
@ -481,7 +495,8 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
|||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq.MustWriteBlock, sf, rd)
|
||||
}
|
||||
return &remoteWriteCtx{
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
|
@ -490,6 +505,19 @@ func newRemoteWriteCtx(argIdx int, at *auth.Token, remoteWriteURL *url.URL, maxI
|
|||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q, url=%q}`, queuePath, sanitizedURL)),
|
||||
}
|
||||
|
||||
// Initialize sas
|
||||
sasFile := streamAggrConfig.GetOptionalArg(argIdx)
|
||||
if sasFile != "" {
|
||||
sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggrFile=%q: %s", sasFile, err)
|
||||
}
|
||||
rwctx.sas = sas
|
||||
rwctx.streamAggrKeepInput = streamAggrKeepInput.GetOptionalArg(argIdx)
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
@ -501,6 +529,8 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
|||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
rwctx.sas.MustStop()
|
||||
rwctx.sas = nil
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
|
||||
|
@ -509,6 +539,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
|||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
||||
// Apply relabeling
|
||||
var rctx *relabelCtx
|
||||
var v *[]prompbmarshal.TimeSeries
|
||||
rcs := allRelabelConfigs.Load().(*relabelConfigs)
|
||||
|
@ -526,11 +557,17 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
|||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
rwctx.rowsDroppedByRelabel.Add(rowsCountBeforeRelabel - rowsCountAfterRelabel)
|
||||
}
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
rowsCount := getRowsCount(tss)
|
||||
rwctx.rowsPushedAfterRelabel.Add(rowsCount)
|
||||
pss[idx].Push(tss)
|
||||
|
||||
// Apply stream aggregation if any
|
||||
rwctx.sas.Push(tss)
|
||||
if rwctx.sas == nil || rwctx.streamAggrKeepInput {
|
||||
// Push samples to the remote storage
|
||||
rwctx.pushInternal(tss)
|
||||
}
|
||||
|
||||
// Return back relabeling contexts to the pool
|
||||
if rctx != nil {
|
||||
*v = prompbmarshal.ResetTimeSeries(tss)
|
||||
tssRelabelPool.Put(v)
|
||||
|
@ -538,6 +575,12 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
|
|||
}
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
|
||||
pss := rwctx.pss
|
||||
idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
|
||||
pss[idx].Push(tss)
|
||||
}
|
||||
|
||||
var tssRelabelPool = &sync.Pool{
|
||||
New: func() interface{} {
|
||||
a := []prompbmarshal.TimeSeries{}
|
||||
|
|
|
@ -69,16 +69,17 @@ Then configure `vmalert` accordingly:
|
|||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
Note there's a separate `remoteWrite.url` to allow writing results of
|
||||
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
|
||||
alerting/recording rules into a different storage than the initial data that's
|
||||
queried. This allows using `vmalert` to aggregate data from a short-term,
|
||||
high-frequency, high-cardinality storage into a long-term storage with
|
||||
decreased cardinality and a bigger interval between samples.
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
|
@ -514,8 +515,8 @@ groups:
|
|||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
|
||||
reading data from one data source and backfilling results to another. This helps to build a system
|
||||
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
|
||||
allows reading data from one data source and backfilling results to another. This helps to build a system
|
||||
for aggregating and downsampling the data.
|
||||
|
||||
The following example shows how to build a topology where `vmalert` will process data from one cluster
|
||||
|
@ -539,7 +540,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
|
|||
|
||||
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
|
||||
|
||||
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
|
||||
|
|
|
@ -19,7 +19,10 @@ type InsertCtx struct {
|
|||
mrs []storage.MetricRow
|
||||
metricNamesBuf []byte
|
||||
|
||||
relabelCtx relabel.Ctx
|
||||
relabelCtx relabel.Ctx
|
||||
streamAggrCtx streamAggrCtx
|
||||
|
||||
skipStreamAggr bool
|
||||
}
|
||||
|
||||
// Reset resets ctx for future fill with rowsLen rows.
|
||||
|
@ -42,6 +45,8 @@ func (ctx *InsertCtx) Reset(rowsLen int) {
|
|||
ctx.mrs = ctx.mrs[:0]
|
||||
ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
|
||||
ctx.relabelCtx.Reset()
|
||||
ctx.streamAggrCtx.Reset()
|
||||
ctx.skipStreamAggr = false
|
||||
}
|
||||
|
||||
func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
|
||||
|
@ -132,6 +137,13 @@ func (ctx *InsertCtx) ApplyRelabeling() {
|
|||
|
||||
// FlushBufs flushes buffered rows to the underlying storage.
|
||||
func (ctx *InsertCtx) FlushBufs() error {
|
||||
if sa != nil && !ctx.skipStreamAggr {
|
||||
ctx.streamAggrCtx.push(ctx.mrs)
|
||||
if !*streamAggrKeepInput {
|
||||
ctx.Reset(0)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
err := vmstorage.AddRows(ctx.mrs)
|
||||
ctx.Reset(0)
|
||||
if err == nil {
|
||||
|
|
117
app/vminsert/common/streamaggr.go
Normal file
|
@ -0,0 +1,117 @@
|
|||
package common
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
)
|
||||
|
||||
var (
|
||||
streamAggrConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html ."+
|
||||
"See also -remoteWrite.streamAggr.keepInput")
|
||||
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples after the aggregation with -streamAggr.config ."+
|
||||
"By default the input is dropped after the aggregation, so only the aggregate data is stored. "+
|
||||
"See https://docs.victoriametrics.com/stream-aggregation.html")
|
||||
)
|
||||
|
||||
// InitStreamAggr must be called after flag.Parse and before using the common package.
|
||||
//
|
||||
// MustStopStreamAggr must be called when stream aggr is no longer needed.
|
||||
func InitStreamAggr() {
|
||||
if *streamAggrConfig == "" {
|
||||
// Nothing to initialize
|
||||
return
|
||||
}
|
||||
a, err := streamaggr.LoadFromFile(*streamAggrConfig, pushAggregateSeries)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot load -streamAggr.config=%q: %s", *streamAggrConfig, err)
|
||||
}
|
||||
sa = a
|
||||
}
|
||||
|
||||
// MustStopStreamAggr stops stream aggregators.
|
||||
func MustStopStreamAggr() {
|
||||
sa.MustStop()
|
||||
sa = nil
|
||||
}
|
||||
|
||||
var sa *streamaggr.Aggregators
|
||||
|
||||
type streamAggrCtx struct {
|
||||
mn storage.MetricName
|
||||
tss [1]prompbmarshal.TimeSeries
|
||||
}
|
||||
|
||||
func (ctx *streamAggrCtx) Reset() {
|
||||
ctx.mn.Reset()
|
||||
ts := &ctx.tss[0]
|
||||
promrelabel.CleanLabels(ts.Labels)
|
||||
}
|
||||
|
||||
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow) {
|
||||
mn := &ctx.mn
|
||||
tss := ctx.tss[:]
|
||||
ts := &tss[0]
|
||||
labels := ts.Labels
|
||||
samples := ts.Samples
|
||||
for _, mr := range mrs {
|
||||
if err := mn.UnmarshalRaw(mr.MetricNameRaw); err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal recently marshaled MetricName: %s", err)
|
||||
}
|
||||
|
||||
labels = append(labels[:0], prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: bytesutil.ToUnsafeString(mn.MetricGroup),
|
||||
})
|
||||
for _, tag := range mn.Tags {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: bytesutil.ToUnsafeString(tag.Key),
|
||||
Value: bytesutil.ToUnsafeString(tag.Value),
|
||||
})
|
||||
}
|
||||
|
||||
samples = append(samples[:0], prompbmarshal.Sample{
|
||||
Timestamp: mr.Timestamp,
|
||||
Value: mr.Value,
|
||||
})
|
||||
|
||||
ts.Labels = labels
|
||||
ts.Samples = samples
|
||||
|
||||
sa.Push(tss)
|
||||
}
|
||||
}
|
||||
|
||||
func pushAggregateSeries(tss []prompbmarshal.TimeSeries) {
|
||||
currentTimestamp := int64(fasttime.UnixTimestamp()) * 1000
|
||||
var ctx InsertCtx
|
||||
ctx.Reset(len(tss))
|
||||
ctx.skipStreamAggr = true
|
||||
for _, ts := range tss {
|
||||
labels := ts.Labels
|
||||
for _, label := range labels {
|
||||
name := label.Name
|
||||
if name == "__name__" {
|
||||
name = ""
|
||||
}
|
||||
ctx.AddLabel(name, label.Value)
|
||||
}
|
||||
value := ts.Samples[0].Value
|
||||
if err := ctx.WriteDataPoint(nil, ctx.Labels, currentTimestamp, value); err != nil {
|
||||
logger.Errorf("cannot store aggregate series: %s", err)
|
||||
// Do not continue pushing the remaining samples, since it is likely they will return the same error.
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := vmstorage.AddRows(ctx.mrs); err != nil {
|
||||
logger.Errorf("cannot flush aggregate series: %s", err)
|
||||
}
|
||||
}
|
|
@ -9,6 +9,7 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
vminsertCommon "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/csvimport"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/datadog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
|
||||
|
@ -66,6 +67,7 @@ var staticServer = http.FileServer(http.FS(staticFiles))
|
|||
// Init initializes vminsert.
|
||||
func Init() {
|
||||
relabel.Init()
|
||||
vminsertCommon.InitStreamAggr()
|
||||
storage.SetMaxLabelsPerTimeseries(*maxLabelsPerTimeseries)
|
||||
storage.SetMaxLabelValueLen(*maxLabelValueLen)
|
||||
common.StartUnmarshalWorkers()
|
||||
|
@ -103,6 +105,7 @@ func Stop() {
|
|||
opentsdbhttpServer.MustStop()
|
||||
}
|
||||
common.StopUnmarshalWorkers()
|
||||
vminsertCommon.MustStopStreamAggr()
|
||||
}
|
||||
|
||||
// RequestHandler is a handler for Prometheus remote storage write API
|
||||
|
|
|
@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for aggregation of incoming [samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) by time and by labels. See [these docs](https://docs.victoriametrics.com/stream-aggregation.html) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add ability to explore metrics exported by a particular `job` / `instance`. See [these docs](https://docs.victoriametrics.com/#metrics-explorer) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3386).
|
||||
* FEATURE: allow passing partial `RFC3339` date/time to `time`, `start` and `end` query args at [querying APIs](https://docs.victoriametrics.com/#prometheus-querying-api-usage) and [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). For example, `2022` is equivalent to `2022-01-01T00:00:00Z`, while `2022-01-30T14` is equivalent to `2022-01-30T14:00:00Z`. See [these docs](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
* FEATURE: [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling): add support for `keepequal` and `dropequal` relabeling actions, which are supported by Prometheus starting from [v2.41.0](https://github.com/prometheus/prometheus/releases/tag/v2.41.0). These relabeling actions are almost identical to `keep_if_equal` and `drop_if_equal` relabeling actions supported by VictoriaMetrics since `v1.38.0` - see [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) - so it is recommended sticking to `keep_if_equal` and `drop_if_equal` actions instead of switching to `keepequal` and `dropequal`.
|
||||
|
|
|
@ -83,6 +83,7 @@ VictoriaMetrics has the following prominent features:
|
|||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
|
|
|
@ -86,6 +86,7 @@ VictoriaMetrics has the following prominent features:
|
|||
* [Arbitrary CSV data](#how-to-import-csv-data).
|
||||
* [Native binary format](#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](#how-to-send-data-from-datadog-agent).
|
||||
* It supports powerful [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html), which can be used as a [statsd](https://github.com/statsd/statsd) alternative.
|
||||
* It supports metrics [relabeling](#relabeling).
|
||||
* It can deal with [high cardinality issues](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and
|
||||
[high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues via [series limiter](#cardinality-limiter).
|
||||
|
|
438
docs/stream-aggregation.md
Normal file
|
@ -0,0 +1,438 @@
|
|||
---
|
||||
sort: 98
|
||||
---
|
||||
|
||||
# Streaming aggregation
|
||||
|
||||
[vmagent](https://docs.victoriametrics.com/vmagent.html) and [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html)
|
||||
can aggregate incoming [samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples) in streaming mode by time and by labels.
|
||||
The aggregation is applied to all the metrics received via any [supported data ingestion protocol](https://docs.victoriametrics.com/#how-to-import-time-series-data)
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
|
||||
The stream aggregation is configured via the following command-line flags:
|
||||
|
||||
- `-remoteWrite.streamAggr.config` at [vmagent](https://docs.victoriametrics.com/vmagent.html).
|
||||
This flag can be specified individually per each specified `-remoteWrite.url`.
|
||||
This allows writing different aggregates to different remote storage destinations.
|
||||
- `-streamAggr.config` at [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html).
|
||||
|
||||
These flags must point to a file containing [stream aggregation config](#stream-aggregation-config).
|
||||
|
||||
By default only the aggregated data is written to the storage. If the original incoming samples must be written to the storage too,
|
||||
then the following command-line flags must be specified:
|
||||
|
||||
- `-remoteWrite.streamAggr.keepInput` at [vmagent](https://docs.victoriametrics.com/vmagent.html).
|
||||
This flag can be specified individually per each specified `-remoteWrite.url`.
|
||||
This allows writing both raw and aggregate data to different remote storage destinations.
|
||||
- `-streamAggr.keepInput` at [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html).
|
||||
|
||||
Stream aggregation ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
It expects that the ingested samples have timestamps close to the current time.
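The new `lib/streamaggr` package introduced by this commit exposes a small API that both `vmagent` and single-node VictoriaMetrics use: load the config with `streamaggr.LoadFromFile`, feed incoming series via `Push`, and receive the aggregated series through the callback once per configured interval. Below is a minimal Go sketch of this wiring, based on the calls visible in the diff; the file name and the logging are illustrative only:

```go
package main

import (
	"log"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
)

func main() {
	// The callback receives aggregated series once per configured interval.
	pushAggregated := func(tss []prompbmarshal.TimeSeries) {
		log.Printf("flushing %d aggregated series", len(tss))
	}

	// Load the config referenced by -streamAggr.config or -remoteWrite.streamAggr.config.
	sas, err := streamaggr.LoadFromFile("stream-aggr.yml", pushAggregated)
	if err != nil {
		log.Fatalf("cannot load stream aggregation config: %s", err)
	}
	defer sas.MustStop()

	// Incoming samples are pushed into the aggregators. Their timestamps are ignored,
	// so the samples are expected to be close to the current time.
	sas.Push([]prompbmarshal.TimeSeries{
		{
			Labels:  []prompbmarshal.Label{{Name: "__name__", Value: "hits"}},
			Samples: []prompbmarshal.Sample{{Value: 1}},
		},
	})
}
```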
|
||||
|
||||
## Use cases
|
||||
|
||||
Stream aggregation can be used in the following cases:
|
||||
|
||||
* [Statsd alternative](#statsd-alternative)
|
||||
* [Recording rules alternative](#recording-rules-alternative)
|
||||
* [Reducing the number of stored samples](#reducing-the-number-of-stored-samples)
|
||||
* [Reducing the number of stored series](#reducing-the-number-of-stored-series)
|
||||
|
||||
### Statsd alternative
|
||||
|
||||
Stream aggregation can be used as a [statsd](https://github.com/statsd/statsd) alternative in the following cases:
|
||||
|
||||
* [Counting input samples](#counting-input-samples)
|
||||
* [Summing input metrics](#summing-input-metrics)
|
||||
* [Quantiles over input metrics](#quantiles-over-input-metrics)
|
||||
* [Histograms over input metrics](#histograms-over-input-metrics)
|
||||
|
||||
### Recording rules alternative
|
||||
|
||||
Sometimes [alerting queries](https://docs.victoriametrics.com/vmalert.html#alerting-rules) may require non-trivial amounts of CPU, RAM,
disk IO and network bandwidth on the metrics storage side. For example, if the `http_request_duration_seconds` histogram is generated by thousands
of app instances, then the alerting query `histogram_quantile(0.99, sum(increase(http_request_duration_seconds_bucket[5m])) without (instance)) > 0.5`
can become slow, since it needs to scan too many unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series)
with the `http_request_duration_seconds_bucket` name. This alerting query can be sped up by pre-calculating
the `sum(increase(http_request_duration_seconds_bucket[5m])) without (instance)` via a [recording rule](https://docs.victoriametrics.com/vmalert.html#recording-rules).
But even this recording rule may take too much time to execute. In this case the slow recording rule can be substituted
with the following [stream aggregation config](#stream-aggregation-config):
|
||||
|
||||
```yaml
|
||||
- match: 'http_request_duration_seconds_bucket'
|
||||
interval: 5m
|
||||
without: [instance]
|
||||
outputs: [total]
|
||||
```
|
||||
|
||||
This stream aggregation generates `http_request_duration_seconds_bucket:5m_without_instance_total` output series according to [output metric naming](#output-metric-names).
|
||||
Then these series can be used in [alerting rules](https://docs.victoriametrics.com/vmalert.html#alerting-rules):
|
||||
|
||||
```metricsql
|
||||
histogram_quantile(0.99, last_over_time(http_request_duration_seconds_bucket:5m_without_instance_total[5m])) > 0.5
|
||||
```
|
||||
|
||||
This query is executed much faster than the original query, because it needs to scan far fewer time series.
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
|
||||
### Reducing the number of stored samples
|
||||
|
||||
If per-[series](https://docs.victoriametrics.com/keyConcepts.html#time-series) samples are ingested at high frequency,
|
||||
then this may result in high disk space usage, since too much data must be stored to disk. This also may result
|
||||
in slow queries, since too much data must be processed during queries.
|
||||
|
||||
This can be fixed with the stream aggregation by increasing the interval between per-series samples stored in the database.
|
||||
|
||||
For example, the following [stream aggregation config](#stream-aggregation-config) reduces the frequency of input samples
|
||||
to one sample per 5 minutes per each input time series (this operation is also known as downsampling):
|
||||
|
||||
```yaml
|
||||
# Aggregate metrics ending with _total with `total` output.
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#aggregation-outputs
|
||||
- match: '{__name__=~".+_total"}'
|
||||
interval: 5m
|
||||
outputs: [total]
|
||||
|
||||
# Downsample other metrics with `count_samples`, `sum_samples`, `min` and `max` outputs
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#aggregation-outputs
|
||||
- match: '{__name__!~".+_total"}'
|
||||
interval: 5m
|
||||
outputs: [count_samples, sum_samples, min, max]
|
||||
```
|
||||
|
||||
The aggregated output metrics have the following names according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
# For input metrics ending with _total
|
||||
some_metric_total:5m_total
|
||||
|
||||
# For input metrics not ending with _total
|
||||
some_metric:5m_count_samples
|
||||
some_metric:5m_sum_samples
|
||||
some_metric:5m_min
|
||||
some_metric:5m_max
|
||||
```
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
### Reducing the number of stored series
|
||||
|
||||
Sometimes apps may generate too many [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series).
For example, the `http_requests_total` metric may have a `path` or `user` label with too many unique values.
In this case the following stream aggregation can be used for reducing the number of series stored in VictoriaMetrics:
|
||||
|
||||
```yaml
|
||||
- match: 'http_requests_total'
|
||||
interval: 30s
|
||||
without: [path, user]
|
||||
outputs: [total]
|
||||
```
|
||||
|
||||
The `without` list in this config specifies the labels that must be removed from the aggregate output.
|
||||
See [these docs](#aggregating-by-labels) for more details.
|
||||
|
||||
The aggregated output metric has the following name according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
http_requests_total:30s_without_path_user_total
|
||||
```
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
|
||||
|
||||
### Counting input samples
|
||||
|
||||
If the monitored app generates event-based metrics, then it may be useful to count the number of such metrics
|
||||
at stream aggregation level.
|
||||
|
||||
For example, if an advertising server generates `hits{some="labels"} 1` and `clicks{some="labels"} 1` metrics
|
||||
per each incoming hit and click, then the following [stream aggregation config](#stream-aggregation-config)
|
||||
can be used for counting these metrics per every 30 second interval:
|
||||
|
||||
```yml
|
||||
- match: '{__name__=~"hits|clicks"}'
|
||||
interval: 30s
|
||||
outputs: [count_samples]
|
||||
```
|
||||
|
||||
This config generates the following output metrics for `hits` and `clicks` input metrics
|
||||
according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
hits:30s_count_samples count1
|
||||
clicks:30s_count_samples count2
|
||||
```
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
|
||||
### Summing input metrics
|
||||
|
||||
If the monitored app counts some events and then sends the counted number of events to VictoriaMetrics
at irregular intervals or at too high a frequency, then stream aggregation can be used for summing such events
and writing the aggregate sums to the storage at regular intervals.
|
||||
|
||||
For example, if an advertising server generates `hits{some="labels"} N` and `clicks{some="labels"} M` metrics
|
||||
at irregular intervals, then the following [stream aggregation config](#stream-aggregation-config)
|
||||
can be used for summing these metrics per every minute:
|
||||
|
||||
```yml
|
||||
- match: '{__name__=~"hits|clicks"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
```
|
||||
|
||||
This config generates the following output metrics according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
hits:1m_sum_samples sum1
|
||||
clicks:1m_sum_samples sum2
|
||||
```
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
|
||||
### Quantiles over input metrics
|
||||
|
||||
If the monitored app generates measurement metrics per each request, then it may be useful to calculate
|
||||
the pre-defined set of [percentiles](https://en.wikipedia.org/wiki/Percentile) over these measurements.
|
||||
|
||||
For example, if the monitored app generates `request_duration_seconds N` and `response_size_bytes M` metrics
|
||||
per each incoming request, then the following [stream aggregation config](#stream-aggregation-config)
|
||||
can be used for calculating 50th and 99th percentiles for these metrics every 30 seconds:
|
||||
|
||||
```yaml
|
||||
- match: '{__name__=~"request_duration_seconds|response_size_bytes"}'
|
||||
interval: 30s
|
||||
outputs: ["quantiles(0.50, 0.99)"]
|
||||
```
|
||||
|
||||
This config generates the following output metrics according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
request_duration_seconds:30s_quantiles{quantile="0.50"} value1
|
||||
request_duration_seconds:30s_quantiles{quantile="0.99"} value2
|
||||
|
||||
response_size_bytes:30s_quantiles{quantile="0.50"} value1
|
||||
response_size_bytes:30s_quantiles{quantile="0.99"} value2
|
||||
```
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [histograms over input metrics](#histograms-over-input-metrics) and [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
### Histograms over input metrics
|
||||
|
||||
If the monitored app generates measurement metrics per each request, then it may be useful to calculate
|
||||
a [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) over these metrics.
|
||||
|
||||
For example, if the monitored app generates `request_duration_seconds N` and `response_size_bytes M` metrics
|
||||
per each incoming request, then the following [stream aggregation config](#stream-aggregation-config)
|
||||
can be used for calculating [VictoriaMetrics histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
|
||||
for these metrics every 60 seconds:
|
||||
|
||||
```yaml
|
||||
- match: '{__name__=~"request_duration_seconds|response_size_bytes"}'
|
||||
interval: 60s
|
||||
outputs: [histogram_bucket]
|
||||
```
|
||||
|
||||
This config generates the following output metrics according to [output metric naming](#output-metric-names):
|
||||
|
||||
```
|
||||
request_duration_seconds:60s_histogram_bucket{vmrange="start1...end1"} count1
|
||||
request_duration_seconds:60s_histogram_bucket{vmrange="start2...end2"} count2
|
||||
...
|
||||
request_duration_seconds:60s_histogram_bucket{vmrange="startN...endN"} countN
|
||||
|
||||
response_size_bytes:60s_histogram_bucket{vmrange="start1...end1"} count1
|
||||
response_size_bytes:60s_histogram_bucket{vmrange="start2...end2"} count2
|
||||
...
|
||||
response_size_bytes:60s_histogram_bucket{vmrange="startN...endN"} countN
|
||||
```
|
||||
|
||||
The resulting histogram buckets can be queried with [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) in the following ways:
|
||||
|
||||
1. An estimated 50th and 99th [percentiles](https://en.wikipedia.org/wiki/Percentile) of the request duration over the last hour:
|
||||
|
||||
```metricsql
|
||||
histogram_quantiles("quantile", 0.50, 0.99, sum(increase(request_duration_seconds:60s_histogram_bucket[1h])) by (vmrange))
|
||||
```
|
||||
|
||||
This query uses [histogram_quantiles](https://docs.victoriametrics.com/MetricsQL.html#histogram_quantiles) function.
|
||||
|
||||
2. An estimated [standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) of the request duration over the last hour:
|
||||
|
||||
```metricsql
|
||||
histogram_stddev(sum(increase(request_duration_seconds:60s_histogram_bucket[1h])) by (vmrange))
|
||||
```
|
||||
|
||||
This query uses [histogram_stddev](https://docs.victoriametrics.com/MetricsQL.html#histogram_stddev) function.
|
||||
|
||||
3. An estimated share of requests with the duration smaller than `0.5s` over the last hour:
|
||||
|
||||
```metricsql
|
||||
histogram_share(0.5, sum(increase(request_duration_seconds:60s_histogram_bucket[1h])) by (vmrange))
|
||||
```
|
||||
|
||||
This query uses [histogram_share](https://docs.victoriametrics.com/MetricsQL.html#histogram_share) function.
|
||||
|
||||
See [the list of aggregation outputs](#aggregation-outputs), which can be specified in the `outputs` field.
|
||||
See also [quantiles over input metrics](#quantiles-over-input-metrics) and [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
|
||||
## Output metric names
|
||||
|
||||
Output metric names for stream aggregation are constructed according to the following pattern:
|
||||
|
||||
```
|
||||
<metric_name>:<interval>[_by_<by_labels>][_without_<without_labels>]_<output>
|
||||
```
|
||||
|
||||
- `<metric_name>` is the original metric name.
|
||||
- `<interval>` is the interval specified in the [stream aggregation config](#stream-aggregation-config).
|
||||
- `<by_labels>` is an optional `_`-delimited list of `by` labels specified in the [stream aggregation config](#stream-aggregation-config).
|
||||
If the `by` list is missing in the config, then the `_by_<by_labels>` part isn't included in the output metric name.
|
||||
- `<without_labels>` is an optional `_`-delimited list of `without` labels specified in the [stream aggregation config](#stream-aggregation-config).
|
||||
If the `without` list is missing in the config, then the `_without_<without_labels>` part isn't included in the output metric name.
|
||||
- `<output>` is the aggregate used for constructing the output metric. The aggregate name is taken from the `outputs` list
|
||||
at the corresponding [stream aggregation config](#stream-aggregation-config).
|
||||
|
||||
Both input and output metric names can be modified if needed via relabeling according to [these docs](#relabeling).
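The naming scheme above can be illustrated with a tiny Go sketch. The `outputMetricName` helper below is hypothetical (it is not part of this commit); it merely spells out how the parts are concatenated:

```go
package main

import (
	"fmt"
	"strings"
)

// outputMetricName assembles an output metric name from the parts described above.
func outputMetricName(metricName, interval string, by, without []string, output string) string {
	name := metricName + ":" + interval
	if len(by) > 0 {
		name += "_by_" + strings.Join(by, "_")
	}
	if len(without) > 0 {
		name += "_without_" + strings.Join(without, "_")
	}
	return name + "_" + output
}

func main() {
	// Prints "http_requests_total:30s_without_path_user_total",
	// matching the example from the "Reducing the number of stored series" section.
	fmt.Println(outputMetricName("http_requests_total", "30s", nil, []string{"path", "user"}, "total"))
}
```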
|
||||
|
||||
|
||||
## Relabeling
|
||||
|
||||
It is possible to apply [arbitrary relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) to input and output metrics
|
||||
during stream aggregation via `input_relabel_configs` and `output_relabel_configs` options in [stream aggregation config](#stream-aggregation-config).
|
||||
|
||||
For example, the following config removes the `:1m_sum_samples` suffix added [to the output metric name](#output-metric-names):
|
||||
|
||||
```yml
|
||||
- interval: 1m
|
||||
outputs: [sum_samples]
|
||||
output_relabel_configs:
|
||||
- source_labels: [__name__]
|
||||
target_label: __name__
|
||||
regex: "(.+):.+"
|
||||
```
|
||||
|
||||
## Aggregation outputs
|
||||
|
||||
The following aggregation outputs are supported in the `outputs` list of the [stream aggregation config](#stream-aggregation-config):
|
||||
|
||||
* `total` generates output [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) by summing the input counters.
|
||||
The `total` handler properly handles input counter resets.
|
||||
The `total` handler returns garbage when something other than [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) is passed to the input.
|
||||
* `increase` returns the increase of input [counters](https://docs.victoriametrics.com/keyConcepts.html#counter).
|
||||
The `increase` handler properly handles the input counter resets.
|
||||
The `increase` handler returns garbage when something other than [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) is passed to the input.
|
||||
* `count_series` counts the number of unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series).
|
||||
* `count_samples` counts the number of input [samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `sum_samples` sums input [sample values](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `last` returns the last input [sample value](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `min` returns the minimum input [sample value](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `max` returns the maximum input [sample value](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `avg` returns the average input [sample value](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `stddev` returns [standard deviation](https://en.wikipedia.org/wiki/Standard_deviation) for the input [sample values](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `stdvar` returns [standard variance](https://en.wikipedia.org/wiki/Variance) for the input [sample values](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `histogram_bucket` returns [VictoriaMetrics histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
|
||||
for the input [sample values](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
* `quantiles(phi1, ..., phiN)` returns [percentiles](https://en.wikipedia.org/wiki/Percentile) for the given `phi*`
|
||||
over the input [sample values](https://docs.victoriametrics.com/keyConcepts.html#raw-samples).
|
||||
The `phi` must be in the range `[0..1]`, where `0` means `0th` percentile, while `1` means `100th` percentile.
|
||||
|
||||
The aggregations are calculated during the `interval` specified in the [config](#stream-aggregation-config)
|
||||
and then sent to the storage.
|
||||
|
||||
If `by` and `without` lists are specified in the [config](#stream-aggregation-config),
|
||||
then the [aggregation by labels](#aggregating-by-labels) is performed additionally to aggregation by `interval`.
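The counter-reset handling mentioned above for the `total` and `increase` outputs can be illustrated with the following simplified sketch. It is not the actual `lib/streamaggr` implementation (which keeps per-series state and handles staleness), only the core idea:

```go
package main

import "fmt"

// totalState is a simplified sketch of counter-reset handling for the `total` output.
type totalState struct {
	last  float64 // last seen raw counter value
	total float64 // accumulated output counter
	seen  bool
}

func (ts *totalState) push(value float64) {
	if !ts.seen {
		// The first sample only initializes the baseline.
		ts.last = value
		ts.seen = true
		return
	}
	if value >= ts.last {
		ts.total += value - ts.last
	} else {
		// The input counter was reset; count the new value from zero.
		ts.total += value
	}
	ts.last = value
}

func main() {
	var ts totalState
	for _, v := range []float64{10, 15, 3, 8} { // 15 -> 3 is a counter reset
		ts.push(v)
	}
	fmt.Println(ts.total) // 5 + 3 + 5 = 13
}
```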
|
||||
|
||||
|
||||
## Aggregating by labels
|
||||
|
||||
All the labels for the input metrics are preserved by default in the output metrics. For example,
|
||||
the input metric `foo{app="bar",instance="host1"}` results in the output metric `foo:1m_sum_samples{app="bar",instance="host1"}`
|
||||
when the following [stream aggregation config](#stream-aggregation-config) is used:
|
||||
|
||||
```yaml
|
||||
- interval: 1m
|
||||
outputs: [sum_samples]
|
||||
```
|
||||
|
||||
The input labels can be removed via `without` list specified in the config. For example, the following config
|
||||
removes the `instance` label from output metrics by summing input samples across all the instances:
|
||||
|
||||
```yaml
|
||||
- interval: 1m
|
||||
without: [instance]
|
||||
outputs: [sum_samples]
|
||||
```
|
||||
|
||||
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_without_instance_sum_samples{app="bar"}`
|
||||
output metric.
|
||||
|
||||
It is possible to specify the exact list of labels in the output metrics via the `by` list.
|
||||
For example, the following config sums input samples by the `app` label:
|
||||
|
||||
```yaml
|
||||
- interval: 1m
|
||||
by: [app]
|
||||
outputs: [sum_samples]
|
||||
```
|
||||
|
||||
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_by_app_sum_samples{app="bar"}`
|
||||
output metric.
|
||||
|
||||
|
||||
## Stream aggregation config
|
||||
|
||||
Below is the format of the stream aggregation config file, which may be referred to via the `-remoteWrite.streamAggr.config` command-line flag
|
||||
at [vmagent](https://docs.victoriametrics.com/vmagent.html) or via `-streamAggr.config` command-line flag
|
||||
at [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html):
|
||||
|
||||
```yaml
|
||||
# match is an optional filter for incoming samples to aggregate.
|
||||
# It can contain an arbitrary Prometheus series selector
|
||||
# according to https://docs.victoriametrics.com/keyConcepts.html#filtering .
|
||||
# If match is missing, then all the incoming samples are aggregated.
|
||||
- match: 'http_request_duration_seconds_bucket{env=~"prod|staging"}'
|
||||
|
||||
# interval is the interval for the aggregation.
|
||||
# The aggregated stats are sent to remote storage once per interval.
|
||||
interval: 1m
|
||||
|
||||
# without is an optional list of labels, which must be removed from the output aggregation.
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#aggregating-by-labels
|
||||
without: [instance]
|
||||
|
||||
# by is an optional list of labels, which must be preserved in the output aggregation.
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#aggregating-by-labels
|
||||
# by: [job, vmrange]
|
||||
|
||||
# outputs is the list of aggregations to perform on the input data.
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#aggregation-outputs
|
||||
outputs: [total]
|
||||
|
||||
# input_relabel_configs is an optional list of relabeling rules,
|
||||
# which are applied to the incoming samples after they pass the match filter
|
||||
# and before being aggregated.
|
||||
# See https://docs.victoriametrics.com/stream-aggregation.html#relabeling
|
||||
input_relabel_configs:
|
||||
- target_label: vmaggr
|
||||
replacement: before
|
||||
|
||||
# output_relabel_configs is an optional list of relabeling rules,
|
||||
# which are applied to the aggregated output metrics.
|
||||
output_relabel_configs:
|
||||
- target_label: vmaggr
|
||||
replacement: after
|
||||
```
|
||||
|
||||
The file can contain multiple aggregation configs. The aggregation is performed independently
|
||||
per each specified config entry.
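Since `streamaggr.LoadFromFile` returns an error for an invalid config, the file can be validated before pointing `-streamAggr.config` or `-remoteWrite.streamAggr.config` at it. A small Go sketch with a no-op push callback (the file name is only an example):

```go
package main

import (
	"log"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
)

func main() {
	// A no-op callback is enough for validation; no samples are pushed here.
	noop := func(tss []prompbmarshal.TimeSeries) {}
	sas, err := streamaggr.LoadFromFile("stream-aggr.yml", noop)
	if err != nil {
		log.Fatalf("invalid stream aggregation config: %s", err)
	}
	sas.MustStop()
	log.Printf("stream aggregation config is valid")
}
```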
|
|
@ -28,8 +28,8 @@ additionally to [discovering Prometheus-compatible targets and scraping metrics
|
|||
see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Can add, remove and modify labels (aka tags) via Prometheus relabeling. Can filter data before sending it to remote storage. See [these docs](#relabeling) for details.
|
||||
* Can accept data via all the ingestion protocols supported by VictoriaMetrics - see [these docs](#how-to-push-data-to-vmagent).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems -
|
||||
see [these docs](#replication-and-high-availability).
|
||||
* Can aggregate incoming samples by time and by labels before sending them to remote storage - see [these docs](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
* Can replicate collected metrics simultaneously to multiple remote storage systems - see [these docs](#replication-and-high-availability).
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected metrics
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered metrics are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
@ -130,6 +130,12 @@ If you use Prometheus only for scraping metrics from various targets and forward
|
|||
then `vmagent` can replace Prometheus. Typically, `vmagent` requires lower amounts of RAM, CPU and network bandwidth compared with Prometheus.
|
||||
See [these docs](#how-to-collect-metrics-in-prometheus-format) for details.
|
||||
|
||||
### Statsd alternative
|
||||
|
||||
`vmagent` can be used as an alternative to [statsd](https://github.com/statsd/statsd)
|
||||
when [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) is enabled.
|
||||
See [these docs](https://docs.victoriametrics.com/stream-aggregation.html#statsd-alternative) for details.
|
||||
|
||||
### Flexible metrics relay
|
||||
|
||||
`vmagent` can accept metrics in [various popular data ingestion protocols](#how-to-push-data-to-vmagent), apply [relabeling](#relabeling)
|
||||
|
|
|
@ -73,16 +73,17 @@ Then configure `vmalert` accordingly:
|
|||
-external.label=replica=a # Multiple external labels may be set
|
||||
```
|
||||
|
||||
Note there's a separate `remoteWrite.url` to allow writing results of
|
||||
Note there's a separate `-remoteWrite.url` command-line flag to allow writing results of
|
||||
alerting/recording rules into a different storage than the initial data that's
|
||||
queried. This allows using `vmalert` to aggregate data from a short-term,
|
||||
high-frequency, high-cardinality storage into a long-term storage with
|
||||
decreased cardinality and a bigger interval between samples.
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html).
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datastore or AlertManager - do not forget
|
||||
to specify different `external.label` flags in order to define which `vmalert` generated rules or alerts.
|
||||
to specify different `-external.label` command-line flags in order to define which `vmalert` generated rules or alerts.
|
||||
|
||||
Configuration for [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
|
@ -518,8 +519,8 @@ groups:
|
|||
expr: avg_over_time(http_requests[5m])
|
||||
```
|
||||
|
||||
Ability of `vmalert` to be configured with different `datasource.url` and `remoteWrite.url` allows
|
||||
reading data from one data source and backfilling results to another. This helps to build a system
|
||||
Ability of `vmalert` to be configured with different `-datasource.url` and `-remoteWrite.url` command-line flags
|
||||
allows reading data from one data source and backfilling results to another. This helps to build a system
|
||||
for aggregating and downsampling the data.
|
||||
|
||||
The following example shows how to build a topology where `vmalert` will process data from one cluster
|
||||
|
@ -543,7 +544,7 @@ Please note, [replay](#rules-backfilling) feature may be used for transforming h
|
|||
|
||||
Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only recording rules are used.
|
||||
|
||||
See also [downsampling docs](https://docs.victoriametrics.com/#downsampling).
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html) and [downsampling](https://docs.victoriametrics.com/#downsampling).
|
||||
|
||||
#### Multiple remote writes
|
||||
|
||||
|
|
74
lib/streamaggr/avg.go
Normal file
|
@ -0,0 +1,74 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// avgAggrState calculates output=avg, e.g. the average value over input samples.
|
||||
type avgAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type avgStateValue struct {
|
||||
mu sync.Mutex
|
||||
sum float64
|
||||
count int64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newAvgAggrState() *avgAggrState {
|
||||
return &avgAggrState{}
|
||||
}
|
||||
|
||||
func (as *avgAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &avgStateValue{
|
||||
sum: value,
|
||||
count: 1,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The entry has been successfully stored
|
||||
return
|
||||
}
|
||||
// Update the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*avgStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.sum += value
|
||||
sv.count++
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *avgAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*avgStateValue)
|
||||
sv.mu.Lock()
|
||||
avg := sv.sum / float64(sv.count)
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "avg", currentTimeMsec, avg)
|
||||
return true
|
||||
})
|
||||
}
|
71
lib/streamaggr/count_samples.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// countSamplesAggrState calculates output=count_samples, i.e. the count of input samples.
|
||||
type countSamplesAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type countSamplesStateValue struct {
|
||||
mu sync.Mutex
|
||||
n uint64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newCountSamplesAggrState() *countSamplesAggrState {
|
||||
return &countSamplesAggrState{}
|
||||
}
|
||||
|
||||
func (as *countSamplesAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &countSamplesStateValue{
|
||||
n: 1,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The new entry has been successfully created.
|
||||
return
|
||||
}
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*countSamplesStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.n++
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *countSamplesAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*countSamplesStateValue)
|
||||
sv.mu.Lock()
|
||||
n := sv.n
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "count_samples", currentTimeMsec, float64(n))
|
||||
return true
|
||||
})
|
||||
}
|
78
lib/streamaggr/count_series.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// countSeriesAggrState calculates output=count_series, i.e. the number of unique series.
|
||||
type countSeriesAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type countSeriesStateValue struct {
|
||||
mu sync.Mutex
|
||||
countedSeries map[string]struct{}
|
||||
n uint64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newCountSeriesAggrState() *countSeriesAggrState {
|
||||
return &countSeriesAggrState{}
|
||||
}
|
||||
|
||||
func (as *countSeriesAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &countSeriesStateValue{
|
||||
countedSeries: map[string]struct{}{
|
||||
inputKey: {},
|
||||
},
|
||||
n: 1,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The entry has been added to the map.
|
||||
return
|
||||
}
|
||||
// Update the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*countSeriesStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
if _, ok := sv.countedSeries[inputKey]; !ok {
|
||||
sv.countedSeries[inputKey] = struct{}{}
|
||||
sv.n++
|
||||
}
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *countSeriesAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*countSeriesStateValue)
|
||||
sv.mu.Lock()
|
||||
n := sv.n
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "count_series", currentTimeMsec, float64(n))
|
||||
return true
|
||||
})
|
||||
}
|
102
lib/streamaggr/histogram_bucket.go
Normal file
|
@ -0,0 +1,102 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
// histogramBucketAggrState calculates output=histogram_bucket, e.g. a VictoriaMetrics histogram over input samples.
|
||||
type histogramBucketAggrState struct {
|
||||
m sync.Map
|
||||
|
||||
ignoreInputDeadline uint64
|
||||
intervalSecs uint64
|
||||
}
|
||||
|
||||
type histogramBucketStateValue struct {
|
||||
mu sync.Mutex
|
||||
h metrics.Histogram
|
||||
deleteDeadline uint64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newHistogramBucketAggrState(interval time.Duration) *histogramBucketAggrState {
|
||||
intervalSecs := uint64(interval.Seconds() + 1)
|
||||
return &histogramBucketAggrState{
|
||||
intervalSecs: intervalSecs,
|
||||
}
|
||||
}
|
||||
|
||||
func (as *histogramBucketAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
deleteDeadline := currentTime + 2*as.intervalSecs
|
||||
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &histogramBucketStateValue{}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*histogramBucketStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.h.Update(value)
|
||||
sv.deleteDeadline = deleteDeadline
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *histogramBucketAggrState) removeOldEntries(currentTime uint64) {
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*histogramBucketStateValue)
|
||||
|
||||
sv.mu.Lock()
|
||||
deleted := currentTime > sv.deleteDeadline
|
||||
if deleted {
|
||||
// Mark the current entry as deleted
|
||||
sv.deleted = deleted
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
|
||||
if deleted {
|
||||
m.Delete(k)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (as *histogramBucketAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
currentTimeMsec := int64(currentTime) * 1000
|
||||
|
||||
as.removeOldEntries(currentTime)
|
||||
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*histogramBucketStateValue)
|
||||
sv.mu.Lock()
|
||||
if !sv.deleted {
|
||||
key := k.(string)
|
||||
sv.h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
|
||||
ctx.appendSeriesWithExtraLabel(key, "histogram_bucket", currentTimeMsec, float64(count), "vmrange", vmrange)
|
||||
})
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
return true
|
||||
})
|
||||
}
|
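The `histogram_bucket` output relies on the histogram type from `github.com/VictoriaMetrics/metrics`: each non-zero bucket reported by `VisitNonZeroBuckets` becomes a separate output series carrying a `vmrange` label. A small standalone sketch, with sample values taken from the tests below; exact bucket boundaries come from the metrics package.

```go
// Sketch of how the histogram_bucket output is built from the metrics package.
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	var h metrics.Histogram
	for _, v := range []float64{12.5, 13.3, 13, 12, 14, 25} {
		h.Update(v)
	}
	// Each non-zero bucket becomes an output series with a `vmrange` label,
	// e.g. cpu_usage:1m_histogram_bucket{vmrange="1.292e+01...1.468e+01"} 3
	h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
		fmt.Printf("vmrange=%q count=%d\n", vmrange, count)
	})
}
```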
129
lib/streamaggr/increase.go
Normal file
|
@ -0,0 +1,129 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// increaseAggrState calculates output=increase, e.g. the increase over input counters.
|
||||
type increaseAggrState struct {
|
||||
m sync.Map
|
||||
|
||||
ignoreInputDeadline uint64
|
||||
intervalSecs uint64
|
||||
}
|
||||
|
||||
type increaseStateValue struct {
|
||||
mu sync.Mutex
|
||||
lastValues map[string]*lastValueState
|
||||
total float64
|
||||
increase float64
|
||||
deleteDeadline uint64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newIncreaseAggrState(interval time.Duration) *increaseAggrState {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
intervalSecs := uint64(interval.Seconds() + 1)
|
||||
return &increaseAggrState{
|
||||
ignoreInputDeadline: currentTime + intervalSecs,
|
||||
intervalSecs: intervalSecs,
|
||||
}
|
||||
}
|
||||
|
||||
func (as *increaseAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
deleteDeadline := currentTime + 2*as.intervalSecs
|
||||
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &increaseStateValue{
|
||||
lastValues: make(map[string]*lastValueState),
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*increaseStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
lv, ok := sv.lastValues[inputKey]
|
||||
if !ok {
|
||||
lv = &lastValueState{}
|
||||
sv.lastValues[inputKey] = lv
|
||||
}
|
||||
d := value
|
||||
if ok && lv.value <= value {
|
||||
d = value - lv.value
|
||||
}
|
||||
if ok || currentTime > as.ignoreInputDeadline {
|
||||
sv.total += d
|
||||
}
|
||||
lv.value = value
|
||||
lv.deleteDeadline = deleteDeadline
|
||||
sv.deleteDeadline = deleteDeadline
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *increaseAggrState) removeOldEntries(currentTime uint64) {
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*increaseStateValue)
|
||||
|
||||
sv.mu.Lock()
|
||||
deleted := currentTime > sv.deleteDeadline
|
||||
if deleted {
|
||||
// Mark the current entry as deleted
|
||||
sv.deleted = deleted
|
||||
} else {
|
||||
// Delete outdated entries in sv.lastValues
|
||||
m := sv.lastValues
|
||||
for k1, v1 := range m {
|
||||
if currentTime > v1.deleteDeadline {
|
||||
delete(m, k1)
|
||||
}
|
||||
}
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
|
||||
if deleted {
|
||||
m.Delete(k)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (as *increaseAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
currentTimeMsec := int64(currentTime) * 1000
|
||||
|
||||
as.removeOldEntries(currentTime)
|
||||
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*increaseStateValue)
|
||||
sv.mu.Lock()
|
||||
increase := sv.total
|
||||
sv.total = 0
|
||||
deleted := sv.deleted
|
||||
sv.mu.Unlock()
|
||||
if !deleted {
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "increase", currentTimeMsec, increase)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
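The per-sample delta rule in `increaseAggrState.pushSample` handles counter resets: for an already-seen input series, the increase is `value - lastValue` while the counter grows monotonically, and the full new value is counted after a reset. Samples from series first seen before `ignoreInputDeadline` are skipped, so pre-existing counters don't produce a huge artificial jump on startup. A simplified sketch of the rule (the deadline handling is omitted here for brevity):

```go
// Sketch of the per-sample delta rule used by the increase/total states above;
// values are illustrative.
package main

import "fmt"

func delta(prev, cur float64, seenBefore bool) float64 {
	if seenBefore && prev <= cur {
		return cur - prev // normal monotonic growth
	}
	// Counter reset (or a new series past the ignore deadline): count the full value.
	return cur
}

func main() {
	fmt.Println(delta(1.32, 4.34, true)) // 3.02
	fmt.Println(delta(4.34, 2, true))    // 2 (reset detected)
}
```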
71
lib/streamaggr/last.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// lastAggrState calculates output=last, e.g. the last value over input samples.
|
||||
type lastAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type lastStateValue struct {
|
||||
mu sync.Mutex
|
||||
last float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newLastAggrState() *lastAggrState {
|
||||
return &lastAggrState{}
|
||||
}
|
||||
|
||||
func (as *lastAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &lastStateValue{
|
||||
last: value,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The new entry has been successfully created.
|
||||
return
|
||||
}
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*lastStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.last = value
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *lastAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*lastStateValue)
|
||||
sv.mu.Lock()
|
||||
last := sv.last
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "last", currentTimeMsec, last)
|
||||
return true
|
||||
})
|
||||
}
|
73
lib/streamaggr/max.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// maxAggrState calculates output=max, e.g. the maximum value over input samples.
|
||||
type maxAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type maxStateValue struct {
|
||||
mu sync.Mutex
|
||||
max float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newMaxAggrState() *maxAggrState {
|
||||
return &maxAggrState{}
|
||||
}
|
||||
|
||||
func (as *maxAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &maxStateValue{
|
||||
max: value,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The new entry has been successfully created.
|
||||
return
|
||||
}
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*maxStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
if value > sv.max {
|
||||
sv.max = value
|
||||
}
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *maxAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*maxStateValue)
|
||||
sv.mu.Lock()
|
||||
max := sv.max
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "max", currentTimeMsec, max)
|
||||
return true
|
||||
})
|
||||
}
|
73
lib/streamaggr/min.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// minAggrState calculates output=min, e.g. the minimum value over input samples.
|
||||
type minAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type minStateValue struct {
|
||||
mu sync.Mutex
|
||||
min float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newMinAggrState() *minAggrState {
|
||||
return &minAggrState{}
|
||||
}
|
||||
|
||||
func (as *minAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &minStateValue{
|
||||
min: value,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The new entry has been successfully created.
|
||||
return
|
||||
}
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*minStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
if value < sv.min {
|
||||
sv.min = value
|
||||
}
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *minAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*minStateValue)
|
||||
sv.mu.Lock()
|
||||
min := sv.min
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "min", currentTimeMsec, min)
|
||||
return true
|
||||
})
|
||||
}
|
87
lib/streamaggr/quantiles.go
Normal file
|
@ -0,0 +1,87 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/valyala/histogram"
|
||||
)
|
||||
|
||||
// quantilesAggrState calculates output=quantiles, e.g. the given quantiles over the input samples.
|
||||
type quantilesAggrState struct {
|
||||
m sync.Map
|
||||
|
||||
phis []float64
|
||||
}
|
||||
|
||||
type quantilesStateValue struct {
|
||||
mu sync.Mutex
|
||||
h *histogram.Fast
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newQuantilesAggrState(phis []float64) *quantilesAggrState {
|
||||
return &quantilesAggrState{
|
||||
phis: phis,
|
||||
}
|
||||
}
|
||||
|
||||
func (as *quantilesAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
h := histogram.GetFast()
|
||||
v = &quantilesStateValue{
|
||||
h: h,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
histogram.PutFast(h)
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*quantilesStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.h.Update(value)
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *quantilesAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
phis := as.phis
|
||||
var quantiles []float64
|
||||
var b []byte
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*quantilesStateValue)
|
||||
sv.mu.Lock()
|
||||
quantiles = sv.h.Quantiles(quantiles[:0], phis)
|
||||
histogram.PutFast(sv.h)
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
|
||||
key := k.(string)
|
||||
for i, quantile := range quantiles {
|
||||
b = strconv.AppendFloat(b[:0], phis[i], 'g', -1, 64)
|
||||
phiStr := bytesutil.InternBytes(b)
|
||||
ctx.appendSeriesWithExtraLabel(key, "quantiles", currentTimeMsec, quantile, "quantile", phiStr)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
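The quantiles output estimates the configured phi values with `github.com/valyala/histogram` and emits one series per phi, each tagged with a `quantile` label. A short usage sketch of that library with illustrative values:

```go
// Sketch of the quantiles estimation used above.
package main

import (
	"fmt"

	"github.com/valyala/histogram"
)

func main() {
	h := histogram.GetFast()
	defer histogram.PutFast(h)
	for _, v := range []float64{12.5, 13.3, 13, 12, 14, 25} {
		h.Update(v)
	}
	phis := []float64{0, 0.5, 1}
	for i, q := range h.Quantiles(nil, phis) {
		// e.g. cpu_usage:1m_quantiles{quantile="0.5"} 13.3
		fmt.Printf("quantile=%g value=%g\n", phis[i], q)
	}
}
```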
74
lib/streamaggr/stddev.go
Normal file
|
@ -0,0 +1,74 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// stddevAggrState calculates output=stddev, e.g. the standard deviation over input samples.
|
||||
type stddevAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type stddevStateValue struct {
|
||||
mu sync.Mutex
|
||||
count float64
|
||||
avg float64
|
||||
q float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newStddevAggrState() *stddevAggrState {
|
||||
return &stddevAggrState{}
|
||||
}
|
||||
|
||||
func (as *stddevAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &stddevStateValue{}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*stddevStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation
|
||||
sv.count++
|
||||
avg := sv.avg + (value-sv.avg)/sv.count
|
||||
sv.q += (value - sv.avg) * (value - avg)
|
||||
sv.avg = avg
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *stddevAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*stddevStateValue)
|
||||
sv.mu.Lock()
|
||||
stddev := math.Sqrt(sv.q / sv.count)
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "stddev", currentTimeMsec, stddev)
|
||||
return true
|
||||
})
|
||||
}
|
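Both `stddev` and `stdvar` use the single-pass (Welford) recurrence referenced in the comments above, so the variance can be updated per sample without keeping all values. A small standalone check of that recurrence against the naive two-pass formula, with the same sample values the tests use:

```go
// Quick check of the online (Welford) recurrence used by the stddev/stdvar states,
// compared against the naive two-pass variance; purely illustrative.
package main

import (
	"fmt"
	"math"
)

func main() {
	samples := []float64{4, 8.5}

	// Online recurrence, as in pushSample above.
	var count, avg, q float64
	for _, v := range samples {
		count++
		newAvg := avg + (v-avg)/count
		q += (v - avg) * (v - newAvg)
		avg = newAvg
	}

	// Naive two-pass variance for comparison.
	var sum float64
	for _, v := range samples {
		sum += v
	}
	mean := sum / float64(len(samples))
	var ss float64
	for _, v := range samples {
		ss += (v - mean) * (v - mean)
	}

	fmt.Println(math.Sqrt(q/count), math.Sqrt(ss/float64(len(samples)))) // both 2.25
}
```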
73
lib/streamaggr/stdvar.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// stdvarAggrState calculates output=stdvar, e.g. the standard variance over input samples.
|
||||
type stdvarAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type stdvarStateValue struct {
|
||||
mu sync.Mutex
|
||||
count float64
|
||||
avg float64
|
||||
q float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newStdvarAggrState() *stdvarAggrState {
|
||||
return &stdvarAggrState{}
|
||||
}
|
||||
|
||||
func (as *stdvarAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &stdvarStateValue{}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*stdvarStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
// See `Rapid calculation methods` at https://en.wikipedia.org/wiki/Standard_deviation
|
||||
sv.count++
|
||||
avg := sv.avg + (value-sv.avg)/sv.count
|
||||
sv.q += (value - sv.avg) * (value - avg)
|
||||
sv.avg = avg
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *stdvarAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*stdvarStateValue)
|
||||
sv.mu.Lock()
|
||||
stdvar := sv.q / sv.count
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "stdvar", currentTimeMsec, stdvar)
|
||||
return true
|
||||
})
|
||||
}
|
641
lib/streamaggr/streamaggr.go
Normal file
|
@ -0,0 +1,641 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var supportedOutputs = []string{
|
||||
"total",
|
||||
"increase",
|
||||
"count_series",
|
||||
"count_samples",
|
||||
"sum_samples",
|
||||
"last",
|
||||
"min",
|
||||
"max",
|
||||
"avg",
|
||||
"stddev",
|
||||
"stdvar",
|
||||
"histogram_bucket",
|
||||
"quantiles(phi1, ..., phiN)",
|
||||
}
|
||||
|
||||
// LoadFromFile loads Aggregators from the given path and uses the given pushFunc for pushing the aggregated data.
|
||||
//
|
||||
// The returned Aggregators must be stopped with MustStop() when no longer needed.
|
||||
func LoadFromFile(path string, pushFunc PushFunc) (*Aggregators, error) {
|
||||
data, err := fs.ReadFileOrHTTP(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load aggregators: %w", err)
|
||||
}
|
||||
as, err := NewAggregatorsFromData(data, pushFunc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize aggregators from %q: %w", path, err)
|
||||
}
|
||||
return as, nil
|
||||
}
|
||||
|
||||
// NewAggregatorsFromData initializes Aggregators from the given data and uses the given pushFunc for pushing the aggregated data.
|
||||
//
|
||||
// The returned Aggregators must be stopped with MustStop() when no longer needed.
|
||||
func NewAggregatorsFromData(data []byte, pushFunc PushFunc) (*Aggregators, error) {
|
||||
var cfgs []*Config
|
||||
if err := yaml.UnmarshalStrict(data, &cfgs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewAggregators(cfgs, pushFunc)
|
||||
}
|
||||
|
||||
// Config is a configuration for a single stream aggregation.
|
||||
type Config struct {
|
||||
// Match is an optional label selector for filtering the input time series to be aggregated.
|
||||
//
|
||||
// If the match isn't set, then all the input time series are processed.
|
||||
Match *promrelabel.IfExpression `yaml:"match,omitempty"`
|
||||
|
||||
// Interval is the interval between aggregations.
|
||||
Interval string `yaml:"interval"`
|
||||
|
||||
// Outputs is a list of output aggregate functions to produce.
|
||||
//
|
||||
// The following names are allowed:
|
||||
//
|
||||
// - total - aggregates input counters
|
||||
// - increase - counts the increase over input counters
|
||||
// - count_series - counts the input series
|
||||
// - count_samples - counts the input samples
|
||||
// - sum_samples - sums the input samples
|
||||
// - last - the last sample value
|
||||
// - min - the minimum sample value
|
||||
// - max - the maximum sample value
|
||||
// - avg - the average value across all the samples
|
||||
// - stddev - standard deviation across all the samples
|
||||
// - stdvar - standard variance across all the samples
|
||||
// - histogram_bucket - creates VictoriaMetrics histogram for input samples
|
||||
// - quantiles(phi1, ..., phiN) - quantiles' estimation for phi in the range [0..1]
|
||||
//
|
||||
// The output time series will have the following names:
|
||||
//
|
||||
// input_name:<interval>[_by_<by_labels>][_without_<without_labels>]_<output>
|
||||
//
|
||||
Outputs []string `yaml:"outputs"`
|
||||
|
||||
// By is an optional list of labels for grouping input series.
|
||||
//
|
||||
// See also Without.
|
||||
//
|
||||
// If neither By nor Without are set, then the Outputs are calculated
|
||||
// individually per each input time series.
|
||||
By []string `yaml:"by,omitempty"`
|
||||
|
||||
// Without is an optional list of labels, which must be excluded when grouping input series.
|
||||
//
|
||||
// See also By.
|
||||
//
|
||||
// If neither By nor Without are set, then the Outputs are calculated
|
||||
// individually per each input time series.
|
||||
Without []string `yaml:"without,omitempty"`
|
||||
|
||||
// InputRelabelConfigs is an optional list of relabeling rules, which are applied to the input samples
|
||||
// before aggregation.
|
||||
InputRelabelConfigs []promrelabel.RelabelConfig `yaml:"input_relabel_configs,omitempty"`
|
||||
|
||||
// OutputRelabelConfigs is an optional list of relabeling rules, which are applied
|
||||
// on the aggregated output before being sent to remote storage.
|
||||
OutputRelabelConfigs []promrelabel.RelabelConfig `yaml:"output_relabel_configs,omitempty"`
|
||||
}
|
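The config fields above map directly onto the YAML accepted by `NewAggregatorsFromData` / `LoadFromFile`. A minimal usage sketch within the VictoriaMetrics module; the config, labels and values are illustrative:

```go
// Minimal usage sketch for the streamaggr package introduced by this commit.
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
)

func main() {
	config := []byte(`
- interval: 1m
  by: [job]
  outputs: [sum_samples, count_series]
`)
	pushFunc := func(tss []prompbmarshal.TimeSeries) {
		// In vmagent/vminsert this forwards the aggregated series to remote storage.
		fmt.Printf("flushed %d aggregated series\n", len(tss))
	}
	a, err := streamaggr.NewAggregatorsFromData(config, pushFunc)
	if err != nil {
		panic(err)
	}
	defer a.MustStop()

	a.Push([]prompbmarshal.TimeSeries{
		{
			Labels: []prompbmarshal.Label{
				{Name: "__name__", Value: "http_requests_total"},
				{Name: "job", Value: "api"},
			},
			Samples: []prompbmarshal.Sample{{Value: 1, Timestamp: 0}},
		},
	})
	// Aggregated series such as http_requests_total:1m_by_job_sum_samples{job="api"}
	// are pushed to pushFunc every interval.
}
```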
||||
|
||||
// Aggregators aggregates metrics passed to Push and calls pushFunc for aggregate data.
|
||||
type Aggregators struct {
|
||||
as []*aggregator
|
||||
}
|
||||
|
||||
// NewAggregators creates Aggregators from the given cfgs.
|
||||
//
|
||||
// pushFunc is called when the aggregated data must be flushed.
|
||||
//
|
||||
// MustStop must be called on the returned Aggregators when they are no longer needed.
|
||||
func NewAggregators(cfgs []*Config, pushFunc PushFunc) (*Aggregators, error) {
|
||||
if len(cfgs) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
as := make([]*aggregator, len(cfgs))
|
||||
for i, cfg := range cfgs {
|
||||
a, err := newAggregator(cfg, pushFunc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot initialize aggregator #%d: %w", i, err)
|
||||
}
|
||||
as[i] = a
|
||||
}
|
||||
return &Aggregators{
|
||||
as: as,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// MustStop stops a.
|
||||
func (a *Aggregators) MustStop() {
|
||||
if a == nil {
|
||||
return
|
||||
}
|
||||
for _, aggr := range a.as {
|
||||
aggr.MustStop()
|
||||
}
|
||||
}
|
||||
|
||||
// Push pushes tss to a.
|
||||
func (a *Aggregators) Push(tss []prompbmarshal.TimeSeries) {
|
||||
if a == nil {
|
||||
return
|
||||
}
|
||||
for _, aggr := range a.as {
|
||||
aggr.Push(tss)
|
||||
}
|
||||
}
|
||||
|
||||
// aggregator aggregates input series according to the config passed to NewAggregator
|
||||
type aggregator struct {
|
||||
match *promrelabel.IfExpression
|
||||
|
||||
inputRelabeling *promrelabel.ParsedConfigs
|
||||
outputRelabeling *promrelabel.ParsedConfigs
|
||||
|
||||
by []string
|
||||
without []string
|
||||
aggregateOnlyByTime bool
|
||||
|
||||
// aggrStates contains aggregate states for the given outputs
|
||||
aggrStates []aggrState
|
||||
|
||||
pushFunc PushFunc
|
||||
|
||||
// suffix contains a suffix, which should be added to aggregate metric names
|
||||
//
|
||||
// It contains the interval, the labels from (by, without), plus the output name.
|
||||
// For example, foo_bar metric name is transformed to foo_bar:1m_by_job
|
||||
// for `interval: 1m`, `by: [job]`
|
||||
suffix string
|
||||
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
type aggrState interface {
|
||||
pushSample(inputKey, outputKey string, value float64)
|
||||
appendSeriesForFlush(ctx *flushCtx)
|
||||
}
|
||||
|
||||
// PushFunc is called by Aggregators when it needs to push its state to metrics storage
|
||||
type PushFunc func(tss []prompbmarshal.TimeSeries)
|
||||
|
||||
// newAggregator creates new aggregator for the given cfg, which pushes the aggregate data to pushFunc.
|
||||
//
|
||||
// The returned aggregator must be stopped when no longer needed by calling MustStop().
|
||||
func newAggregator(cfg *Config, pushFunc PushFunc) (*aggregator, error) {
|
||||
// check cfg.Interval
|
||||
interval, err := time.ParseDuration(cfg.Interval)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse `interval: %q`: %w", cfg.Interval, err)
|
||||
}
|
||||
if interval <= time.Second {
|
||||
return nil, fmt.Errorf("the minimum supported aggregation interval is 1s; got %s", interval)
|
||||
}
|
||||
|
||||
// initialize input_relabel_configs and output_relabel_configs
|
||||
inputRelabeling, err := promrelabel.ParseRelabelConfigs(cfg.InputRelabelConfigs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse input_relabel_configs: %w", err)
|
||||
}
|
||||
outputRelabeling, err := promrelabel.ParseRelabelConfigs(cfg.OutputRelabelConfigs)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse output_relabel_configs: %w", err)
|
||||
}
|
||||
|
||||
// check by and without lists
|
||||
by := cfg.By
|
||||
without := cfg.Without
|
||||
if len(by) > 0 && len(without) > 0 {
|
||||
return nil, fmt.Errorf("`by: %s` and `without: %s` lists cannot be set simultaneously", by, without)
|
||||
}
|
||||
aggregateOnlyByTime := (len(by) == 0 && len(without) == 0)
|
||||
if !aggregateOnlyByTime && len(without) == 0 {
|
||||
by = addMissingUnderscoreName(by)
|
||||
}
|
||||
|
||||
// initialize outputs list
|
||||
if len(cfg.Outputs) == 0 {
|
||||
return nil, fmt.Errorf("`outputs` list must contain at least a single entry from the list %s; "+
|
||||
"see https://docs.victoriametrics.com/vmagent.html#stream-aggregation", supportedOutputs)
|
||||
}
|
||||
aggrStates := make([]aggrState, len(cfg.Outputs))
|
||||
for i, output := range cfg.Outputs {
|
||||
if strings.HasPrefix(output, "quantiles(") {
|
||||
if !strings.HasSuffix(output, ")") {
|
||||
return nil, fmt.Errorf("missing closing brace for `quantiles()` output")
|
||||
}
|
||||
argsStr := output[len("quantiles(") : len(output)-1]
|
||||
if len(argsStr) == 0 {
|
||||
return nil, fmt.Errorf("`quantiles()` must contain at least one phi")
|
||||
}
|
||||
args := strings.Split(argsStr, ",")
|
||||
phis := make([]float64, len(args))
|
||||
for j, arg := range args {
|
||||
arg = strings.TrimSpace(arg)
|
||||
phi, err := strconv.ParseFloat(arg, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse phi=%q for quantiles(%s): %w", arg, argsStr, err)
|
||||
}
|
||||
if phi < 0 || phi > 1 {
|
||||
return nil, fmt.Errorf("phi inside quantiles(%s) must be in the range [0..1]; got %v", argsStr, phi)
|
||||
}
|
||||
phis[j] = phi
|
||||
}
|
||||
aggrStates[i] = newQuantilesAggrState(phis)
|
||||
continue
|
||||
}
|
||||
switch output {
|
||||
case "total":
|
||||
aggrStates[i] = newTotalAggrState(interval)
|
||||
case "increase":
|
||||
aggrStates[i] = newIncreaseAggrState(interval)
|
||||
case "count_series":
|
||||
aggrStates[i] = newCountSeriesAggrState()
|
||||
case "count_samples":
|
||||
aggrStates[i] = newCountSamplesAggrState()
|
||||
case "sum_samples":
|
||||
aggrStates[i] = newSumSamplesAggrState()
|
||||
case "last":
|
||||
aggrStates[i] = newLastAggrState()
|
||||
case "min":
|
||||
aggrStates[i] = newMinAggrState()
|
||||
case "max":
|
||||
aggrStates[i] = newMaxAggrState()
|
||||
case "avg":
|
||||
aggrStates[i] = newAvgAggrState()
|
||||
case "stddev":
|
||||
aggrStates[i] = newStddevAggrState()
|
||||
case "stdvar":
|
||||
aggrStates[i] = newStdvarAggrState()
|
||||
case "histogram_bucket":
|
||||
aggrStates[i] = newHistogramBucketAggrState(interval)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported output=%q; supported values: %s; "+
|
||||
"see https://docs.victoriametrics.com/vmagent.html#stream-aggregation", output, supportedOutputs)
|
||||
}
|
||||
}
|
||||
|
||||
// initialize suffix to add to metric names after aggregation
|
||||
suffix := ":" + cfg.Interval
|
||||
if labels := removeUnderscoreName(by); len(labels) > 0 {
|
||||
suffix += fmt.Sprintf("_by_%s", strings.Join(labels, "_"))
|
||||
}
|
||||
if labels := removeUnderscoreName(without); len(labels) > 0 {
|
||||
suffix += fmt.Sprintf("_without_%s", strings.Join(labels, "_"))
|
||||
}
|
||||
suffix += "_"
|
||||
|
||||
// initialize the aggregator
|
||||
a := &aggregator{
|
||||
match: cfg.Match,
|
||||
|
||||
inputRelabeling: inputRelabeling,
|
||||
outputRelabeling: outputRelabeling,
|
||||
|
||||
by: by,
|
||||
without: without,
|
||||
aggregateOnlyByTime: aggregateOnlyByTime,
|
||||
|
||||
aggrStates: aggrStates,
|
||||
pushFunc: pushFunc,
|
||||
|
||||
suffix: suffix,
|
||||
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
a.wg.Add(1)
|
||||
go func() {
|
||||
a.runFlusher(interval)
|
||||
defer a.wg.Done()
|
||||
}()
|
||||
|
||||
return a, nil
|
||||
}
|
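The suffix assembled above, together with `addMetricSuffix` further below, produces the output naming scheme verified by the tests. A pure-string sketch of that scheme (the real code additionally drops `__name__` from the by/without lists before building the suffix):

```go
// Sketch of the output naming scheme assembled in newAggregator/addMetricSuffix;
// names are illustrative.
package main

import (
	"fmt"
	"strings"
)

func outputName(metric, interval string, by, without []string, output string) string {
	suffix := ":" + interval
	if len(by) > 0 {
		suffix += "_by_" + strings.Join(by, "_")
	}
	if len(without) > 0 {
		suffix += "_without_" + strings.Join(without, "_")
	}
	return metric + suffix + "_" + output
}

func main() {
	fmt.Println(outputName("foo", "1m", []string{"job"}, nil, "total"))
	// foo:1m_by_job_total
}
```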
||||
|
||||
func (a *aggregator) runFlusher(interval time.Duration) {
|
||||
t := time.NewTicker(interval)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-a.stopCh:
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
a.flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (a *aggregator) flush() {
|
||||
ctx := &flushCtx{
|
||||
suffix: a.suffix,
|
||||
}
|
||||
for _, as := range a.aggrStates {
|
||||
ctx.reset()
|
||||
as.appendSeriesForFlush(ctx)
|
||||
|
||||
tss := ctx.tss
|
||||
|
||||
// Apply output relabeling
|
||||
if a.outputRelabeling != nil {
|
||||
dst := tss[:0]
|
||||
for _, ts := range tss {
|
||||
ts.Labels = a.outputRelabeling.Apply(ts.Labels, 0)
|
||||
if len(ts.Labels) == 0 {
|
||||
// The metric has been deleted by the relabeling
|
||||
continue
|
||||
}
|
||||
dst = append(dst, ts)
|
||||
}
|
||||
tss = dst
|
||||
}
|
||||
|
||||
// Push the output metrics
|
||||
a.pushFunc(tss)
|
||||
}
|
||||
}
|
||||
|
||||
// MustStop stops the aggregator.
|
||||
//
|
||||
// The aggregator stops pushing the aggregated metrics after this call.
|
||||
func (a *aggregator) MustStop() {
|
||||
close(a.stopCh)
|
||||
a.wg.Wait()
|
||||
}
|
||||
|
||||
// Push pushes series to a.
|
||||
func (a *aggregator) Push(tss []prompbmarshal.TimeSeries) {
|
||||
labels := promutils.GetLabels()
|
||||
tmpLabels := promutils.GetLabels()
|
||||
bb := bbPool.Get()
|
||||
for _, ts := range tss {
|
||||
if !a.match.Match(ts.Labels) {
|
||||
continue
|
||||
}
|
||||
|
||||
labels.Labels = append(labels.Labels[:0], ts.Labels...)
|
||||
labels.Labels = a.inputRelabeling.Apply(labels.Labels, 0)
|
||||
if len(labels.Labels) == 0 {
|
||||
// The metric has been deleted by the relabeling
|
||||
continue
|
||||
}
|
||||
labels.Sort()
|
||||
|
||||
if a.aggregateOnlyByTime {
|
||||
bb.B = marshalLabelsFast(bb.B[:0], labels.Labels)
|
||||
} else {
|
||||
tmpLabels.Labels = removeUnneededLabels(tmpLabels.Labels[:0], labels.Labels, a.by, a.without)
|
||||
bb.B = marshalLabelsFast(bb.B[:0], tmpLabels.Labels)
|
||||
}
|
||||
outputKey := bytesutil.InternBytes(bb.B)
|
||||
inputKey := ""
|
||||
if !a.aggregateOnlyByTime {
|
||||
tmpLabels.Labels = extractUnneededLabels(tmpLabels.Labels[:0], labels.Labels, a.by, a.without)
|
||||
bb.B = marshalLabelsFast(bb.B[:0], tmpLabels.Labels)
|
||||
inputKey = bytesutil.InternBytes(bb.B)
|
||||
}
|
||||
|
||||
for _, sample := range ts.Samples {
|
||||
a.pushSample(inputKey, outputKey, sample.Value)
|
||||
}
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
promutils.PutLabels(tmpLabels)
|
||||
promutils.PutLabels(labels)
|
||||
}
|
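Push derives two grouping keys per input series: `outputKey` is built from the labels kept by the by/without rules and identifies the aggregated output series, while `inputKey` is built from the dropped labels and distinguishes the original input series inside that group (needed by count_series, increase and total). The real code marshals the labels into a compact binary key; the sketch below uses a readable string stand-in to show the split:

```go
// Sketch of the two grouping keys derived in Push above, with illustrative labels.
package main

import (
	"fmt"
	"sort"
	"strings"
)

func splitKeys(labels map[string]string, by []string) (outputKey, inputKey string) {
	keep := map[string]bool{}
	for _, name := range by {
		keep[name] = true
	}
	var kept, dropped []string
	for name, value := range labels {
		pair := name + "=" + value
		if keep[name] {
			kept = append(kept, pair)
		} else {
			dropped = append(dropped, pair)
		}
	}
	sort.Strings(kept)
	sort.Strings(dropped)
	return strings.Join(kept, ","), strings.Join(dropped, ",")
}

func main() {
	out, in := splitKeys(map[string]string{"__name__": "foo", "job": "api", "instance": "a"}, []string{"__name__", "job"})
	fmt.Println(out) // __name__=foo,job=api
	fmt.Println(in)  // instance=a
}
```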
||||
|
||||
var bbPool bytesutil.ByteBufferPool
|
||||
|
||||
func (a *aggregator) pushSample(inputKey, outputKey string, value float64) {
|
||||
if math.IsNaN(value) {
|
||||
// Skip nan samples
|
||||
return
|
||||
}
|
||||
for _, as := range a.aggrStates {
|
||||
as.pushSample(inputKey, outputKey, value)
|
||||
}
|
||||
}
|
||||
|
||||
func extractUnneededLabels(dst, labels []prompbmarshal.Label, by, without []string) []prompbmarshal.Label {
|
||||
if len(without) > 0 {
|
||||
for _, label := range labels {
|
||||
if hasInArray(label.Name, without) {
|
||||
dst = append(dst, label)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, label := range labels {
|
||||
if !hasInArray(label.Name, by) {
|
||||
dst = append(dst, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func removeUnneededLabels(dst, labels []prompbmarshal.Label, by, without []string) []prompbmarshal.Label {
|
||||
if len(without) > 0 {
|
||||
for _, label := range labels {
|
||||
if !hasInArray(label.Name, without) {
|
||||
dst = append(dst, label)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, label := range labels {
|
||||
if hasInArray(label.Name, by) {
|
||||
dst = append(dst, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func hasInArray(name string, a []string) bool {
|
||||
for _, s := range a {
|
||||
if name == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func marshalLabelsFast(dst []byte, labels []prompbmarshal.Label) []byte {
|
||||
dst = encoding.MarshalUint32(dst, uint32(len(labels)))
|
||||
for _, label := range labels {
|
||||
dst = encoding.MarshalUint32(dst, uint32(len(label.Name)))
|
||||
dst = append(dst, label.Name...)
|
||||
dst = encoding.MarshalUint32(dst, uint32(len(label.Value)))
|
||||
dst = append(dst, label.Value...)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
func unmarshalLabelsFast(dst []prompbmarshal.Label, src []byte) ([]prompbmarshal.Label, error) {
|
||||
if len(src) < 4 {
|
||||
return dst, fmt.Errorf("cannot unmarshal labels count from %d bytes; needs at least 4 bytes", len(src))
|
||||
}
|
||||
n := encoding.UnmarshalUint32(src)
|
||||
src = src[4:]
|
||||
for i := uint32(0); i < n; i++ {
|
||||
// Unmarshal label name
|
||||
if len(src) < 4 {
|
||||
return dst, fmt.Errorf("cannot unmarshal label name length from %d bytes; needs at least 4 bytes", len(src))
|
||||
}
|
||||
labelNameLen := encoding.UnmarshalUint32(src)
|
||||
src = src[4:]
|
||||
if uint32(len(src)) < labelNameLen {
|
||||
return dst, fmt.Errorf("cannot unmarshal label name from %d bytes; needs at least %d bytes", len(src), labelNameLen)
|
||||
}
|
||||
labelName := bytesutil.InternBytes(src[:labelNameLen])
|
||||
src = src[labelNameLen:]
|
||||
|
||||
// Unmarshal label value
|
||||
if len(src) < 4 {
|
||||
return dst, fmt.Errorf("cannot unmarshal label value length from %d bytes; needs at least 4 bytes", len(src))
|
||||
}
|
||||
labelValueLen := encoding.UnmarshalUint32(src)
|
||||
src = src[4:]
|
||||
if uint32(len(src)) < labelValueLen {
|
||||
return dst, fmt.Errorf("cannot unmarshal label value from %d bytes; needs at least %d bytes", len(src), labelValueLen)
|
||||
}
|
||||
labelValue := bytesutil.InternBytes(src[:labelValueLen])
|
||||
src = src[labelValueLen:]
|
||||
|
||||
dst = append(dst, prompbmarshal.Label{
|
||||
Name: labelName,
|
||||
Value: labelValue,
|
||||
})
|
||||
}
|
||||
if len(src) > 0 {
|
||||
return dst, fmt.Errorf("unexpected non-empty tail after unmarshaling labels; tail length is %d bytes", len(src))
|
||||
}
|
||||
return dst, nil
|
||||
}
|
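The marshaling format above is a label count followed by length-prefixed name/value pairs. A roundtrip sketch as a test; it would have to live in package streamaggr since the helpers are unexported, and the test name is illustrative:

```go
package streamaggr

import (
	"reflect"
	"testing"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)

func TestMarshalLabelsFastRoundtrip(t *testing.T) {
	src := []prompbmarshal.Label{
		{Name: "__name__", Value: "foo"},
		{Name: "job", Value: "api"},
	}
	b := marshalLabelsFast(nil, src)
	dst, err := unmarshalLabelsFast(nil, b)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if !reflect.DeepEqual(dst, src) {
		t.Fatalf("unexpected labels; got %v; want %v", dst, src)
	}
}
```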
||||
|
||||
type flushCtx struct {
|
||||
suffix string
|
||||
|
||||
tss []prompbmarshal.TimeSeries
|
||||
labels []prompbmarshal.Label
|
||||
samples []prompbmarshal.Sample
|
||||
}
|
||||
|
||||
func (ctx *flushCtx) reset() {
|
||||
ctx.tss = prompbmarshal.ResetTimeSeries(ctx.tss)
|
||||
promrelabel.CleanLabels(ctx.labels)
|
||||
ctx.labels = ctx.labels[:0]
|
||||
ctx.samples = ctx.samples[:0]
|
||||
}
|
||||
|
||||
func (ctx *flushCtx) appendSeries(labelsMarshaled, suffix string, timestamp int64, value float64) {
|
||||
var err error
|
||||
labelsLen := len(ctx.labels)
|
||||
samplesLen := len(ctx.samples)
|
||||
ctx.labels, err = unmarshalLabelsFast(ctx.labels, bytesutil.ToUnsafeBytes(labelsMarshaled))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal labels from output key: %s", err)
|
||||
}
|
||||
ctx.labels = addMetricSuffix(ctx.labels, labelsLen, ctx.suffix, suffix)
|
||||
ctx.samples = append(ctx.samples, prompbmarshal.Sample{
|
||||
Timestamp: timestamp,
|
||||
Value: value,
|
||||
})
|
||||
ctx.tss = append(ctx.tss, prompbmarshal.TimeSeries{
|
||||
Labels: ctx.labels[labelsLen:],
|
||||
Samples: ctx.samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
|
||||
func (ctx *flushCtx) appendSeriesWithExtraLabel(labelsMarshaled, suffix string, timestamp int64, value float64, extraName, extraValue string) {
|
||||
var err error
|
||||
labelsLen := len(ctx.labels)
|
||||
samplesLen := len(ctx.samples)
|
||||
ctx.labels, err = unmarshalLabelsFast(ctx.labels, bytesutil.ToUnsafeBytes(labelsMarshaled))
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: cannot unmarshal labels from output key: %s", err)
|
||||
}
|
||||
ctx.labels = addMetricSuffix(ctx.labels, labelsLen, ctx.suffix, suffix)
|
||||
ctx.labels = append(ctx.labels, prompbmarshal.Label{
|
||||
Name: extraName,
|
||||
Value: extraValue,
|
||||
})
|
||||
ctx.samples = append(ctx.samples, prompbmarshal.Sample{
|
||||
Timestamp: timestamp,
|
||||
Value: value,
|
||||
})
|
||||
ctx.tss = append(ctx.tss, prompbmarshal.TimeSeries{
|
||||
Labels: ctx.labels[labelsLen:],
|
||||
Samples: ctx.samples[samplesLen:],
|
||||
})
|
||||
}
|
||||
|
||||
func addMetricSuffix(labels []prompbmarshal.Label, offset int, firstSuffix, lastSuffix string) []prompbmarshal.Label {
|
||||
src := labels[offset:]
|
||||
for i := range src {
|
||||
label := &src[i]
|
||||
if label.Name != "__name__" {
|
||||
continue
|
||||
}
|
||||
bb := bbPool.Get()
|
||||
bb.B = append(bb.B, label.Value...)
|
||||
bb.B = append(bb.B, firstSuffix...)
|
||||
bb.B = append(bb.B, lastSuffix...)
|
||||
label.Value = bytesutil.InternBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
return labels
|
||||
}
|
||||
// The __name__ isn't found. Add it
|
||||
bb := bbPool.Get()
|
||||
bb.B = append(bb.B, firstSuffix...)
|
||||
bb.B = append(bb.B, lastSuffix...)
|
||||
labelValue := bytesutil.InternBytes(bb.B)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: labelValue,
|
||||
})
|
||||
return labels
|
||||
}
|
||||
|
||||
func addMissingUnderscoreName(labels []string) []string {
|
||||
result := []string{"__name__"}
|
||||
for _, s := range labels {
|
||||
if s == "__name__" {
|
||||
continue
|
||||
}
|
||||
result = append(result, s)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func removeUnderscoreName(labels []string) []string {
|
||||
var result []string
|
||||
for _, s := range labels {
|
||||
if s == "__name__" {
|
||||
continue
|
||||
}
|
||||
result = append(result, s)
|
||||
}
|
||||
return result
|
||||
}
|
662
lib/streamaggr/streamaggr_test.go
Normal file
|
@ -0,0 +1,662 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
)
|
||||
|
||||
func TestAggregatorsFailure(t *testing.T) {
|
||||
f := func(config string) {
|
||||
t.Helper()
|
||||
pushFunc := func(tss []prompbmarshal.TimeSeries) {
|
||||
panic(fmt.Errorf("pushFunc shouldn't be called"))
|
||||
}
|
||||
a, err := NewAggregatorsFromData([]byte(config), pushFunc)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if a != nil {
|
||||
t.Fatalf("expecting nil a")
|
||||
}
|
||||
}
|
||||
|
||||
// Invalid config
|
||||
f(`foobar`)
|
||||
|
||||
// Unknown option
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
foobar: baz
|
||||
`)
|
||||
|
||||
// missing interval
|
||||
f(`
|
||||
- outputs: [total]
|
||||
`)
|
||||
|
||||
// missing outputs
|
||||
f(`
|
||||
- interval: 1m
|
||||
`)
|
||||
|
||||
// Invalid output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [foobar]
|
||||
`)
|
||||
|
||||
// Negative interval
|
||||
f(`- interval: -5m`)
|
||||
// Too small interval
|
||||
f(`- interval: 10ms`)
|
||||
|
||||
// Invalid input_relabel_configs
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
input_relabel_configs:
|
||||
- foo: bar
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
input_relabel_configs:
|
||||
- action: replace
|
||||
`)
|
||||
|
||||
// Invalid output_relabel_configs
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
output_relabel_configs:
|
||||
- foo: bar
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
output_relabel_configs:
|
||||
- action: replace
|
||||
`)
|
||||
|
||||
// Both by and without are non-empty
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
by: [foo]
|
||||
without: [bar]
|
||||
`)
|
||||
|
||||
// Invalid quantiles()
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles("]
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles()"]
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles(foo)"]
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles(-0.5)"]
|
||||
`)
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles(1.5)"]
|
||||
`)
|
||||
}
|
||||
|
||||
func TestAggregatorsSuccess(t *testing.T) {
|
||||
f := func(config, inputMetrics, outputMetricsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
// Initialize Aggregators
|
||||
var tssOutput []prompbmarshal.TimeSeries
|
||||
var tssOutputLock sync.Mutex
|
||||
pushFunc := func(tss []prompbmarshal.TimeSeries) {
|
||||
tssOutputLock.Lock()
|
||||
for _, ts := range tss {
|
||||
labelsCopy := append([]prompbmarshal.Label{}, ts.Labels...)
|
||||
samplesCopy := append([]prompbmarshal.Sample{}, ts.Samples...)
|
||||
tssOutput = append(tssOutput, prompbmarshal.TimeSeries{
|
||||
Labels: labelsCopy,
|
||||
Samples: samplesCopy,
|
||||
})
|
||||
}
|
||||
tssOutputLock.Unlock()
|
||||
}
|
||||
a, err := NewAggregatorsFromData([]byte(config), pushFunc)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot initialize aggregators: %s", err)
|
||||
}
|
||||
|
||||
// Push the inputMetrics to Aggregators
|
||||
tssInput := mustParsePromMetrics(inputMetrics)
|
||||
a.Push(tssInput)
|
||||
if a != nil {
|
||||
for _, aggr := range a.as {
|
||||
aggr.flush()
|
||||
}
|
||||
}
|
||||
a.MustStop()
|
||||
|
||||
// Verify the tssOutput contains the expected metrics
|
||||
tsStrings := make([]string, len(tssOutput))
|
||||
for i, ts := range tssOutput {
|
||||
tsStrings[i] = timeSeriesToString(ts)
|
||||
}
|
||||
sort.Strings(tsStrings)
|
||||
outputMetrics := strings.Join(tsStrings, "")
|
||||
if outputMetrics != outputMetricsExpected {
|
||||
t.Fatalf("unexpected output metrics;\ngot\n%s\nwant\n%s", outputMetrics, outputMetricsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty config
|
||||
f(``, ``, ``)
|
||||
f(``, `foo{bar="baz"} 1`, ``)
|
||||
f(``, "foo 1\nbaz 2", ``)
|
||||
|
||||
// Empty by list - aggregate only by time
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [count_samples, sum_samples, count_series, last]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_count_samples 1
|
||||
bar:1m_count_series 1
|
||||
bar:1m_last 5
|
||||
bar:1m_sum_samples 5
|
||||
foo:1m_count_samples{abc="123"} 2
|
||||
foo:1m_count_samples{abc="456",de="fg"} 1
|
||||
foo:1m_count_series{abc="123"} 1
|
||||
foo:1m_count_series{abc="456",de="fg"} 1
|
||||
foo:1m_last{abc="123"} 8.5
|
||||
foo:1m_last{abc="456",de="fg"} 8
|
||||
foo:1m_sum_samples{abc="123"} 12.5
|
||||
foo:1m_sum_samples{abc="456",de="fg"} 8
|
||||
`)
|
||||
|
||||
// Special case: __name__ in by list
|
||||
f(`
|
||||
- interval: 1m
|
||||
by: [__name__]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_count_samples 1
|
||||
bar:1m_count_series 1
|
||||
bar:1m_sum_samples 5
|
||||
foo:1m_count_samples 3
|
||||
foo:1m_count_series 2
|
||||
foo:1m_sum_samples 20.5
|
||||
`)
|
||||
|
||||
// Non-empty by list with non-existing labels
|
||||
f(`
|
||||
- interval: 1m
|
||||
by: [foo, bar]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_by_foo_bar_count_samples 1
|
||||
bar:1m_by_foo_bar_count_series 1
|
||||
bar:1m_by_foo_bar_sum_samples 5
|
||||
foo:1m_by_foo_bar_count_samples 3
|
||||
foo:1m_by_foo_bar_count_series 2
|
||||
foo:1m_by_foo_bar_sum_samples 20.5
|
||||
`)
|
||||
|
||||
// Non-empty by list with existing label
|
||||
f(`
|
||||
- interval: 1m
|
||||
by: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_by_abc_count_samples 1
|
||||
bar:1m_by_abc_count_series 1
|
||||
bar:1m_by_abc_sum_samples 5
|
||||
foo:1m_by_abc_count_samples{abc="123"} 2
|
||||
foo:1m_by_abc_count_samples{abc="456"} 1
|
||||
foo:1m_by_abc_count_series{abc="123"} 1
|
||||
foo:1m_by_abc_count_series{abc="456"} 1
|
||||
foo:1m_by_abc_sum_samples{abc="123"} 12.5
|
||||
foo:1m_by_abc_sum_samples{abc="456"} 8
|
||||
`)
|
||||
|
||||
// Non-empty without list with non-existing labels
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [foo]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_without_foo_count_samples 1
|
||||
bar:1m_without_foo_count_series 1
|
||||
bar:1m_without_foo_sum_samples 5
|
||||
foo:1m_without_foo_count_samples{abc="123"} 2
|
||||
foo:1m_without_foo_count_samples{abc="456",de="fg"} 1
|
||||
foo:1m_without_foo_count_series{abc="123"} 1
|
||||
foo:1m_without_foo_count_series{abc="456",de="fg"} 1
|
||||
foo:1m_without_foo_sum_samples{abc="123"} 12.5
|
||||
foo:1m_without_foo_sum_samples{abc="456",de="fg"} 8
|
||||
`)
|
||||
|
||||
// Non-empty without list with existing labels
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_without_abc_count_samples 1
|
||||
bar:1m_without_abc_count_series 1
|
||||
bar:1m_without_abc_sum_samples 5
|
||||
foo:1m_without_abc_count_samples 2
|
||||
foo:1m_without_abc_count_samples{de="fg"} 1
|
||||
foo:1m_without_abc_count_series 1
|
||||
foo:1m_without_abc_count_series{de="fg"} 1
|
||||
foo:1m_without_abc_sum_samples 12.5
|
||||
foo:1m_without_abc_sum_samples{de="fg"} 8
|
||||
`)
|
||||
|
||||
// Special case: __name__ in without list
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [__name__]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `:1m_count_samples 1
|
||||
:1m_count_samples{abc="123"} 2
|
||||
:1m_count_samples{abc="456",de="fg"} 1
|
||||
:1m_count_series 1
|
||||
:1m_count_series{abc="123"} 1
|
||||
:1m_count_series{abc="456",de="fg"} 1
|
||||
:1m_sum_samples 5
|
||||
:1m_sum_samples{abc="123"} 12.5
|
||||
:1m_sum_samples{abc="456",de="fg"} 8
|
||||
`)
|
||||
|
||||
// drop some input metrics
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
input_relabel_configs:
|
||||
- if: 'foo'
|
||||
action: drop
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_without_abc_count_samples 1
|
||||
bar:1m_without_abc_count_series 1
|
||||
bar:1m_without_abc_sum_samples 5
|
||||
`)
|
||||
|
||||
// rename output metrics
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
output_relabel_configs:
|
||||
- action: replace_all
|
||||
source_labels: [__name__]
|
||||
regex: ":|_"
|
||||
replacement: "-"
|
||||
target_label: __name__
|
||||
- action: drop
|
||||
source_labels: [de]
|
||||
regex: fg
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar-1m-without-abc-count-samples 1
|
||||
bar-1m-without-abc-count-series 1
|
||||
bar-1m-without-abc-sum-samples 5
|
||||
foo-1m-without-abc-count-samples 2
|
||||
foo-1m-without-abc-count-series 1
|
||||
foo-1m-without-abc-sum-samples 12.5
|
||||
`)
|
||||
|
||||
// match doesn't match anything
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
match: '{non_existing_label!=""}'
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, ``)
|
||||
|
||||
// match matches foo series with non-empty abc label
|
||||
f(`
|
||||
- interval: 1m
|
||||
by: [abc]
|
||||
outputs: [count_samples, sum_samples, count_series]
|
||||
match: 'foo{abc=~".+"}'
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `foo:1m_by_abc_count_samples{abc="123"} 2
|
||||
foo:1m_by_abc_count_samples{abc="456"} 1
|
||||
foo:1m_by_abc_count_series{abc="123"} 1
|
||||
foo:1m_by_abc_count_series{abc="456"} 1
|
||||
foo:1m_by_abc_sum_samples{abc="123"} 12.5
|
||||
foo:1m_by_abc_sum_samples{abc="456"} 8
|
||||
`)
|
||||
|
||||
// total output for non-repeated series
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
`, `
|
||||
foo 123
|
||||
bar{baz="qwe"} 4.34
|
||||
`, `bar:1m_total{baz="qwe"} 0
|
||||
foo:1m_total 0
|
||||
`)
|
||||
|
||||
// total output for repeated series
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [total]
|
||||
`, `
|
||||
foo 123
|
||||
bar{baz="qwe"} 1.32
|
||||
bar{baz="qwe"} 4.34
|
||||
bar{baz="qwe"} 2
|
||||
foo{baz="qwe"} -5
|
||||
bar{baz="qwer"} 343
|
||||
bar{baz="qwer"} 344
|
||||
foo{baz="qwe"} 10
|
||||
`, `bar:1m_total{baz="qwe"} 5.02
|
||||
bar:1m_total{baz="qwer"} 1
|
||||
foo:1m_total 0
|
||||
foo:1m_total{baz="qwe"} 15
|
||||
`)
|
||||
|
||||
// total output for repeated series with group by __name__
|
||||
f(`
|
||||
- interval: 1m
|
||||
by: [__name__]
|
||||
outputs: [total]
|
||||
`, `
|
||||
foo 123
|
||||
bar{baz="qwe"} 1.32
|
||||
bar{baz="qwe"} 4.34
|
||||
bar{baz="qwe"} 2
|
||||
foo{baz="qwe"} -5
|
||||
bar{baz="qwer"} 343
|
||||
bar{baz="qwer"} 344
|
||||
foo{baz="qwe"} 10
|
||||
`, `bar:1m_total 6.02
|
||||
foo:1m_total 15
|
||||
`)
|
||||
|
||||
// increase output for non-repeated series
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [increase]
|
||||
`, `
|
||||
foo 123
|
||||
bar{baz="qwe"} 4.34
|
||||
`, `bar:1m_increase{baz="qwe"} 0
|
||||
foo:1m_increase 0
|
||||
`)
|
||||
|
||||
// increase output for repeated series
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [increase]
|
||||
`, `
|
||||
foo 123
|
||||
bar{baz="qwe"} 1.32
|
||||
bar{baz="qwe"} 4.34
|
||||
bar{baz="qwe"} 2
|
||||
foo{baz="qwe"} -5
|
||||
bar{baz="qwer"} 343
|
||||
bar{baz="qwer"} 344
|
||||
foo{baz="qwe"} 10
|
||||
`, `bar:1m_increase{baz="qwe"} 5.02
|
||||
bar:1m_increase{baz="qwer"} 1
|
||||
foo:1m_increase 0
|
||||
foo:1m_increase{baz="qwe"} 15
|
||||
`)
|
||||
|
||||
// multiple aggregate configs
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [count_series, sum_samples]
|
||||
- interval: 5m
|
||||
by: [bar]
|
||||
outputs: [sum_samples]
|
||||
`, `
|
||||
foo 1
|
||||
foo{bar="baz"} 2
|
||||
foo 3.3
|
||||
`, `foo:1m_count_series 1
|
||||
foo:1m_count_series{bar="baz"} 1
|
||||
foo:1m_sum_samples 4.3
|
||||
foo:1m_sum_samples{bar="baz"} 2
|
||||
foo:5m_by_bar_sum_samples 4.3
|
||||
foo:5m_by_bar_sum_samples{bar="baz"} 2
|
||||
`)
|
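Each list entry is an independent aggregator, so the same input is aggregated twice: the 1m config groups by the full label set (unlabeled foo: 2 samples, sum 1 + 3.3 = 4.3; foo{bar="baz"}: 1 sample, sum 2), and the 5m config groups by the bar label only, which yields the same values here under the `5m_by_bar` names.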
||||
|
||||
// min and max outputs
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [min, max]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_max 5
|
||||
bar:1m_min 5
|
||||
foo:1m_max{abc="123"} 8.5
|
||||
foo:1m_max{abc="456",de="fg"} 8
|
||||
foo:1m_min{abc="123"} 4
|
||||
foo:1m_min{abc="456",de="fg"} 8
|
||||
`)
|
||||
|
||||
// avg output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [avg]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_avg 5
|
||||
foo:1m_avg{abc="123"} 6.25
|
||||
foo:1m_avg{abc="456",de="fg"} 8
|
||||
`)
|
||||
|
||||
// stddev output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [stddev]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_stddev 0
|
||||
foo:1m_stddev{abc="123"} 2.25
|
||||
foo:1m_stddev{abc="456",de="fg"} 0
|
||||
`)
|
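For foo{abc="123"} the two samples 4 and 8.5 have mean 6.25, population variance ((4 - 6.25)^2 + (8.5 - 6.25)^2) / 2 = 5.0625 and hence stddev 2.25; single-sample series get 0. The stdvar case that follows reports the variance itself, 5.0625.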
||||
|
||||
// stdvar output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [stdvar]
|
||||
`, `
|
||||
foo{abc="123"} 4
|
||||
bar 5
|
||||
foo{abc="123"} 8.5
|
||||
foo{abc="456",de="fg"} 8
|
||||
`, `bar:1m_stdvar 0
|
||||
foo:1m_stdvar{abc="123"} 5.0625
|
||||
foo:1m_stdvar{abc="456",de="fg"} 0
|
||||
`)
|
||||
|
||||
// histogram_bucket output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [histogram_bucket]
|
||||
`, `
|
||||
cpu_usage{cpu="1"} 12.5
|
||||
cpu_usage{cpu="1"} 13.3
|
||||
cpu_usage{cpu="1"} 13
|
||||
cpu_usage{cpu="1"} 12
|
||||
cpu_usage{cpu="1"} 14
|
||||
cpu_usage{cpu="1"} 25
|
||||
cpu_usage{cpu="2"} 90
|
||||
`, `cpu_usage:1m_histogram_bucket{cpu="1",vmrange="1.136e+01...1.292e+01"} 2
|
||||
cpu_usage:1m_histogram_bucket{cpu="1",vmrange="1.292e+01...1.468e+01"} 3
|
||||
cpu_usage:1m_histogram_bucket{cpu="1",vmrange="2.448e+01...2.783e+01"} 1
|
||||
cpu_usage:1m_histogram_bucket{cpu="2",vmrange="8.799e+01...1.000e+02"} 1
|
||||
`)
|
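histogram_bucket counts samples into the log-spaced `vmrange` buckets used by VictoriaMetrics histograms rather than fixed `le` buckets: 12 and 12.5 land in 1.136e+01...1.292e+01, 13, 13.3 and 14 in 1.292e+01...1.468e+01, 25 in 2.448e+01...2.783e+01, and 90 in 8.799e+01...1.000e+02.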
||||
|
||||
// histogram_bucket output without cpu
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [cpu]
|
||||
outputs: [histogram_bucket]
|
||||
`, `
|
||||
cpu_usage{cpu="1"} 12.5
|
||||
cpu_usage{cpu="1"} 13.3
|
||||
cpu_usage{cpu="1"} 13
|
||||
cpu_usage{cpu="1"} 12
|
||||
cpu_usage{cpu="1"} 14
|
||||
cpu_usage{cpu="1"} 25
|
||||
cpu_usage{cpu="2"} 90
|
||||
`, `cpu_usage:1m_without_cpu_histogram_bucket{vmrange="1.136e+01...1.292e+01"} 2
|
||||
cpu_usage:1m_without_cpu_histogram_bucket{vmrange="1.292e+01...1.468e+01"} 3
|
||||
cpu_usage:1m_without_cpu_histogram_bucket{vmrange="2.448e+01...2.783e+01"} 1
|
||||
cpu_usage:1m_without_cpu_histogram_bucket{vmrange="8.799e+01...1.000e+02"} 1
|
||||
`)
|
||||
|
||||
// quantiles output
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: ["quantiles(0, 0.5, 1)"]
|
||||
`, `
|
||||
cpu_usage{cpu="1"} 12.5
|
||||
cpu_usage{cpu="1"} 13.3
|
||||
cpu_usage{cpu="1"} 13
|
||||
cpu_usage{cpu="1"} 12
|
||||
cpu_usage{cpu="1"} 14
|
||||
cpu_usage{cpu="1"} 25
|
||||
cpu_usage{cpu="2"} 90
|
||||
`, `cpu_usage:1m_quantiles{cpu="1",quantile="0"} 12
|
||||
cpu_usage:1m_quantiles{cpu="1",quantile="0.5"} 13.3
|
||||
cpu_usage:1m_quantiles{cpu="1",quantile="1"} 25
|
||||
cpu_usage:1m_quantiles{cpu="2",quantile="0"} 90
|
||||
cpu_usage:1m_quantiles{cpu="2",quantile="0.5"} 90
|
||||
cpu_usage:1m_quantiles{cpu="2",quantile="1"} 90
|
||||
`)
|
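Each requested quantile becomes its own output series distinguished by the `quantile` label. For cpu="1" the six samples sorted are 12, 12.5, 13, 13.3, 14, 25, so quantile 0 is 12, quantile 1 is 25 and the 0.5 quantile comes out as 13.3 here; the single-sample cpu="2" series reports 90 for every quantile.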
||||
|
||||
// quantiles output without cpu
|
||||
f(`
|
||||
- interval: 1m
|
||||
without: [cpu]
|
||||
outputs: ["quantiles(0, 0.5, 1)"]
|
||||
`, `
|
||||
cpu_usage{cpu="1"} 12.5
|
||||
cpu_usage{cpu="1"} 13.3
|
||||
cpu_usage{cpu="1"} 13
|
||||
cpu_usage{cpu="1"} 12
|
||||
cpu_usage{cpu="1"} 14
|
||||
cpu_usage{cpu="1"} 25
|
||||
cpu_usage{cpu="2"} 90
|
||||
`, `cpu_usage:1m_without_cpu_quantiles{quantile="0"} 12
|
||||
cpu_usage:1m_without_cpu_quantiles{quantile="0.5"} 13.3
|
||||
cpu_usage:1m_without_cpu_quantiles{quantile="1"} 90
|
||||
`)
|
||||
}
|
||||
|
||||
func timeSeriesToString(ts prompbmarshal.TimeSeries) string {
|
||||
labelsString := promrelabel.LabelsToString(ts.Labels)
|
||||
if len(ts.Samples) != 1 {
|
||||
panic(fmt.Errorf("unexpected number of samples for %s: %d; want 1", labelsString, len(ts.Samples)))
|
||||
}
|
||||
return fmt.Sprintf("%s %v\n", labelsString, ts.Samples[0].Value)
|
||||
}
|
||||
|
||||
func mustParsePromMetrics(s string) []prompbmarshal.TimeSeries {
|
||||
var rows prometheus.Rows
|
||||
errLogger := func(s string) {
|
||||
panic(fmt.Errorf("unexpected error when parsing Prometheus metrics: %s", s))
|
||||
}
|
||||
rows.UnmarshalWithErrLogger(s, errLogger)
|
||||
var tss []prompbmarshal.TimeSeries
|
||||
samples := make([]prompbmarshal.Sample, 0, len(rows.Rows))
|
||||
for _, row := range rows.Rows {
|
||||
labels := make([]prompbmarshal.Label, 0, len(row.Tags)+1)
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: "__name__",
|
||||
Value: row.Metric,
|
||||
})
|
||||
for _, tag := range row.Tags {
|
||||
labels = append(labels, prompbmarshal.Label{
|
||||
Name: tag.Key,
|
||||
Value: tag.Value,
|
||||
})
|
||||
}
|
||||
samples = append(samples, prompbmarshal.Sample{
|
||||
Value: row.Value,
|
||||
Timestamp: row.Timestamp,
|
||||
})
|
||||
ts := prompbmarshal.TimeSeries{
|
||||
Labels: labels,
|
||||
Samples: samples[len(samples)-1:],
|
||||
}
|
||||
tss = append(tss, ts)
|
||||
}
|
||||
return tss
|
||||
}
|
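Each TimeSeries returned by mustParsePromMetrics carries exactly one sample (the helper slices the shared samples buffer one element at a time), which is what the len(ts.Samples) != 1 panic in timeSeriesToString relies on. A hypothetical round trip through the two helpers (the exact label rendering comes from promrelabel.LabelsToString):

for _, ts := range mustParsePromMetrics(`foo{abc="123"} 4`) {
	fmt.Print(timeSeriesToString(ts)) // expected to print a line like: foo{abc="123"} 4
}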
73
lib/streamaggr/streamaggr_timing_test.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
func BenchmarkAggregatorsPushByJobAvg(b *testing.B) {
|
||||
for _, output := range []string{
|
||||
"total",
|
||||
"increase",
|
||||
"count_series",
|
||||
"count_samples",
|
||||
"sum_samples",
|
||||
"last",
|
||||
"min",
|
||||
"max",
|
||||
"avg",
|
||||
"stddev",
|
||||
"stdvar",
|
||||
"histogram_bucket",
|
||||
"quantiles(0, 0.5, 1)",
|
||||
} {
|
||||
b.Run(fmt.Sprintf("output=%s", output), func(b *testing.B) {
|
||||
benchmarkAggregatorsPush(b, output)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func benchmarkAggregatorsPush(b *testing.B, output string) {
|
||||
config := fmt.Sprintf(`
|
||||
- match: http_requests_total
|
||||
interval: 24h
|
||||
without: [job]
|
||||
outputs: [%q]
|
||||
`, output)
|
||||
pushFunc := func(tss []prompbmarshal.TimeSeries) {
|
||||
panic(fmt.Errorf("unexpected pushFunc call"))
|
||||
}
|
||||
a, err := NewAggregatorsFromData([]byte(config), pushFunc)
|
||||
if err != nil {
|
||||
b.Fatalf("unexpected error when initializing aggregators: %s", err)
|
||||
}
|
||||
defer a.MustStop()
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(len(benchSeries)))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
a.Push(benchSeries)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func newBenchSeries(seriesCount, samplesPerSeries int) []prompbmarshal.TimeSeries {
|
||||
a := make([]string, 0, seriesCount*samplesPerSeries)
|
||||
for i := 0; i < samplesPerSeries; i++ {
|
||||
for j := 0; j < seriesCount; j++ {
|
||||
s := fmt.Sprintf(`http_requests_total{path="/foo/%d",job="foo",instance="bar"} %d`, j, i*10)
|
||||
a = append(a, s)
|
||||
}
|
||||
}
|
||||
metrics := strings.Join(a, "\n")
|
||||
return mustParsePromMetrics(metrics)
|
||||
}
|
||||
|
||||
const seriesCount = 10000
|
||||
const samplesPerSeries = 10
|
||||
|
||||
var benchSeries = newBenchSeries(seriesCount, samplesPerSeries)
|
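The benchmark uses a 24h interval and a panicking pushFunc, so no flush is expected while it runs and only the Push hot path over benchSeries (10000 series with 10 samples each, one TimeSeries per sample) is measured. Note that SetBytes is fed len(benchSeries), so the reported "bytes/op" figure is really the number of samples pushed per iteration.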
71
lib/streamaggr/sum_samples.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// sumSamplesAggrState calculates output=sum_samples, i.e. the sum of the input samples.
|
||||
type sumSamplesAggrState struct {
|
||||
m sync.Map
|
||||
}
|
||||
|
||||
type sumSamplesStateValue struct {
|
||||
mu sync.Mutex
|
||||
sum float64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
func newSumSamplesAggrState() *sumSamplesAggrState {
|
||||
return &sumSamplesAggrState{}
|
||||
}
|
||||
|
||||
func (as *sumSamplesAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &sumSamplesStateValue{
|
||||
sum: value,
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if !loaded {
|
||||
// The new entry has been successfully created.
|
||||
return
|
||||
}
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
sv := v.(*sumSamplesStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
sv.sum += value
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *sumSamplesAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
// Atomically delete the entry from the map, so a new entry is created for the next flush.
|
||||
m.Delete(k)
|
||||
|
||||
sv := v.(*sumSamplesStateValue)
|
||||
sv.mu.Lock()
|
||||
sum := sv.sum
|
||||
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
|
||||
sv.deleted = true
|
||||
sv.mu.Unlock()
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "sum_samples", currentTimeMsec, sum)
|
||||
return true
|
||||
})
|
||||
}
|
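All output states in this commit follow the concurrency pattern visible above: a sync.Map keyed by the output labels, LoadOrStore to create entries without a global lock, and a per-entry deleted flag plus goto-based retry so a pushSample racing with appendSeriesForFlush (which removes the entry from the map and then marks it deleted under the mutex) re-creates a fresh entry instead of silently dropping the sample. A minimal sketch of a hypothetical "last"-style output built on the same pattern (not the actual last.go from this commit) could look like this:

// Illustrative sketch only (not part of this commit): a minimal "last"-style
// output built on the same LoadOrStore + deleted-flag pattern as
// sumSamplesAggrState above; the actual last.go may differ.
package sketch

import "sync"

type lastStateValue struct {
	mu      sync.Mutex
	last    float64
	deleted bool
}

type lastAggrState struct {
	m sync.Map
}

func (as *lastAggrState) pushSample(outputKey string, value float64) {
again:
	v, ok := as.m.Load(outputKey)
	if !ok {
		// The entry is missing. Try creating it without holding any lock.
		v = &lastStateValue{last: value}
		vNew, loaded := as.m.LoadOrStore(outputKey, v)
		if !loaded {
			// The new entry has been created successfully.
			return
		}
		// Use the entry created by a concurrent goroutine.
		v = vNew
	}
	sv := v.(*lastStateValue)
	sv.mu.Lock()
	deleted := sv.deleted
	if !deleted {
		sv.last = value
	}
	sv.mu.Unlock()
	if deleted {
		// The flush goroutine removed this entry from the map;
		// retry so the sample lands in a fresh entry instead of being lost.
		goto again
	}
}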
137
lib/streamaggr/total.go
Normal file
|
@ -0,0 +1,137 @@
|
|||
package streamaggr
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
// totalAggrState calculates output=total, i.e. the running total over the input counters.
|
||||
type totalAggrState struct {
|
||||
m sync.Map
|
||||
|
||||
ignoreInputDeadline uint64
|
||||
intervalSecs uint64
|
||||
}
|
||||
|
||||
type totalStateValue struct {
|
||||
mu sync.Mutex
|
||||
lastValues map[string]*lastValueState
|
||||
total float64
|
||||
deleteDeadline uint64
|
||||
deleted bool
|
||||
}
|
||||
|
||||
type lastValueState struct {
|
||||
value float64
|
||||
deleteDeadline uint64
|
||||
}
|
||||
|
||||
func newTotalAggrState(interval time.Duration) *totalAggrState {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
intervalSecs := uint64(interval.Seconds() + 1)
|
||||
return &totalAggrState{
|
||||
ignoreInputDeadline: currentTime + intervalSecs,
|
||||
intervalSecs: intervalSecs,
|
||||
}
|
||||
}
|
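ignoreInputDeadline is set roughly one interval past startup: until it passes, the first sample of a previously unseen input series only initializes its lastValueState and its absolute value is not added to the total (see the `if ok || currentTime > as.ignoreInputDeadline` check below). This avoids counting the pre-existing value of long-running counters as an increase right after the aggregator starts.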
||||
|
||||
func (as *totalAggrState) pushSample(inputKey, outputKey string, value float64) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
deleteDeadline := currentTime + as.intervalSecs + (as.intervalSecs >> 1)
|
||||
|
||||
again:
|
||||
v, ok := as.m.Load(outputKey)
|
||||
if !ok {
|
||||
// The entry is missing in the map. Try creating it.
|
||||
v = &totalStateValue{
|
||||
lastValues: make(map[string]*lastValueState),
|
||||
}
|
||||
vNew, loaded := as.m.LoadOrStore(outputKey, v)
|
||||
if loaded {
|
||||
// Use the entry created by a concurrent goroutine.
|
||||
v = vNew
|
||||
}
|
||||
}
|
||||
sv := v.(*totalStateValue)
|
||||
sv.mu.Lock()
|
||||
deleted := sv.deleted
|
||||
if !deleted {
|
||||
lv, ok := sv.lastValues[inputKey]
|
||||
if !ok {
|
||||
lv = &lastValueState{}
|
||||
sv.lastValues[inputKey] = lv
|
||||
}
|
||||
d := value
|
||||
if ok && lv.value <= value {
|
||||
d = value - lv.value
|
||||
}
|
||||
if ok || currentTime > as.ignoreInputDeadline {
|
||||
sv.total += d
|
||||
}
|
||||
lv.value = value
|
||||
lv.deleteDeadline = deleteDeadline
|
||||
sv.deleteDeadline = deleteDeadline
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
if deleted {
|
||||
// The entry has been deleted by the concurrent call to appendSeriesForFlush
|
||||
// Try obtaining and updating the entry again.
|
||||
goto again
|
||||
}
|
||||
}
|
||||
|
||||
func (as *totalAggrState) removeOldEntries(currentTime uint64) {
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*totalStateValue)
|
||||
|
||||
sv.mu.Lock()
|
||||
deleted := currentTime > sv.deleteDeadline
|
||||
if deleted {
|
||||
// Mark the current entry as deleted
|
||||
sv.deleted = deleted
|
||||
} else {
|
||||
// Delete outdated entries in sv.lastValues
|
||||
m := sv.lastValues
|
||||
for k1, v1 := range m {
|
||||
if currentTime > v1.deleteDeadline {
|
||||
delete(m, k1)
|
||||
}
|
||||
}
|
||||
}
|
||||
sv.mu.Unlock()
|
||||
|
||||
if deleted {
|
||||
m.Delete(k)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
func (as *totalAggrState) appendSeriesForFlush(ctx *flushCtx) {
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
currentTimeMsec := int64(currentTime) * 1000
|
||||
|
||||
as.removeOldEntries(currentTime)
|
||||
|
||||
m := &as.m
|
||||
m.Range(func(k, v interface{}) bool {
|
||||
sv := v.(*totalStateValue)
|
||||
sv.mu.Lock()
|
||||
total := sv.total
|
||||
if math.Abs(sv.total) >= (1 << 53) {
|
||||
// It is time to reset the entry, since it starts losing float64 precision
|
||||
sv.total = 0
|
||||
}
|
||||
deleted := sv.deleted
|
||||
sv.mu.Unlock()
|
||||
if !deleted {
|
||||
key := k.(string)
|
||||
ctx.appendSeries(key, "total", currentTimeMsec, total)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
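The 1<<53 check above resets the accumulated total once it grows past the range where float64 can represent every integer exactly; the already-accumulated value is still flushed before the reset, and the entry itself is removed only by removeOldEntries once its deleteDeadline passes.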