mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-11 14:53:49 +00:00
db557b86ee
- Move the remaining code responsible for stream aggregation initialization from remotewrite.go to streamaggr.go . This improves code maintainability a bit. - Properly shut down streamaggr.Aggregators initialized inside remotewrite.CheckStreamAggrConfigs(). This prevents from potential resource leaks. - Use separate functions for initializing and reloading of global stream aggregation and per-remoteWrite.url stream aggregation. This makes the code easier to read and maintain. This also fixes INFO and ERROR logs emitted by these functions. - Add an ability to specify `name` option in every stream aggregation config. This option is used as `name` label in metrics exposed by stream aggregation at /metrics page. This simplifies investigation of the exposed metrics. - Add `path` label additionally to `name`, `url` and `position` labels at metrics exposed by streaming aggregation. This label should simplify investigation of the exposed metrics. - Remove `match` and `group` labels from metrics exposed by streaming aggregation, since they have little practical applicability: it is hard to use these labels in query filters and aggregation functions. - Rename the metric `vm_streamaggr_flushed_samples_total` to less misleading `vm_streamaggr_output_samples_total` . This metric shows the number of samples generated by the corresponding streaming aggregation rule. This metric has been added in the commit861852f262
. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462 - Remove the metric `vm_streamaggr_stale_samples_total`, since it is unclear how it can be used in practice. This metric has been added in the commit861852f262
. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462 - Remove Alias and aggrID fields from streamaggr.Options struct, since these fields aren't related to optional params, which could modify the behaviour of the constructed streaming aggregator. Convert the Alias field to regular argument passed to LoadFromFile() function, since this argument is mandatory. - Pass Options arg to LoadFromFile() function by reference, since this structure is quite big. This also allows passing nil instead of Options when default options are enough. - Add `name`, `path`, `url` and `position` labels to `vm_streamaggr_dedup_state_size_bytes` and `vm_streamaggr_dedup_state_items_count` metrics, so they have consistent set of labels comparing to the rest of streaming aggregation metrics. - Convert aggregator.aggrStates field type from `map[string]aggrState` to `[]aggrOutput`, where `aggrOutput` contains the corresponding `aggrState` plus all the related metrics (currently only `vm_streamaggr_output_samples_total` metric is exposed with the corresponding `output` label per each configured output function). This simplifies and speeds up the code responsible for updating per-output metrics. This is a follow-up for the commit2eb1bc4f81
. See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6604 - Added missing urls to docs ( https://docs.victoriametrics.com/stream-aggregation/ ) in error messages. These urls help users figuring out why VictoriaMetrics or vmagent generates the corresponding error messages. The urls were removed for unknown reason in the commit2eb1bc4f81
. - Fix incorrect update for `vm_streamaggr_output_samples_total` metric in flushCtx.appendSeriesWithExtraLabel() function. While at it, reduce memory usage by limiting the maximum number of samples per flush to 10K. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6268
281 lines
9.4 KiB
Go
281 lines
9.4 KiB
Go
package common
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
|
"github.com/VictoriaMetrics/metrics"
|
|
)
|
|
|
|
var (
|
|
streamAggrConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
|
"See https://docs.victoriametrics.com/stream-aggregation/ . "+
|
|
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
|
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation with -streamAggr.config. "+
|
|
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
|
|
"See also -streamAggr.dropInput and https://docs.victoriametrics.com/stream-aggregation/")
|
|
streamAggrDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation with -streamAggr.config. "+
|
|
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
|
|
"See also -streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation/")
|
|
streamAggrDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation with -streamAggr.config . "+
|
|
"See also -streamAggr.dropInputLabels and -dedup.minScrapeInterval and https://docs.victoriametrics.com/stream-aggregation/#deduplication")
|
|
streamAggrDropInputLabels = flagutil.NewArrayString("streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+
|
|
"before stream de-duplication and aggregation . See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels")
|
|
streamAggrIgnoreOldSamples = flag.Bool("streamAggr.ignoreOldSamples", false, "Whether to ignore input samples with old timestamps outside the current aggregation interval. "+
|
|
"See https://docs.victoriametrics.com/stream-aggregation/#ignoring-old-samples")
|
|
streamAggrIgnoreFirstIntervals = flag.Int("streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start. Increase this value if you observe incorrect aggregation results after restarts. It could be caused by receiving unordered delayed data from clients pushing data into the database. "+
|
|
"See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start")
|
|
)
|
|
|
|
var (
|
|
saCfgReloaderStopCh chan struct{}
|
|
saCfgReloaderWG sync.WaitGroup
|
|
|
|
saCfgReloads = metrics.NewCounter(`vminsert_streamagg_config_reloads_total`)
|
|
saCfgReloadErr = metrics.NewCounter(`vminsert_streamagg_config_reloads_errors_total`)
|
|
saCfgSuccess = metrics.NewGauge(`vminsert_streamagg_config_last_reload_successful`, nil)
|
|
saCfgTimestamp = metrics.NewCounter(`vminsert_streamagg_config_last_reload_success_timestamp_seconds`)
|
|
|
|
sasGlobal atomic.Pointer[streamaggr.Aggregators]
|
|
deduplicator *streamaggr.Deduplicator
|
|
)
|
|
|
|
// CheckStreamAggrConfig checks config pointed by -stramaggr.config
|
|
func CheckStreamAggrConfig() error {
|
|
if *streamAggrConfig == "" {
|
|
return nil
|
|
}
|
|
pushNoop := func(_ []prompbmarshal.TimeSeries) {}
|
|
opts := &streamaggr.Options{
|
|
DedupInterval: *streamAggrDedupInterval,
|
|
DropInputLabels: *streamAggrDropInputLabels,
|
|
IgnoreOldSamples: *streamAggrIgnoreOldSamples,
|
|
IgnoreFirstIntervals: *streamAggrIgnoreFirstIntervals,
|
|
}
|
|
sas, err := streamaggr.LoadFromFile(*streamAggrConfig, pushNoop, opts, "global")
|
|
if err != nil {
|
|
return fmt.Errorf("error when loading -streamAggr.config=%q: %w", *streamAggrConfig, err)
|
|
}
|
|
sas.MustStop()
|
|
return nil
|
|
}
|
|
|
|
// InitStreamAggr must be called after flag.Parse and before using the common package.
|
|
//
|
|
// MustStopStreamAggr must be called when stream aggr is no longer needed.
|
|
func InitStreamAggr() {
|
|
saCfgReloaderStopCh = make(chan struct{})
|
|
if *streamAggrConfig == "" {
|
|
if *streamAggrDedupInterval > 0 {
|
|
deduplicator = streamaggr.NewDeduplicator(pushAggregateSeries, *streamAggrDedupInterval, *streamAggrDropInputLabels, "global")
|
|
}
|
|
return
|
|
}
|
|
|
|
sighupCh := procutil.NewSighupChan()
|
|
|
|
opts := &streamaggr.Options{
|
|
DedupInterval: *streamAggrDedupInterval,
|
|
DropInputLabels: *streamAggrDropInputLabels,
|
|
IgnoreOldSamples: *streamAggrIgnoreOldSamples,
|
|
IgnoreFirstIntervals: *streamAggrIgnoreFirstIntervals,
|
|
}
|
|
sas, err := streamaggr.LoadFromFile(*streamAggrConfig, pushAggregateSeries, opts, "global")
|
|
if err != nil {
|
|
logger.Fatalf("cannot load -streamAggr.config=%q: %s", *streamAggrConfig, err)
|
|
}
|
|
|
|
sasGlobal.Store(sas)
|
|
saCfgSuccess.Set(1)
|
|
saCfgTimestamp.Set(fasttime.UnixTimestamp())
|
|
|
|
// Start config reloader.
|
|
saCfgReloaderWG.Add(1)
|
|
go func() {
|
|
defer saCfgReloaderWG.Done()
|
|
for {
|
|
select {
|
|
case <-sighupCh:
|
|
case <-saCfgReloaderStopCh:
|
|
return
|
|
}
|
|
reloadStreamAggrConfig()
|
|
}
|
|
}()
|
|
}
|
|
|
|
func reloadStreamAggrConfig() {
|
|
logger.Infof("reloading -streamAggr.config=%q", *streamAggrConfig)
|
|
saCfgReloads.Inc()
|
|
|
|
opts := &streamaggr.Options{
|
|
DedupInterval: *streamAggrDedupInterval,
|
|
DropInputLabels: *streamAggrDropInputLabels,
|
|
IgnoreOldSamples: *streamAggrIgnoreOldSamples,
|
|
IgnoreFirstIntervals: *streamAggrIgnoreFirstIntervals,
|
|
}
|
|
sasNew, err := streamaggr.LoadFromFile(*streamAggrConfig, pushAggregateSeries, opts, "global")
|
|
if err != nil {
|
|
saCfgSuccess.Set(0)
|
|
saCfgReloadErr.Inc()
|
|
logger.Errorf("cannot reload -streamAggr.config=%q: use the previously loaded config; error: %s", *streamAggrConfig, err)
|
|
return
|
|
}
|
|
sas := sasGlobal.Load()
|
|
if !sasNew.Equal(sas) {
|
|
sasOld := sasGlobal.Swap(sasNew)
|
|
sasOld.MustStop()
|
|
logger.Infof("successfully reloaded stream aggregation config at -streamAggr.config=%q", *streamAggrConfig)
|
|
} else {
|
|
logger.Infof("nothing changed in -streamAggr.config=%q", *streamAggrConfig)
|
|
sasNew.MustStop()
|
|
}
|
|
saCfgSuccess.Set(1)
|
|
saCfgTimestamp.Set(fasttime.UnixTimestamp())
|
|
}
|
|
|
|
// MustStopStreamAggr stops stream aggregators.
|
|
func MustStopStreamAggr() {
|
|
close(saCfgReloaderStopCh)
|
|
saCfgReloaderWG.Wait()
|
|
|
|
sas := sasGlobal.Swap(nil)
|
|
sas.MustStop()
|
|
|
|
if deduplicator != nil {
|
|
deduplicator.MustStop()
|
|
deduplicator = nil
|
|
}
|
|
}
|
|
|
|
type streamAggrCtx struct {
|
|
mn storage.MetricName
|
|
tss []prompbmarshal.TimeSeries
|
|
labels []prompbmarshal.Label
|
|
samples []prompbmarshal.Sample
|
|
buf []byte
|
|
}
|
|
|
|
func (ctx *streamAggrCtx) Reset() {
|
|
ctx.mn.Reset()
|
|
|
|
clear(ctx.tss)
|
|
ctx.tss = ctx.tss[:0]
|
|
|
|
clear(ctx.labels)
|
|
ctx.labels = ctx.labels[:0]
|
|
|
|
ctx.samples = ctx.samples[:0]
|
|
ctx.buf = ctx.buf[:0]
|
|
}
|
|
|
|
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []byte) []byte {
|
|
mn := &ctx.mn
|
|
tss := ctx.tss
|
|
labels := ctx.labels
|
|
samples := ctx.samples
|
|
buf := ctx.buf
|
|
|
|
tssLen := len(tss)
|
|
for _, mr := range mrs {
|
|
if err := mn.UnmarshalRaw(mr.MetricNameRaw); err != nil {
|
|
logger.Panicf("BUG: cannot unmarshal recently marshaled MetricName: %s", err)
|
|
}
|
|
|
|
labelsLen := len(labels)
|
|
|
|
bufLen := len(buf)
|
|
buf = append(buf, mn.MetricGroup...)
|
|
metricGroup := bytesutil.ToUnsafeString(buf[bufLen:])
|
|
labels = append(labels, prompbmarshal.Label{
|
|
Name: "__name__",
|
|
Value: metricGroup,
|
|
})
|
|
|
|
for _, tag := range mn.Tags {
|
|
bufLen = len(buf)
|
|
buf = append(buf, tag.Key...)
|
|
name := bytesutil.ToUnsafeString(buf[bufLen:])
|
|
|
|
bufLen = len(buf)
|
|
buf = append(buf, tag.Value...)
|
|
value := bytesutil.ToUnsafeString(buf[bufLen:])
|
|
labels = append(labels, prompbmarshal.Label{
|
|
Name: name,
|
|
Value: value,
|
|
})
|
|
}
|
|
|
|
samplesLen := len(samples)
|
|
samples = append(samples, prompbmarshal.Sample{
|
|
Timestamp: mr.Timestamp,
|
|
Value: mr.Value,
|
|
})
|
|
|
|
tss = append(tss, prompbmarshal.TimeSeries{
|
|
Labels: labels[labelsLen:],
|
|
Samples: samples[samplesLen:],
|
|
})
|
|
}
|
|
ctx.tss = tss
|
|
ctx.labels = labels
|
|
ctx.samples = samples
|
|
ctx.buf = buf
|
|
|
|
tss = tss[tssLen:]
|
|
|
|
sas := sasGlobal.Load()
|
|
if sas.IsEnabled() {
|
|
matchIdxs = sas.Push(tss, matchIdxs)
|
|
} else if deduplicator != nil {
|
|
matchIdxs = bytesutil.ResizeNoCopyMayOverallocate(matchIdxs, len(tss))
|
|
for i := range matchIdxs {
|
|
matchIdxs[i] = 1
|
|
}
|
|
deduplicator.Push(tss)
|
|
}
|
|
|
|
ctx.Reset()
|
|
|
|
return matchIdxs
|
|
}
|
|
|
|
func pushAggregateSeries(tss []prompbmarshal.TimeSeries) {
|
|
currentTimestamp := int64(fasttime.UnixTimestamp()) * 1000
|
|
var ctx InsertCtx
|
|
ctx.Reset(len(tss))
|
|
ctx.skipStreamAggr = true
|
|
for _, ts := range tss {
|
|
labels := ts.Labels
|
|
ctx.Labels = ctx.Labels[:0]
|
|
for _, label := range labels {
|
|
name := label.Name
|
|
if name == "__name__" {
|
|
name = ""
|
|
}
|
|
ctx.AddLabel(name, label.Value)
|
|
}
|
|
value := ts.Samples[0].Value
|
|
if err := ctx.WriteDataPoint(nil, ctx.Labels, currentTimestamp, value); err != nil {
|
|
logger.Errorf("cannot store aggregate series: %s", err)
|
|
// Do not continue pushing the remaining samples, since it is likely they will return the same error.
|
|
return
|
|
}
|
|
}
|
|
// There is no need in limiting the number of concurrent calls to vmstorage.AddRows() here,
|
|
// since the number of concurrent pushAggregateSeries() calls should be already limited by lib/streamaggr.
|
|
if err := vmstorage.AddRows(ctx.mrs); err != nil {
|
|
logger.Errorf("cannot flush aggregate series: %s", err)
|
|
}
|
|
}
|