lib/streamaggr: follow-up for the commit c0e4ccb7b5

- Clarify docs for `Ignore aggregation intervals on start` feature. - Make more clear the code dealing with ignoreFirstIntervals at aggregator.runFlusher() functions. It is better from readability and maintainability PoV using distinct a.flush() calls for distinct cases instead of merging them into a single a.flush() call. - Take into account the first incomplete interval when tracking the number of skipped aggregation intervals, since this behaviour is easier to understand by the end users. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6137
2025-03-11 15:34:56 +00:00 · 2024-07-02 21:21:35 +02:00 · 2024-07-02 21:21:35 +02:00 · f17b408643
commit f17b408643
parent ffb49c677b
3 changed files with 44 additions and 31 deletions
--- a/app/vminsert/common/streamaggr.go
+++ b/app/vminsert/common/streamaggr.go
@ -32,10 +32,10 @@ var (
 		"See also -streamAggr.dropInputLabels and -dedup.minScrapeInterval and https://docs.victoriametrics.com/stream-aggregation/#deduplication")
 	streamAggrDropInputLabels = flagutil.NewArrayString("streamAggr.dropInputLabels", "An optional list of labels to drop from samples "+
 		"before stream de-duplication and aggregation . See https://docs.victoriametrics.com/stream-aggregation/#dropping-unneeded-labels")
-	streamAggrIgnoreFirstIntervals = flag.Int("streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start. Increase this value if you observe incorrect aggregation results after restarts. It could be caused by receiving unordered delayed data from clients pushing data into the database. "+
-		"See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start")
 	streamAggrIgnoreOldSamples = flag.Bool("streamAggr.ignoreOldSamples", false, "Whether to ignore input samples with old timestamps outside the current aggregation interval. "+
 		"See https://docs.victoriametrics.com/stream-aggregation/#ignoring-old-samples")
+	streamAggrIgnoreFirstIntervals = flag.Int("streamAggr.ignoreFirstIntervals", 0, "Number of aggregation intervals to skip after the start. Increase this value if you observe incorrect aggregation results after restarts. It could be caused by receiving unordered delayed data from clients pushing data into the database. "+
+		"See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start")
 )

 var (
--- a/docs/stream-aggregation.md
+++ b/docs/stream-aggregation.md
@ -22,6 +22,8 @@ after applying all the configured [relabeling stages](https://docs.victoriametri
 _By default, stream aggregation ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples).
 It expects that the ingested samples have timestamps close to the current time. See [how to ignore old samples](#ignoring-old-samples)._

+## Configuration
+
 Stream aggregation can be configured via the following command-line flags:

 - `-streamAggr.config` at [single-node VictoriaMetrics](https://docs.victoriametrics.com/single-server-victoriametrics/)
@ -128,25 +130,30 @@ outside the current [aggregation interval](#stream-aggregation-config) must be i

 ## Ignore aggregation intervals on start

-Stream aggregation may yield inaccurate results if it processes incomplete data. This issue can arise when data is 
-received from clients that maintain a queue of unsent data, such as Prometheus or vmagent. If the queue isn't fully 
-cleared within the aggregation `interval`, only a portion of the time series may be processed, leading to distorted 
-calculations. To mitigate this, consider the following options:
+Streaming aggregation results may be incorrect for some time after the restart of [vmagent](https://docs.victoriametrics.com/vmagent/)
+or [single-node VictoriaMetrics](https://docs.victoriametrics.com/) until all the buffered [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples)
+are sent from remote sources to the `vmagent` or single-node VictoriaMetrics via [supported data ingestion protocols](https://docs.victoriametrics.com/vmagent/#how-to-push-data-to-vmagent).
+In this case it may be a good idea to drop the aggregated data during the first `N` [aggrgation intervals](#stream-aggregation-config)
+just after the restart of `vmagent` or single-node VictoriaMetrics. This can be done via the following options:

- Set `-streamAggr.ignoreFirstIntervals=<intervalsCount>` command-line flag to [single-node VictoriaMetrics](https://docs.victoriametrics.com/)
-  or to [vmagent](https://docs.victoriametrics.com/vmagent/) to skip first `<intervalsCount>` [aggregation intervals](#stream-aggregation-config)
-  from persisting to the storage. At [vmagent](https://docs.victoriametrics.com/vmagent/)
-  `-remoteWrite.streamAggr.ignoreFirstIntervals=<intervalsCount>` flag can be specified individually per each `-remoteWrite.url`.
-  It is expected that all incomplete or queued data will be processed during specified `<intervalsCount>` 
-  and all subsequent aggregation intervals will produce correct data.
+- The `-streamAggr.ignoreFirstIntervals=N` command-line flag at `vmagent` and single-node VictoriaMetrics. This flag instructs skipping the first `N`
+  [aggregation intervals](#stream-aggregation-config) just after the restart accross all the [configured stream aggregation configs](#configuration).

- Set `ignore_first_intervals: <intervalsCount>` option individually per [aggregation config](#stream-aggregation-config).
-  This enables ignoring first `<intervalsCount>` aggregation intervals for that particular aggregation config.
+  The `-remoteWrite.streamAggr.ignoreFirstIntervals=N` command-line flag can be specified individually per each `-remoteWrite.url` at [vmagent](https://docs.victoriametrics.com/vmagent/).
+
+- The `ignore_first_intervals: N` option at the particular [aggregation config](#stream-aggregation-config).
+
+See also:
+
+- [Flush time alignment](#flush-time-alignment)
+- [Ignoring old samples](#ignoring-old-samples)

 ## Flush time alignment

 By default, the time for aggregated data flush is aligned by the `interval` option specified in [aggregate config](#stream-aggregation-config).
+
 For example:
+
 - if `interval: 1m` is set, then the aggregated data is flushed to the storage at the end of every minute
 - if `interval: 1h` is set, then the aggregated data is flushed to the storage at the end of every hour

@ -157,6 +164,11 @@ The aggregated data on the first and the last interval is dropped during `vmagen
 since the first and the last aggregation intervals are incomplete, so they usually contain incomplete confusing data.
 If you need preserving the aggregated data on these intervals, then set `flush_on_shutdown: true` option in the [aggregate config](#stream-aggregation-config).

+See also:
+
+- [Ignore aggregation intervals on start](#ignore-aggregation-intervals-on-start)
+- [Ignoring old samples](#ignoring-old-samples)
+
 ## Use cases

 Stream aggregation can be used in the following cases:
@ -994,15 +1006,15 @@ specified individually per each `-remoteWrite.url`:

  # ignore_old_samples instructs ignoring input samples with old timestamps outside the current aggregation interval.
  # See https://docs.victoriametrics.com/stream-aggregation/#ignoring-old-samples
-  # See also -remoteWrite.streamAggr.ignoreOldSamples or -streamAggr.ignoreOldSamples command-line flag.
+  # See also -remoteWrite.streamAggr.ignoreOldSamples and -streamAggr.ignoreOldSamples command-line flag.
  #
  # ignore_old_samples: false

-  # ignore_first_intervals instructs ignoring first N aggregation intervals after process start.
+  # ignore_first_intervals instructs ignoring the first N aggregation intervals after process start.
  # See https://docs.victoriametrics.com/stream-aggregation/#ignore-aggregation-intervals-on-start
-  # See also -remoteWrite.streamAggr.ignoreFirstIntervals or -streamAggr.ignoreFirstIntervals command-line flag.
+  # See also -remoteWrite.streamAggr.ignoreFirstIntervals and -streamAggr.ignoreFirstIntervals command-line flags.
  #
-  # ignore_first_intervals: false
+  # ignore_first_intervals: N

  # drop_input_labels instructs dropping the given labels from input samples.
  # The labels' dropping is performed before input_relabel_configs are applied.
--- a/lib/streamaggr/streamaggr.go
+++ b/lib/streamaggr/streamaggr.go
@ -130,9 +130,9 @@ type Options struct {
 	// This option can be overridden individually per each aggregation via ignore_old_samples option.
 	IgnoreOldSamples bool

-	// IgnoreFirstIntervals sets amount of aggregation intervals to ignore on start.
+	// IgnoreFirstIntervals sets the number of aggregation intervals to be ignored on start.
 	//
-	// By default, no intervals will be ignored.
+	// By default, zero intervals are ignored.
 	//
 	// This option can be overridden individually per each aggregation via ignore_first_intervals option.
 	IgnoreFirstIntervals int
@ -715,15 +715,16 @@ func (a *aggregator) runFlusher(pushFunc PushFunc, alignFlushToInterval, skipInc

 		if alignFlushToInterval && skipIncompleteFlush {
 			a.flush(nil, interval, true)
+			ignoreFirstIntervals--
 		}

 		for tickerWait(t) {
-			pf := pushFunc
 			if ignoreFirstIntervals > 0 {
-				pf = nil
+				a.flush(nil, interval, true)
 				ignoreFirstIntervals--
+			} else {
+				a.flush(pushFunc, interval, true)
 			}
-			a.flush(pf, interval, true)

 			if alignFlushToInterval {
 				select {
@ -744,17 +745,17 @@ func (a *aggregator) runFlusher(pushFunc PushFunc, alignFlushToInterval, skipInc

 			ct := time.Now()
 			if ct.After(flushDeadline) {
-				pf := pushFunc
-				if ignoreFirstIntervals > 0 {
-					pf = nil
-					ignoreFirstIntervals--
-				}
 				// It is time to flush the aggregated state
 				if alignFlushToInterval && skipIncompleteFlush && !isSkippedFirstFlush {
-					pf = nil
+					a.flush(nil, interval, true)
+					ignoreFirstIntervals--
 					isSkippedFirstFlush = true
+				} else if ignoreFirstIntervals > 0 {
+					a.flush(nil, interval, true)
+					ignoreFirstIntervals--
+				} else {
+					a.flush(pushFunc, interval, true)
 				}
-				a.flush(pf, interval, true)
 				for ct.After(flushDeadline) {
 					flushDeadline = flushDeadline.Add(interval)
 				}
@ -769,7 +770,7 @@ func (a *aggregator) runFlusher(pushFunc PushFunc, alignFlushToInterval, skipInc
 		}
 	}

-	if !skipIncompleteFlush && ignoreFirstIntervals == 0 {
+	if !skipIncompleteFlush && ignoreFirstIntervals <= 0 {
 		a.dedupFlush(dedupInterval)
 		a.flush(pushFunc, interval, true)
 	}