VictoriaMetrics/lib/streamaggr/streamaggr_timing_test.go

package streamaggr

import (
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
)

// benchOutputs lists the stream aggregation outputs covered by the benchmarks
// in this file. BenchmarkAggregatorsPush runs one sub-benchmark per entry.
//
// Keep the entries in alphabetical order.
var benchOutputs = []string{
	"avg",
	"count_samples",
	"count_series",
	"histogram_bucket",
	"increase",
	"increase_prometheus",
	"last",
	"max",
	"min",
	"quantiles(0, 0.5, 1)",
	"rate_avg",
	"rate_sum",
	"stddev",
	"stdvar",
	"sum_samples",
	"total",
	"total_prometheus",
	"unique_samples",
}

// BenchmarkAggregatorsPush runs benchmarkAggregatorsPush once per entry of
// benchOutputs, each as a sub-benchmark named "output=<name>".
func BenchmarkAggregatorsPush(b *testing.B) {
	for i := range benchOutputs {
		name := benchOutputs[i]
		b.Run("output="+name, func(sub *testing.B) {
			benchmarkAggregatorsPush(sub, name)
		})
	}
}

// benchmarkAggregatorsPush measures parallel Push calls on Aggregators
// configured with the single given output.
//
// Every benchmark iteration pushes benchSeries `loops` times, so the value
// passed to b.SetBytes is a count of pushed series entries per iteration,
// not a byte size.
func benchmarkAggregatorsPush(b *testing.B, output string) {
	// The aggregated results are of no interest here, so they are dropped.
	discard := func(_ []prompbmarshal.TimeSeries) {}
	aggrs := newBenchAggregators([]string{output}, discard)
	defer aggrs.MustStop()

	const loops = 100
	seriesPerOp := int64(len(benchSeries) * loops)

	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(seriesPerOp)
	b.RunParallel(func(pb *testing.PB) {
		// Each worker goroutine keeps its own matchIdxs and feeds the slice
		// returned by Push back into the next call.
		var matchIdxs []byte
		for pb.Next() {
			for left := loops; left > 0; left-- {
				matchIdxs = aggrs.Push(benchSeries, matchIdxs)
			}
		}
	})
}

// newBenchAggregators builds Aggregators from an in-memory config with a single
// rule: it matches http_requests_total, groups by the job label and emits
// the given outputs to pushFunc.
//
// Each output name is encoded with stringsutil.JSONString before being placed
// into the config. The function panics if the config cannot be loaded.
func newBenchAggregators(outputs []string, pushFunc PushFunc) *Aggregators {
	// NOTE(review): the 24h interval presumably keeps periodic flushes out of
	// the benchmark run - confirm against the aggregator implementation.
	const configTemplate = `
- match: http_requests_total
  interval: 24h
  by: [job]
  outputs: [%s]
`

	quoted := make([]string, 0, len(outputs))
	for _, output := range outputs {
		quoted = append(quoted, stringsutil.JSONString(output))
	}
	config := fmt.Sprintf(configTemplate, strings.Join(quoted, ","))

	aggrs, err := LoadFromData([]byte(config), pushFunc, nil, "some_alias")
	if err != nil {
		panic(fmt.Errorf("unexpected error when initializing aggregators: %s", err))
	}
	return aggrs
}

// newBenchSeries generates seriesCount http_requests_total samples in
// Prometheus text format and parses them into prompbmarshal.TimeSeries.
//
// Sample j gets a unique path label ("/foo/<j>"), a job label cycling through
// 100 values ("foo_<j%100>") and the value j*1000; the remaining labels are
// identical for all samples. The current wall-clock time in milliseconds is
// passed to the parser as the timestamp offset.
func newBenchSeries(seriesCount int) []prompbmarshal.TimeSeries {
	const lineFormat = `http_requests_total{path="/foo/%d",job="foo_%d",instance="bar",pod="pod-123232312",namespace="kube-foo-bar",node="node-123-3434-443",` +
		`some_other_label="foo-bar-baz",environment="prod",label1="value1",label2="value2",label3="value3"} %d`

	lines := make([]string, 0, seriesCount)
	for idx := 0; idx < seriesCount; idx++ {
		lines = append(lines, fmt.Sprintf(lineFormat, idx, idx%100, idx*1000))
	}
	text := strings.Join(lines, "\n")
	return prompbmarshal.MustParsePromMetrics(text, time.Now().UnixMilli())
}

// seriesCount is the number of samples generated for benchSeries.
const seriesCount = 10_000

// benchSeries is the input pushed by the benchmarks in this file.
// It is built once, at package initialization, by newBenchSeries.
var benchSeries = newBenchSeries(seriesCount)
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`package streamaggr`

			`import (`
			`"fmt"`
			`"strings"`
			`"testing"`
lib/streamaggr: properly drop samples on the first incomplete interval Previously samples were dropped on the first incomplete interval and the next complete interval. Also make sure that the de-duplication is performed just before flushing the aggregate state. This should help the case when dedup_interval = interval. 2024-03-04 12:50:46 +00:00			`"time"`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"`
all: consistently use stringsutil.JSONString() for formatting JSON strings with fmt.* functions instead of using "%q" formatter The %q formatter may result in incorrectly formatted JSON string if the original string contains special chars such as \x1b . They must be encoded as \u001b , otherwise the resulting JSON string cannot be parsed by JSON parsers. This is a follow-up for c0caa6993903a748c8942c8c28b5901ee6d5f4d4 See https://github.com/VictoriaMetrics/victorialogs-datasource/issues/24 2024-07-17 11:52:10 +00:00			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`)`

lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`var benchOutputs = []string{`
lib/streamaggr: consistently use alphabetical order of benchmarked stream aggregation outputs 2024-07-15 07:53:19 +00:00			`"avg",`
			`"count_samples",`
			`"count_series",`
			`"histogram_bucket",`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`"increase",`
			`"increase_prometheus",`
			`"last",`
			`"max",`
lib/streamaggr: consistently use alphabetical order of benchmarked stream aggregation outputs 2024-07-15 07:53:19 +00:00			`"min",`
			`"quantiles(0, 0.5, 1)",`
			`"rate_avg",`
			`"rate_sum",`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`"stddev",`
			`"stdvar",`
lib/streamaggr: consistently use alphabetical order of benchmarked stream aggregation outputs 2024-07-15 07:53:19 +00:00			`"sum_samples",`
			`"total",`
			`"total_prometheus",`
			`"unique_samples",`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`}`

			`func BenchmarkAggregatorsPush(b *testing.B) {`
			`for _, output := range benchOutputs {`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`b.Run(fmt.Sprintf("output=%s", output), func(b *testing.B) {`
			`benchmarkAggregatorsPush(b, output)`
			`})`
			`}`
			`}`

lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`func benchmarkAggregatorsPush(b *testing.B, output string) {`
all: fix golangci-lint(revive) warnings after 0c0ed61ce786d220b9fdd351385e81d9d3e4185e Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6001 2024-04-02 20:16:24 +00:00			`pushFunc := func(_ []prompbmarshal.TimeSeries) {}`
lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`a := newBenchAggregators([]string{output}, pushFunc)`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`defer a.MustStop()`

			`const loops = 100`
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00
lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`b.ResetTimer()`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`b.ReportAllocs()`
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`b.SetBytes(int64(len(benchSeries) * loops))`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`b.RunParallel(func(pb *testing.PB) {`
lib/streamaggr: follow-up for 736197179e0c097ccf22ade06ee0c873d2e6997b - Use a byte slice instead of a map for tracking indexes for matching series. This improves performance, since access by slice index is faster than access by map key. - Re-use the byte slice for tracking indexes for matching series. This removes unnecessary memory allocations and improves stream aggregation performance a bit. - Add an ability to return to the previous behaviour by specifying -remoteWrite.streamAggr.dropInput command-line flag. In this case all the input samples are dropped when stream aggregation is enabled. - Backport the new stream aggregation behaviour from vmagent to single-node VictoriaMetrics when -streamAggr.config option is set. - Improve docs regarding this change at docs/CHANGELOG.md - Document the new behavior at docs/stream-aggregation.md Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4243 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4575 2023-07-24 23:44:09 +00:00			`var matchIdxs []byte`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`for pb.Next() {`
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`for i := 0; i < loops; i++ {`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`matchIdxs = a.Push(benchSeries, matchIdxs)`
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`}`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`}`
			`})`
			`}`

lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`func newBenchAggregators(outputs []string, pushFunc PushFunc) *Aggregators {`
			`outputsQuoted := make([]string, len(outputs))`
			`for i := range outputs {`
all: consistently use stringsutil.JSONString() for formatting JSON strings with fmt.* functions instead of using "%q" formatter The %q formatter may result in incorrectly formatted JSON string if the original string contains special chars such as \x1b . They must be encoded as \u001b , otherwise the resulting JSON string cannot be parsed by JSON parsers. This is a follow-up for c0caa6993903a748c8942c8c28b5901ee6d5f4d4 See https://github.com/VictoriaMetrics/victorialogs-datasource/issues/24 2024-07-17 11:52:10 +00:00			`outputsQuoted[i] = stringsutil.JSONString(outputs[i])`
lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`}`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			config := fmt.Sprintf(`
			`- match: http_requests_total`
			`interval: 24h`
lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`by: [job]`
			`outputs: [%s]`
			`, strings.Join(outputsQuoted, ","))

app/vmagent/remotewrite: follow-up for f153f54d11250da050aa93bc4fa9b7ba9e144691 - Move the remaining code responsible for stream aggregation initialization from remotewrite.go to streamaggr.go . This improves code maintainability a bit. - Properly shut down streamaggr.Aggregators initialized inside remotewrite.CheckStreamAggrConfigs(). This prevents from potential resource leaks. - Use separate functions for initializing and reloading of global stream aggregation and per-remoteWrite.url stream aggregation. This makes the code easier to read and maintain. This also fixes INFO and ERROR logs emitted by these functions. - Add an ability to specify `name` option in every stream aggregation config. This option is used as `name` label in metrics exposed by stream aggregation at /metrics page. This simplifies investigation of the exposed metrics. - Add `path` label additionally to `name`, `url` and `position` labels at metrics exposed by streaming aggregation. This label should simplify investigation of the exposed metrics. - Remove `match` and `group` labels from metrics exposed by streaming aggregation, since they have little practical applicability: it is hard to use these labels in query filters and aggregation functions. - Rename the metric `vm_streamaggr_flushed_samples_total` to less misleading `vm_streamaggr_output_samples_total` . This metric shows the number of samples generated by the corresponding streaming aggregation rule. This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 . See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462 - Remove the metric `vm_streamaggr_stale_samples_total`, since it is unclear how it can be used in practice. This metric has been added in the commit 861852f2624895e01f93ce196607c72616ce2a94 . 
See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6462 - Remove Alias and aggrID fields from streamaggr.Options struct, since these fields aren't related to optional params, which could modify the behaviour of the constructed streaming aggregator. Convert the Alias field to regular argument passed to LoadFromFile() function, since this argument is mandatory. - Pass Options arg to LoadFromFile() function by reference, since this structure is quite big. This also allows passing nil instead of Options when default options are enough. - Add `name`, `path`, `url` and `position` labels to `vm_streamaggr_dedup_state_size_bytes` and `vm_streamaggr_dedup_state_items_count` metrics, so they have consistent set of labels comparing to the rest of streaming aggregation metrics. - Convert aggregator.aggrStates field type from `map[string]aggrState` to `[]aggrOutput`, where `aggrOutput` contains the corresponding `aggrState` plus all the related metrics (currently only `vm_streamaggr_output_samples_total` metric is exposed with the corresponding `output` label per each configured output function). This simplifies and speeds up the code responsible for updating per-output metrics. This is a follow-up for the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 . See https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6604 - Added missing urls to docs ( https://docs.victoriametrics.com/stream-aggregation/ ) in error messages. These urls help users figuring out why VictoriaMetrics or vmagent generates the corresponding error messages. The urls were removed for unknown reason in the commit 2eb1bc4f814037ae87ac6556011ae0d3caee6bc8 . - Fix incorrect update for `vm_streamaggr_output_samples_total` metric in flushCtx.appendSeriesWithExtraLabel() function. While at it, reduce memory usage by limiting the maximum number of samples per flush to 10K. 
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5467 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6268 2024-07-15 16:01:37 +00:00			`a, err := LoadFromData([]byte(config), pushFunc, nil, "some_alias")`
lib/streamaggr: add a benchmark for measuring the performance of aggregator.flush 2024-03-03 22:45:48 +00:00			`if err != nil {`
			`panic(fmt.Errorf("unexpected error when initializing aggregators: %s", err))`
			`}`
			`return a`
			`}`

lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`func newBenchSeries(seriesCount int) []prompbmarshal.TimeSeries {`
tests: fix slice init length (#6897) ### Describe Your Changes fix slice init length ### Checklist The following checks are mandatory: - [ ] My change adheres [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/contributing/). Signed-off-by: dufucun <dufuchun@sohu.com> 2024-08-30 08:55:25 +00:00			`a := make([]string, 0, seriesCount)`
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`for j := 0; j < seriesCount; j++ {`
lib/streamaggr: use multiple job labels in BenchmarkAggregatorsPush 2024-03-04 15:37:04 +00:00			s := fmt.Sprintf(`http_requests_total{path="/foo/%d",job="foo_%d",instance="bar",pod="pod-123232312",namespace="kube-foo-bar",node="node-123-3434-443",`+
lib/streamaggr: benchmark only flush routines in BenchmarkDedupAggrFlushSerial and BenchmarkAggregatorsFlushSerial 2024-03-04 17:12:06 +00:00			`some_other_label="foo-bar-baz",environment="prod",label1="value1",label2="value2",label3="value3"} %d`, j, j%100, j*1000)
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`a = append(a, s)`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`}`
			`metrics := strings.Join(a, "\n")`
app/vmagent/remotewrite,lib/streamaggr: re-use common code in tests after 879771808b7f7fe6bd3020957fb6835a02424846 - Export streamaggr.LoadFromData() function, so it could be used in tests outside the lib/streamaggr package. This allows removing a hack with creation of temporary files at TestRemoteWriteContext_TryPush_ImmutableTimeseries. - Move common code for mustParsePromMetrics() function into lib/prompbmarshal package, so it could be used in tests for building []prompbmarshal.TimeSeries from string. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6205 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6206 2024-07-03 13:10:09 +00:00			`offsetMsecs := time.Now().UnixMilli()`
			`return prompbmarshal.MustParsePromMetrics(metrics, offsetMsecs)`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00			`}`

lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`const seriesCount = 10_000`
app/{vmagent,vminsert}: add support for streaming aggregation See https://docs.victoriametrics.com/stream-aggregation.html Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3460 2023-01-04 06:19:18 +00:00
lib/streamaggr: make the BenchmarkAggregatorsPushByJobAvg closer to production case with long list of labels per sample 2024-02-29 00:39:00 +00:00			`var benchSeries = newBenchSeries(seriesCount)`