// VictoriaMetrics/lib/streamaggr/dedup_timing_test.go

package streamaggr

import (
	"fmt"
	"sync/atomic"
	"testing"
)

// BenchmarkDedupAggr runs benchmarkDedupAggr as a separate sub-benchmark for
// every push size in the table below (1 to 1_000_000 samples per push).
func BenchmarkDedupAggr(b *testing.B) {
	pushSizes := []int{1, 10, 100, 1_000, 10_000, 100_000, 1_000_000}
	for _, size := range pushSizes {
		// The sub-benchmark name carries the push size so results can be told apart.
		name := fmt.Sprintf("samplesPerPush_%d", size)
		b.Run(name, func(sub *testing.B) {
			benchmarkDedupAggr(sub, size)
		})
	}
}

// benchmarkDedupAggr measures pushSamples+flush throughput on a single
// deduplicator created by newDedupAggr, using a batch of samplesPerPush samples.
//
// Every benchmark iteration pushes the same batch `loops` times and then calls
// flush once. b.SetBytes is set to samplesPerPush*loops, so the throughput
// column reported by the testing package counts pushed samples rather than bytes.
//
// The deduplicator and the sample batch are shared by all the goroutines
// started by b.RunParallel.
func benchmarkDedupAggr(b *testing.B, samplesPerPush int) {
	// The flush callback only adds the number of samples it receives to Sink.
	flushSamples := func(samples []pushSample) {
		Sink.Add(uint64(len(samples)))
	}

	const loops = 2
	benchSamples := newBenchSamples(samplesPerPush)
	da := newDedupAggr()

	b.ReportAllocs()
	b.SetBytes(int64(samplesPerPush * loops))
	// Building the sample batch formats one key per sample, which is significant
	// work for the biggest push sizes. Reset the timer and the allocation counters
	// so this setup isn't attributed to the measured pushSamples/flush calls.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for i := 0; i < loops; i++ {
				da.pushSamples(benchSamples)
			}
			da.flush(flushSamples)
		}
	})
}

// newBenchSamples returns count samples for use in benchmarks.
//
// The sample at index i gets the key "key_<i>" and the value float64(i),
// so all the keys in the returned slice are distinct.
func newBenchSamples(count int) []pushSample {
	samples := make([]pushSample, count)
	for idx := 0; idx < len(samples); idx++ {
		samples[idx].key = fmt.Sprintf("key_%d", idx)
		samples[idx].value = float64(idx)
	}
	return samples
}

// Sink accumulates the number of samples passed to the flush callback in
// benchmarkDedupAggr.
//
// NOTE(review): presumably it is an exported package-level variable so that the
// callback has a side effect the compiler cannot optimize away — confirm.
var Sink atomic.Uint64
lib/streamaggr: huge pile of changes - Reduce memory usage by up to 5x when de-duplicating samples across big number of time series. - Reduce memory usage by up to 5x when aggregating across big number of output time series. - Add lib/promutils.LabelsCompressor, which is going to be used by other VictoriaMetrics components for reducing memory usage for marshaled []prompbmarshal.Label. - Add `dedup_interval` option at aggregation config, which allows setting individual deduplication intervals per each aggregation. - Add `keep_metric_names` option at aggregation config, which allows keeping the original metric names in the output samples. - Add `unique_samples` output, which counts the number of unique sample values. - Add `increase_prometheus` and `total_prometheus` outputs, which ignore the first sample per each newly encountered time series. - Use 64-bit hashes instead of marshaled labels as map keys when calculating `count_series` output. This makes obsolete https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5579 - Expose various metrics, which may help debugging stream aggregation: - vm_streamaggr_dedup_state_size_bytes - the size of data structures responsible for deduplication - vm_streamaggr_dedup_state_items_count - the number of items in the deduplication data structures - vm_streamaggr_labels_compressor_size_bytes - the size of labels compressor data structures - vm_streamaggr_labels_compressor_items_count - the number of entries in the labels compressor - vm_streamaggr_flush_duration_seconds - a histogram, which shows the duration of stream aggregation flushes - vm_streamaggr_dedup_flush_duration_seconds - a histogram, which shows the duration of deduplication flushes - vm_streamaggr_flush_timeouts_total - counter for timed out stream aggregation flushes, which took longer than the configured interval - vm_streamaggr_dedup_flush_timeouts_total - counter for timed out deduplication flushes, which took longer than the configured dedup_interval - 
Actualize docs/stream-aggregation.md The memory usage reduction increases CPU usage during stream aggregation by up to 30%. This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5850 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5898 2024-03-02 00:42:26 +00:00			`package streamaggr`

			`import (`
			`"fmt"`
			`"sync/atomic"`
			`"testing"`
			`)`

			`func BenchmarkDedupAggr(b *testing.B) {`
lib/streamaggr: add a benchmark for de-duplicating of 1M samples 2024-03-03 22:26:59 +00:00			`for _, samplesPerPush := range []int{1, 10, 100, 1_000, 10_000, 100_000, 1_000_000} {`
lib/streamaggr: huge pile of changes - Reduce memory usage by up to 5x when de-duplicating samples across big number of time series. - Reduce memory usage by up to 5x when aggregating across big number of output time series. - Add lib/promutils.LabelsCompressor, which is going to be used by other VictoriaMetrics components for reducing memory usage for marshaled []prompbmarshal.Label. - Add `dedup_interval` option at aggregation config, which allows setting individual deduplication intervals per each aggregation. - Add `keep_metric_names` option at aggregation config, which allows keeping the original metric names in the output samples. - Add `unique_samples` output, which counts the number of unique sample values. - Add `increase_prometheus` and `total_prometheus` outputs, which ignore the first sample per each newly encountered time series. - Use 64-bit hashes instead of marshaled labels as map keys when calculating `count_series` output. This makes obsolete https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5579 - Expose various metrics, which may help debugging stream aggregation: - vm_streamaggr_dedup_state_size_bytes - the size of data structures responsible for deduplication - vm_streamaggr_dedup_state_items_count - the number of items in the deduplication data structures - vm_streamaggr_labels_compressor_size_bytes - the size of labels compressor data structures - vm_streamaggr_labels_compressor_items_count - the number of entries in the labels compressor - vm_streamaggr_flush_duration_seconds - a histogram, which shows the duration of stream aggregation flushes - vm_streamaggr_dedup_flush_duration_seconds - a histogram, which shows the duration of deduplication flushes - vm_streamaggr_flush_timeouts_total - counter for timed out stream aggregation flushes, which took longer than the configured interval - vm_streamaggr_dedup_flush_timeouts_total - counter for timed out deduplication flushes, which took longer than the configured dedup_interval - 
Actualize docs/stream-aggregation.md The memory usage reduction increases CPU usage during stream aggregation by up to 30%. This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5850 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5898 2024-03-02 00:42:26 +00:00			`b.Run(fmt.Sprintf("samplesPerPush_%d", samplesPerPush), func(b *testing.B) {`
			`benchmarkDedupAggr(b, samplesPerPush)`
			`})`
			`}`
			`}`

			`func benchmarkDedupAggr(b *testing.B, samplesPerPush int) {`
			`flushSamples := func(samples []pushSample) {`
			`Sink.Add(uint64(len(samples)))`
			`}`

			`const loops = 2`
			`benchSamples := newBenchSamples(samplesPerPush)`
			`da := newDedupAggr()`

			`b.ReportAllocs()`
			`b.SetBytes(int64(samplesPerPush * loops))`
			`b.RunParallel(func(pb *testing.PB) {`
			`for pb.Next() {`
			`for i := 0; i < loops; i++ {`
			`da.pushSamples(benchSamples)`
			`}`
			`da.flush(flushSamples)`
			`}`
			`})`
			`}`

			`func newBenchSamples(count int) []pushSample {`
			`samples := make([]pushSample, count)`
			`for i := range samples {`
			`sample := &samples[i]`
			`sample.key = fmt.Sprintf("key_%d", i)`
			`sample.value = float64(i)`
			`}`
			`return samples`
			`}`

			`var Sink atomic.Uint64`