From 1f9d605988130063f345fb357100ca228974817b Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 9 Jan 2023 12:57:43 -0800 Subject: [PATCH] app/vmselect/netstorage: consistently select the sample with the biggest value out of samples with identical timestamps Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333 This fix is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3620 , but doesn't slow down the common case with merging replicated data blocks so significantly. Benchmark results: Before the change: BenchmarkMergeSortBlocks/replicationFactor-1-4 13968 85643 ns/op 956.53 MB/s 1700 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-2-4 10806 109171 ns/op 1500.77 MB/s 2191 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-3-4 8887 130623 ns/op 1881.45 MB/s 2660 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-4-4 7440 157348 ns/op 2082.52 MB/s 3174 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-5-4 6534 184473 ns/op 2220.38 MB/s 3612 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4 13419 85205 ns/op 961.44 MB/s 2213 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 579 1894900 ns/op 43.23 MB/s 46760 B/op 1 allocs/op After the change: BenchmarkMergeSortBlocks/replicationFactor-1-4 13832 85298 ns/op 960.40 MB/s 1716 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-2-4 8833 134222 ns/op 1220.66 MB/s 2675 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-3-4 6487 184830 ns/op 1329.65 MB/s 3636 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-4-4 4977 236318 ns/op 1386.61 MB/s 4733 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-5-4 4088 296734 ns/op 1380.36 MB/s 5761 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4 14083 84067 ns/op 974.47 MB/s 2110 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 536 2043534 ns/op 40.09 MB/s 50511 B/op 1 allocs/op --- app/vmselect/netstorage/netstorage.go | 26 +++++++++++++++++----- app/vmselect/netstorage/netstorage_test.go | 22 ++++++++++++++---- docs/CHANGELOG.md | 1 + 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index f7c1daca5..f97075208 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -508,18 +508,17 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) { } sbNext := sbh.getNextBlock() tsNext := sbNext.Timestamps[sbNext.NextIdx] - topTimestamps := top.Timestamps topNextIdx := top.NextIdx - if n := equalTimestampsPrefix(topTimestamps[topNextIdx:], sbNext.Timestamps[sbNext.NextIdx:]); n > 0 && dedupInterval > 0 { + if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 { // Skip n replicated samples at top if deduplication is enabled. top.NextIdx = topNextIdx + n } else { // Copy samples from top to dst with timestamps not exceeding tsNext. - top.NextIdx = topNextIdx + binarySearchTimestamps(topTimestamps[topNextIdx:], tsNext) - dst.Timestamps = append(dst.Timestamps, topTimestamps[topNextIdx:top.NextIdx]...) + top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext) + dst.Timestamps = append(dst.Timestamps, top.Timestamps[topNextIdx:top.NextIdx]...) dst.Values = append(dst.Values, top.Values[topNextIdx:top.NextIdx]...) } - if top.NextIdx < len(topTimestamps) { + if top.NextIdx < len(top.Timestamps) { heap.Fix(&sbh, 0) } else { heap.Pop(&sbh) @@ -535,6 +534,14 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) { var dedupsDuringSelect = metrics.NewCounter(`vm_deduplicated_samples_total{type="select"}`) +func equalSamplesPrefix(a, b *sortBlock) int { + n := equalTimestampsPrefix(a.Timestamps[a.NextIdx:], b.Timestamps[b.NextIdx:]) + if n == 0 { + return 0 + } + return equalValuesPrefix(a.Values[a.NextIdx:a.NextIdx+n], b.Values[b.NextIdx:b.NextIdx+n]) +} + func equalTimestampsPrefix(a, b []int64) int { for i, v := range a { if i >= len(b) || v != b[i] { @@ -544,6 +551,15 @@ func equalTimestampsPrefix(a, b []int64) int { return len(a) } +func equalValuesPrefix(a, b []float64) int { + for i, v := range a { + if i >= len(b) || v != b[i] { + return i + } + } + return len(a) +} + func binarySearchTimestamps(timestamps []int64, ts int64) int { // The code has been adapted from sort.Search. n := len(timestamps) diff --git a/app/vmselect/netstorage/netstorage_test.go b/app/vmselect/netstorage/netstorage_test.go index 4b20553be..a6ea47892 100644 --- a/app/vmselect/netstorage/netstorage_test.go +++ b/app/vmselect/netstorage/netstorage_test.go @@ -102,21 +102,35 @@ func TestMergeSortBlocks(t *testing.T) { Values: []float64{9, 4.2, 2.1, 5.2, 42, 6.1}, }) - // Multiple blocks with identical timestamps. + // Multiple blocks with identical timestamps and identical values. f([]*sortBlock{ + { + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }, { Timestamps: []int64{1, 2, 4}, Values: []float64{9, 5.2, 6.1}, }, + }, 1, &Result{ + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }) + + // Multiple blocks with identical timestamps. + f([]*sortBlock{ + { + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }, { Timestamps: []int64{1, 2, 4}, Values: []float64{4.2, 2.1, 42}, }, }, 1, &Result{ - Timestamps: []int64{1, 2, 4}, - Values: []float64{4.2, 2.1, 42}, + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 42, 9}, }) - // Multiple blocks with identical timestamps, disabled deduplication. f([]*sortBlock{ { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f5f27a701..e80e4c591 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -45,6 +45,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): [dockerswarm_sd_configs](https://docs.victoriametrics.com/sd_configs.html#dockerswarm_sd_configs): properly encode `filters` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3579). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix possible resource leak after hot reload of the updated [consul_sd_configs](https://docs.victoriametrics.com/sd_configs.html#consul_sd_configs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468). * BUGFIX: properly return label names starting from uppercase such as `CamelCaseLabel` from [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3566). +* BUGFIX: consistently select the sample with the biggest value out of samples with identical timestamps during querying when the [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled according to [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333). Previously random samples could be selected during querying. ## [v1.85.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.85.3)