diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index f7c1daca5..f97075208 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -508,18 +508,17 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) { } sbNext := sbh.getNextBlock() tsNext := sbNext.Timestamps[sbNext.NextIdx] - topTimestamps := top.Timestamps topNextIdx := top.NextIdx - if n := equalTimestampsPrefix(topTimestamps[topNextIdx:], sbNext.Timestamps[sbNext.NextIdx:]); n > 0 && dedupInterval > 0 { + if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 { // Skip n replicated samples at top if deduplication is enabled. top.NextIdx = topNextIdx + n } else { // Copy samples from top to dst with timestamps not exceeding tsNext. - top.NextIdx = topNextIdx + binarySearchTimestamps(topTimestamps[topNextIdx:], tsNext) - dst.Timestamps = append(dst.Timestamps, topTimestamps[topNextIdx:top.NextIdx]...) + top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext) + dst.Timestamps = append(dst.Timestamps, top.Timestamps[topNextIdx:top.NextIdx]...) dst.Values = append(dst.Values, top.Values[topNextIdx:top.NextIdx]...) } - if top.NextIdx < len(topTimestamps) { + if top.NextIdx < len(top.Timestamps) { heap.Fix(&sbh, 0) } else { heap.Pop(&sbh) @@ -535,6 +534,14 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) { var dedupsDuringSelect = metrics.NewCounter(`vm_deduplicated_samples_total{type="select"}`) +func equalSamplesPrefix(a, b *sortBlock) int { + n := equalTimestampsPrefix(a.Timestamps[a.NextIdx:], b.Timestamps[b.NextIdx:]) + if n == 0 { + return 0 + } + return equalValuesPrefix(a.Values[a.NextIdx:a.NextIdx+n], b.Values[b.NextIdx:b.NextIdx+n]) +} + func equalTimestampsPrefix(a, b []int64) int { for i, v := range a { if i >= len(b) || v != b[i] { @@ -544,6 +551,15 @@ func equalTimestampsPrefix(a, b []int64) int { return len(a) } +func equalValuesPrefix(a, b []float64) int { + for i, v := range a { + if i >= len(b) || v != b[i] { + return i + } + } + return len(a) +} + func binarySearchTimestamps(timestamps []int64, ts int64) int { // The code has been adapted from sort.Search. n := len(timestamps) diff --git a/app/vmselect/netstorage/netstorage_test.go b/app/vmselect/netstorage/netstorage_test.go index 4b20553be..a6ea47892 100644 --- a/app/vmselect/netstorage/netstorage_test.go +++ b/app/vmselect/netstorage/netstorage_test.go @@ -102,21 +102,35 @@ func TestMergeSortBlocks(t *testing.T) { Values: []float64{9, 4.2, 2.1, 5.2, 42, 6.1}, }) - // Multiple blocks with identical timestamps. + // Multiple blocks with identical timestamps and identical values. f([]*sortBlock{ + { + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }, { Timestamps: []int64{1, 2, 4}, Values: []float64{9, 5.2, 6.1}, }, + }, 1, &Result{ + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }) + + // Multiple blocks with identical timestamps. + f([]*sortBlock{ + { + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 6.1, 9}, + }, { Timestamps: []int64{1, 2, 4}, Values: []float64{4.2, 2.1, 42}, }, }, 1, &Result{ - Timestamps: []int64{1, 2, 4}, - Values: []float64{4.2, 2.1, 42}, + Timestamps: []int64{1, 2, 4, 5}, + Values: []float64{9, 5.2, 42, 9}, }) - // Multiple blocks with identical timestamps, disabled deduplication. f([]*sortBlock{ { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f5f27a701..e80e4c591 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -45,6 +45,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): [dockerswarm_sd_configs](https://docs.victoriametrics.com/sd_configs.html#dockerswarm_sd_configs): properly encode `filters` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3579). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix possible resource leak after hot reload of the updated [consul_sd_configs](https://docs.victoriametrics.com/sd_configs.html#consul_sd_configs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468). * BUGFIX: properly return label names starting from uppercase such as `CamelCaseLabel` from [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3566). +* BUGFIX: consistently select the sample with the biggest value out of samples with identical timestamps during querying when the [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled according to [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333). Previously random samples could be selected during querying. ## [v1.85.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.85.3)