VictoriaMetrics/lib/storage/dedup_test.go
Roman Khavronenko 7c0ae3a86a
lib/storage: keep sample with the biggest value on timestamp conflict (#3421)
The change keeps the raw sample with the biggest value among samples with
identical timestamps on each discrete `-dedup.minScrapeInterval` interval
when deduplication is enabled.

```
benchstat old.txt new.txt
name                                         old time/op    new time/op    delta
DeduplicateSamples/minScrapeInterval=1s-10      817ns ± 2%     832ns ± 3%      ~     (p=0.052 n=10+10)
DeduplicateSamples/minScrapeInterval=2s-10     1.56µs ± 1%    2.12µs ± 0%   +35.19%  (p=0.000 n=9+7)
DeduplicateSamples/minScrapeInterval=5s-10     1.32µs ± 3%    1.65µs ± 2%   +25.57%  (p=0.000 n=10+10)
DeduplicateSamples/minScrapeInterval=10s-10    1.13µs ± 2%    1.50µs ± 1%   +32.85%  (p=0.000 n=10+10)

name                                         old speed      new speed      delta
DeduplicateSamples/minScrapeInterval=1s-10   10.0GB/s ± 2%   9.9GB/s ± 3%      ~     (p=0.052 n=10+10)
DeduplicateSamples/minScrapeInterval=2s-10   5.24GB/s ± 1%  3.87GB/s ± 0%   -26.03%  (p=0.000 n=9+7)
DeduplicateSamples/minScrapeInterval=5s-10   6.22GB/s ± 3%  4.96GB/s ± 2%   -20.37%  (p=0.000 n=10+10)
DeduplicateSamples/minScrapeInterval=10s-10  7.28GB/s ± 2%  5.48GB/s ± 1%   -24.74%  (p=0.000 n=10+10)
```

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333
Signed-off-by: hagen1778 <roman@victoriametrics.com>

Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-12-08 18:06:11 -08:00

153 lines
6.1 KiB
Go

package storage
import (
"reflect"
"testing"
"time"
)
// TestNeedsDedup feeds needsDedup a series of timestamp slices together with
// a dedup interval and compares the returned flag with the expected one.
func TestNeedsDedup(t *testing.T) {
	check := func(interval int64, timestamps []int64, want bool) {
		t.Helper()
		if got := needsDedup(timestamps, interval); got != want {
			t.Fatalf("unexpected result for needsDedup(%d, %d); got %v; want %v", timestamps, interval, got, want)
		}
	}

	// Negative and zero intervals: no dedup is expected.
	check(-1, nil, false)
	check(-1, []int64{1}, false)
	check(0, []int64{1, 2}, false)

	// A single sample never needs dedup.
	check(10, []int64{1}, false)

	// Interval of 10: dedup is expected only when more than one timestamp
	// lands on the same discrete interval. The cases below treat a timestamp
	// that is an exact multiple of the interval as closing that interval
	// (e.g. 9 and 10 collide, while 10 and 11 do not).
	check(10, []int64{1, 2}, true)
	check(10, []int64{9, 11}, false)
	check(10, []int64{10, 11}, false)
	check(10, []int64{0, 10, 11}, false)
	check(10, []int64{9, 10}, true)
	check(10, []int64{0, 10, 19}, false)
	check(10, []int64{9, 19}, false)
	check(10, []int64{0, 11, 19}, true)
	check(10, []int64{0, 11, 20}, true)
	check(10, []int64{0, 11, 21}, false)
	check(10, []int64{0, 19}, false)
	check(10, []int64{0, 30, 40}, false)
	check(10, []int64{0, 31, 40}, true)
	check(10, []int64{0, 31, 41}, false)
	check(10, []int64{0, 31, 49}, false)
}
// TestDeduplicateSamples checks that DeduplicateSamples returns the expected
// timestamps and values for the given dedup interval, and that calling it a
// second time on its own output leaves the samples unchanged.
func TestDeduplicateSamples(t *testing.T) {
	f := func(scrapeInterval time.Duration, timestamps, timestampsExpected []int64, values, valuesExpected []float64) {
		t.Helper()
		// Pass a copy to DeduplicateSamples, so the original timestamps
		// remain available for failure messages.
		timestampsCopy := make([]int64, len(timestamps))
		copy(timestampsCopy, timestamps)
		dedupInterval := scrapeInterval.Milliseconds()
		timestampsCopy, values = DeduplicateSamples(timestampsCopy, values, dedupInterval)
		if !reflect.DeepEqual(timestampsCopy, timestampsExpected) {
			t.Fatalf("invalid DeduplicateSamples(%v) timestamps;\ngot\n%v\nwant\n%v", timestamps, timestampsCopy, timestampsExpected)
		}
		if !reflect.DeepEqual(values, valuesExpected) {
			t.Fatalf("invalid DeduplicateSamples(%v) values;\ngot\n%v\nwant\n%v", timestamps, values, valuesExpected)
		}

		// Verify that the second call to DeduplicateSamples doesn't modify samples.
		// valuesCopy is a separate slice, so values still holds the result of the first call.
		valuesCopy := append([]float64{}, values...)
		timestampsCopy, valuesCopy = DeduplicateSamples(timestampsCopy, valuesCopy, dedupInterval)
		if !reflect.DeepEqual(timestampsCopy, timestampsExpected) {
			t.Fatalf("invalid DeduplicateSamples(%v) timestamps for the second call;\ngot\n%v\nwant\n%v", timestamps, timestampsCopy, timestampsExpected)
		}
		if !reflect.DeepEqual(valuesCopy, values) {
			// got is the result of the second call; want is the result of the first call.
			t.Fatalf("invalid DeduplicateSamples(%v) values for the second call;\ngot\n%v\nwant\n%v", timestamps, valuesCopy, values)
		}
	}

	f(time.Millisecond, nil, []int64{}, []float64{}, []float64{})
	f(time.Millisecond, []int64{123}, []int64{123}, []float64{0}, []float64{0})
	f(time.Millisecond, []int64{123, 456}, []int64{123, 456}, []float64{0, 1}, []float64{0, 1})

	// The biggest value is expected to be kept for samples with identical timestamps,
	// regardless of the order of values.
	f(time.Millisecond,
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]int64{0, 1, 2, 3, 4},
		[]float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
		[]float64{2, 4, 5, 8, 9})
	f(time.Millisecond,
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]int64{0, 1, 2, 3, 4},
		[]float64{2, 1, 0, 3, 4, 5, 7, 6, 8, 9},
		[]float64{2, 4, 5, 8, 9})
	f(time.Millisecond,
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]int64{0, 1, 2, 3, 4},
		[]float64{1, 2, 0, 4, 3, 5, 8, 6, 7, 9},
		[]float64{2, 4, 5, 8, 9})

	// Descending values.
	f(time.Millisecond,
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]int64{0, 1, 2, 3, 4},
		[]float64{9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
		[]float64{9, 6, 4, 3, 0})

	// Several samples per 10ms interval, including identical timestamps.
	f(10*time.Millisecond,
		[]int64{0, 9, 11, 13, 13, 29, 29, 29},
		[]int64{0, 9, 13, 29},
		[]float64{5, 1, 0, 4, 1, 3, 0, 5},
		[]float64{5, 1, 4, 5})

	// Too small dedup interval: samples are expected to be returned as is.
	f(0,
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4},
		[]float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
		[]float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

	f(100*time.Millisecond,
		[]int64{0, 100, 100, 101, 150, 180, 205, 300, 1000},
		[]int64{0, 100, 180, 300, 1000},
		[]float64{0, 1, 2, 3, 4, 5, 6, 7, 8},
		[]float64{0, 2, 5, 7, 8})
	f(10*time.Second,
		[]int64{10e3, 13e3, 21e3, 22e3, 30e3, 33e3, 39e3, 45e3},
		[]int64{10e3, 13e3, 30e3, 39e3, 45e3},
		[]float64{0, 1, 2, 3, 4, 5, 6, 7},
		[]float64{0, 1, 4, 6, 7})
}
// TestDeduplicateSamplesDuringMerge checks that deduplicateSamplesDuringMerge
// returns the expected timestamps and values for the given dedup interval, and
// that calling it a second time on its own output leaves the samples unchanged.
//
// The input value of every sample equals its index in timestamps, so
// valuesExpected lists the indexes of the samples expected to survive.
func TestDeduplicateSamplesDuringMerge(t *testing.T) {
	f := func(scrapeInterval time.Duration, timestamps, timestampsExpected, valuesExpected []int64) {
		t.Helper()
		// Pass a copy to deduplicateSamplesDuringMerge, so the original
		// timestamps remain available for failure messages.
		timestampsCopy := make([]int64, len(timestamps))
		copy(timestampsCopy, timestamps)
		values := make([]int64, len(timestamps))
		for i := range values {
			values[i] = int64(i)
		}
		dedupInterval := scrapeInterval.Milliseconds()
		timestampsCopy, values = deduplicateSamplesDuringMerge(timestampsCopy, values, dedupInterval)
		if !reflect.DeepEqual(timestampsCopy, timestampsExpected) {
			t.Fatalf("invalid deduplicateSamplesDuringMerge(%v) timestamps;\ngot\n%v\nwant\n%v", timestamps, timestampsCopy, timestampsExpected)
		}
		if !reflect.DeepEqual(values, valuesExpected) {
			t.Fatalf("invalid deduplicateSamplesDuringMerge(%v) values;\ngot\n%v\nwant\n%v", timestamps, values, valuesExpected)
		}

		// Verify that the second call to deduplicateSamplesDuringMerge doesn't modify samples.
		// valuesCopy is a separate slice, so values still holds the result of the first call.
		valuesCopy := append([]int64{}, values...)
		timestampsCopy, valuesCopy = deduplicateSamplesDuringMerge(timestampsCopy, valuesCopy, dedupInterval)
		if !reflect.DeepEqual(timestampsCopy, timestampsExpected) {
			t.Fatalf("invalid deduplicateSamplesDuringMerge(%v) timestamps for the second call;\ngot\n%v\nwant\n%v", timestamps, timestampsCopy, timestampsExpected)
		}
		if !reflect.DeepEqual(valuesCopy, values) {
			// got is the result of the second call; want is the result of the first call.
			t.Fatalf("invalid deduplicateSamplesDuringMerge(%v) values for the second call;\ngot\n%v\nwant\n%v", timestamps, valuesCopy, values)
		}
	}

	f(time.Millisecond, nil, []int64{}, []int64{})
	f(time.Millisecond, []int64{123}, []int64{123}, []int64{0})
	f(time.Millisecond, []int64{123, 456}, []int64{123, 456}, []int64{0, 1})
	f(time.Millisecond, []int64{0, 0, 0, 1, 1, 2, 3, 3, 3, 4}, []int64{0, 1, 2, 3, 4}, []int64{2, 4, 5, 8, 9})
	f(100*time.Millisecond, []int64{0, 100, 100, 101, 150, 180, 200, 300, 1000}, []int64{0, 100, 200, 300, 1000}, []int64{0, 2, 6, 7, 8})
	f(10*time.Second, []int64{10e3, 13e3, 21e3, 22e3, 30e3, 33e3, 39e3, 45e3}, []int64{10e3, 13e3, 30e3, 39e3, 45e3}, []int64{0, 1, 4, 6, 7})
}