From 56d6b8ed0ae807a9c41ed5e32ea7b243fa24b2d1 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 4 Feb 2020 18:40:58 +0200
Subject: [PATCH] lib/storage: do not deduplicate blocks with less than 32 samples during merge

This should improve deduplication accuracy for blocks with a higher number of samples.
---
 lib/storage/dedup.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/storage/dedup.go b/lib/storage/dedup.go
index aa78665dbd..1d0765f072 100644
--- a/lib/storage/dedup.go
+++ b/lib/storage/dedup.go
@@ -57,6 +57,11 @@ func deduplicateSamplesDuringMerge(srcTimestamps []int64, srcValues []int64) ([]
 	if *minScrapeInterval <= 0 {
 		return srcTimestamps, srcValues
 	}
+	if len(srcTimestamps) < 32 {
+		// Do not de-duplicate small number of samples during merge
+		// in order to improve deduplication accuracy on later stages.
+		return srcTimestamps, srcValues
+	}
 	minDelta := getMinDelta()
 	if !needsDedup(srcTimestamps, minDelta) {
 		// Fast path - nothing to deduplicate