From 56d6b8ed0ae807a9c41ed5e32ea7b243fa24b2d1 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Tue, 4 Feb 2020 18:40:58 +0200
Subject: [PATCH] lib/storage: do not deduplicate blocks with fewer than 32
 samples during merge

This should improve deduplication accuracy for blocks with a higher number of samples.
---
 lib/storage/dedup.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/storage/dedup.go b/lib/storage/dedup.go
index aa78665dbd..1d0765f072 100644
--- a/lib/storage/dedup.go
+++ b/lib/storage/dedup.go
@@ -57,6 +57,11 @@ func deduplicateSamplesDuringMerge(srcTimestamps []int64, srcValues []int64) ([]
 	if *minScrapeInterval <= 0 {
 		return srcTimestamps, srcValues
 	}
+	if len(srcTimestamps) < 32 {
+		// Do not de-duplicate small number of samples during merge
+		// in order to improve deduplication accuracy on later stages.
+		return srcTimestamps, srcValues
+	}
 	minDelta := getMinDelta()
 	if !needsDedup(srcTimestamps, minDelta) {
 		// Fast path - nothing to deduplicate