diff --git a/README.md b/README.md index 980b73330..de41a4a30 100644 --- a/README.md +++ b/README.md @@ -1767,6 +1767,9 @@ This aligns with the [staleness rules in Prometheus](https://prometheus.io/docs/ If multiple raw samples have **the same timestamp** on the given `-dedup.minScrapeInterval` discrete interval, then the sample with **the biggest value** is kept. +Prometheus stale markers are treated like any other value. If the raw sample with the biggest timestamp on the `-dedup.minScrapeInterval` +discrete interval has a stale marker as its value, it is kept after deduplication. + Please note, [labels](https://docs.victoriametrics.com/keyConcepts.html#labels) of raw samples should be identical in order to be deduplicated. For example, this is why [HA pair of vmagents](https://docs.victoriametrics.com/vmagent.html#high-availability) needs to be identically configured. diff --git a/lib/storage/dedup.go b/lib/storage/dedup.go index 9f8e45369..dcdefbfd8 100644 --- a/lib/storage/dedup.go +++ b/lib/storage/dedup.go @@ -25,6 +25,8 @@ func isDedupEnabled() bool { } // DeduplicateSamples removes samples from src* if they are closer to each other than dedupInterval in milliseconds. +// DeduplicateSamples intentionally treats StaleNaN (Prometheus stale markers) as regular values instead of skipping them - see +// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5587 func DeduplicateSamples(srcTimestamps []int64, srcValues []float64, dedupInterval int64) ([]int64, []float64) { if !needsDedup(srcTimestamps, dedupInterval) { // Fast path - nothing to deduplicate