From 4878152678d684debb9bed6e7f53c62f0101d6e2 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 15 Jul 2024 10:08:12 +0200 Subject: [PATCH] lib/{storage,mergeset}: do not allow setting dataFlushInterval to values smaller than pending{Items,Rows}FlushInterval Pending rows and items unconditionally remain in memory for up to pending{Items,Rows}FlushInterval, so there is no sense in setting dataFlushInterval (the interval for guaranteed flush of in-memory data to disk) to values smaller than pending{Items,Rows}FlushInterval, since this doesn't affect the interval for flushing pending rows and items from memory to disk. This is a follow-up for 4c80b170273392ac2e2364cbc78223d7aba0cb8a Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6221 --- lib/mergeset/table.go | 11 +++++++---- lib/storage/partition.go | 13 ++++++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/mergeset/table.go b/lib/mergeset/table.go index 69911bddf9..16d4167224 100644 --- a/lib/mergeset/table.go +++ b/lib/mergeset/table.go @@ -47,8 +47,7 @@ const maxPartSize = 400e9 // The interval for flushing buffered data to parts, so it becomes visible to search. const pendingItemsFlushInterval = time.Second -// The interval for guaranteed flush of recently ingested data from memory to on-disk parts, -// so they survive process crash. +// The interval for guaranteed flush of recently ingested data from memory to on-disk parts so they survive process crash. var dataFlushInterval = 5 * time.Second // SetDataFlushInterval sets the interval for guaranteed flush of recently ingested data from memory to disk. @@ -57,9 +56,13 @@ var dataFlushInterval = 5 * time.Second // // This function must be called before initializing the indexdb. 
func SetDataFlushInterval(d time.Duration) { - if d >= time.Second { - dataFlushInterval = d + if d < pendingItemsFlushInterval { + // There is no sense in setting dataFlushInterval to values smaller than pendingItemsFlushInterval, + // since pending items unconditionally remain in memory for up to pendingItemsFlushInterval. + d = pendingItemsFlushInterval } + + dataFlushInterval = d } // maxItemsPerCachedPart is the maximum items per created part by the merge, diff --git a/lib/storage/partition.go b/lib/storage/partition.go index f58710a6c5..ab70c37914 100644 --- a/lib/storage/partition.go +++ b/lib/storage/partition.go @@ -48,8 +48,7 @@ var rawRowsShardsPerPartition = cgroup.AvailableCPUs() // The interval for flushing buffered rows into parts, so they become visible to search. const pendingRowsFlushInterval = 2 * time.Second -// The interval for guaranteed flush of recently ingested data from memory to on-disk parts, -// so they survive process crash. +// The interval for guaranteed flush of recently ingested data from memory to on-disk parts, so they survive process crash. var dataFlushInterval = 5 * time.Second // SetDataFlushInterval sets the interval for guaranteed flush of recently ingested data from memory to disk. @@ -58,10 +57,14 @@ var dataFlushInterval = 5 * time.Second // // This function must be called before initializing the storage. func SetDataFlushInterval(d time.Duration) { - if d >= time.Second { - dataFlushInterval = d - mergeset.SetDataFlushInterval(d) + if d < pendingRowsFlushInterval { + // There is no sense in setting dataFlushInterval to values smaller than pendingRowsFlushInterval, + // since pending rows unconditionally remain in memory for up to pendingRowsFlushInterval. + d = pendingRowsFlushInterval } + + dataFlushInterval = d + mergeset.SetDataFlushInterval(d) } // The maximum number of rawRow items in rawRowsShard.