From dff5008392fe08d967771461db9904692c4aaa81 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 25 Jun 2024 17:30:02 +0200 Subject: [PATCH] app/vlstorage: add -retention.maxDiskSpaceUsageBytes command-line flag for limiting the retention at VictoriaLogs by disk space usage --- app/vlstorage/main.go | 19 +++--- docs/VictoriaLogs/CHANGELOG.md | 2 + docs/VictoriaLogs/README.md | 26 ++++++++- lib/logstorage/storage.go | 102 +++++++++++++++++++++++++++++---- 4 files changed, 128 insertions(+), 21 deletions(-) diff --git a/app/vlstorage/main.go b/app/vlstorage/main.go index 6ea43a805..f20d66dfa 100644 --- a/app/vlstorage/main.go +++ b/app/vlstorage/main.go @@ -20,10 +20,12 @@ import ( var ( retentionPeriod = flagutil.NewDuration("retentionPeriod", "7d", "Log entries with timestamps older than now-retentionPeriod are automatically deleted; "+ "log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); "+ - "see https://docs.victoriametrics.com/victorialogs/#retention") + "see https://docs.victoriametrics.com/victorialogs/#retention ; see also -retention.maxDiskSpaceUsageBytes") + maxDiskSpaceUsageBytes = flagutil.NewBytes("retention.maxDiskSpaceUsageBytes", 0, "The maximum disk space usage at -storageDataPath before older per-day "+ + "partitions are automatically dropped; see https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage ; see also -retentionPeriod") futureRetention = flagutil.NewDuration("futureRetention", "2d", "Log entries with timestamps bigger than now+futureRetention are rejected during data ingestion; "+ "see https://docs.victoriametrics.com/victorialogs/#retention") - storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory with the VictoriaLogs data; "+ + storageDataPath = flag.String("storageDataPath", "victoria-logs-data", "Path to directory where to store VictoriaLogs data; "+ "see 
https://docs.victoriametrics.com/victorialogs/#storage") inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+ "The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+ @@ -49,12 +51,13 @@ func Init() { logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod) } cfg := &logstorage.StorageConfig{ - Retention: retentionPeriod.Duration(), - FlushInterval: *inmemoryDataFlushInterval, - FutureRetention: futureRetention.Duration(), - LogNewStreams: *logNewStreams, - LogIngestedRows: *logIngestedRows, - MinFreeDiskSpaceBytes: minFreeDiskSpaceBytes.N, + Retention: retentionPeriod.Duration(), + MaxDiskSpaceUsageBytes: maxDiskSpaceUsageBytes.N, + FlushInterval: *inmemoryDataFlushInterval, + FutureRetention: futureRetention.Duration(), + LogNewStreams: *logNewStreams, + LogIngestedRows: *logIngestedRows, + MinFreeDiskSpaceBytes: minFreeDiskSpaceBytes.N, } logger.Infof("opening storage at -storageDataPath=%s", *storageDataPath) startTime := time.Now() diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 28a87ca89..6bcf6dabb 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -19,6 +19,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: add `-retention.maxDiskSpaceUsageBytes` command-line flag, which allows limiting disk space usage for [VictoriaLogs data](https://docs.victoriametrics.com/victorialogs/#storage) by automatically dropping the oldest per-day partitions if the storage disk space usage becomes bigger than the `-retention.maxDiskSpaceUsageBytes`. See [these docs](https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage). 
+ ## [v0.23.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.23.0-victorialogs) Released at 2024-06-25 diff --git a/docs/VictoriaLogs/README.md b/docs/VictoriaLogs/README.md index 44f2d6c9b..d94a1a27a 100644 --- a/docs/VictoriaLogs/README.md +++ b/docs/VictoriaLogs/README.md @@ -77,6 +77,8 @@ For example, the following command starts VictoriaLogs with the retention of 8 w /path/to/victoria-logs -retentionPeriod=8w ``` +See also [retention by disk space usage](#retention-by-disk-space-usage). + VictoriaLogs stores the [ingested](https://docs.victoriametrics.com/victorialogs/data-ingestion/) logs in per-day partition directories. It automatically drops partition directories outside the configured retention. @@ -101,6 +103,23 @@ For example, the following command starts VictoriaLogs, which accepts logs with /path/to/victoria-logs -futureRetention=1y ``` +## Retention by disk space usage + +VictoriaLogs can be configured to automatically drop older per-day partitions if the total size of partitions at [`-storageDataPath` directory](#storage) +becomes bigger than the threshold given via the `-retention.maxDiskSpaceUsageBytes` command-line flag. For example, the following command starts VictoriaLogs, +which drops old per-day partitions if the total [storage](#storage) size becomes bigger than `100GiB`: + +```sh +/path/to/victoria-logs -retention.maxDiskSpaceUsageBytes=100GiB +``` + +VictoriaLogs keeps at least the last two days of data in order to guarantee that the logs for the last day can be returned in queries. +This means that the total disk space usage may exceed the `-retention.maxDiskSpaceUsageBytes` if the size of the last two days of data +exceeds the `-retention.maxDiskSpaceUsageBytes`. + +See also [retention](#retention). + + ## Storage VictoriaLogs stores all its data in a single directory - `victoria-logs-data`. The path to the directory can be changed via `-storageDataPath` command-line flag. 
@@ -263,8 +282,11 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. + -retention.maxDiskSpaceUsageBytes size + The maximum disk space usage at -storageDataPath before older per-day partitions are automatically dropped; see https://docs.victoriametrics.com/victorialogs/#retention-by-disk-space-usage ; see also -retentionPeriod + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0) -retentionPeriod value - Log entries with timestamps older than now-retentionPeriod are automatically deleted; log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); see https://docs.victoriametrics.com/victorialogs/#retention + Log entries with timestamps older than now-retentionPeriod are automatically deleted; log entries with timestamps outside the retention are also rejected during data ingestion; the minimum supported retention is 1d (one day); see https://docs.victoriametrics.com/victorialogs/#retention ; see also -retention.maxDiskSpaceUsageBytes The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 7d) -search.maxConcurrentRequests int The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. 
See also -search.maxQueueDuration (default 16) @@ -276,7 +298,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line The minimum free disk space at -storageDataPath after which the storage stops accepting new data Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000) -storageDataPath string - Path to directory with the VictoriaLogs data; see https://docs.victoriametrics.com/victorialogs/#storage (default "victoria-logs-data") + Path to directory where to store VictoriaLogs data; see https://docs.victoriametrics.com/victorialogs/#storage (default "victoria-logs-data") -syslog.compressMethod.tcp array Compression method for syslog messages received at the corresponding -syslog.listenAddr.tcp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ Supports an array of values separated by comma or specified via multiple flags. diff --git a/lib/logstorage/storage.go b/lib/logstorage/storage.go index 3307eb989..efa29c5f2 100644 --- a/lib/logstorage/storage.go +++ b/lib/logstorage/storage.go @@ -45,7 +45,12 @@ type StorageConfig struct { // Older data is automatically deleted. Retention time.Duration - // FlushInterval is the interval for flushing the in-memory data to disk at the Storage + // MaxDiskSpaceUsageBytes is an optional maximum disk space logs can use. + // + // The oldest per-day partitions are automatically dropped if the total disk space usage exceeds this limit. + MaxDiskSpaceUsageBytes int64 + + // FlushInterval is the interval for flushing the in-memory data to disk at the Storage. FlushInterval time.Duration // FutureRetention is the allowed retention from the current time to future for the ingested data. @@ -53,7 +58,8 @@ type StorageConfig struct { // Log entries with timestamps bigger than now+FutureRetention are ignored. 
FutureRetention time.Duration - // MinFreeDiskSpaceBytes is the minimum free disk space at storage path after which the storage stops accepting new data. + // MinFreeDiskSpaceBytes is the minimum free disk space at storage path after which the storage stops accepting new data + // and enters read-only mode. MinFreeDiskSpaceBytes int64 // LogNewStreams indicates whether to log newly created log streams. @@ -81,6 +87,11 @@ type Storage struct { // older data is automatically deleted retention time.Duration + // maxDiskSpaceUsageBytes is an optional maximum disk space logs can use. + // + // The oldest per-day partitions are automatically dropped if the total disk space usage exceeds this limit. + maxDiskSpaceUsageBytes int64 + // flushInterval is the interval for flushing in-memory data to disk flushInterval time.Duration @@ -247,15 +258,16 @@ func MustOpenStorage(path string, cfg *StorageConfig) *Storage { filterStreamCache := workingsetcache.New(mem / 10) s := &Storage{ - path: path, - retention: retention, - flushInterval: flushInterval, - futureRetention: futureRetention, - minFreeDiskSpaceBytes: minFreeDiskSpaceBytes, - logNewStreams: cfg.LogNewStreams, - logIngestedRows: cfg.LogIngestedRows, - flockF: flockF, - stopCh: make(chan struct{}), + path: path, + retention: retention, + maxDiskSpaceUsageBytes: cfg.MaxDiskSpaceUsageBytes, + flushInterval: flushInterval, + futureRetention: futureRetention, + minFreeDiskSpaceBytes: minFreeDiskSpaceBytes, + logNewStreams: cfg.LogNewStreams, + logIngestedRows: cfg.LogIngestedRows, + flockF: flockF, + stopCh: make(chan struct{}), streamIDCache: streamIDCache, streamTagsCache: streamTagsCache, @@ -305,6 +317,7 @@ func MustOpenStorage(path string, cfg *StorageConfig) *Storage { s.partitions = ptws s.runRetentionWatcher() + s.runMaxDiskSpaceUsageWatcher() return s } @@ -318,6 +331,17 @@ func (s *Storage) runRetentionWatcher() { }() } +func (s *Storage) runMaxDiskSpaceUsageWatcher() { + if s.maxDiskSpaceUsageBytes <= 0 { + 
return + } + s.wg.Add(1) + go func() { + s.watchMaxDiskSpaceUsage() + s.wg.Done() + }() +} + func (s *Storage) watchRetention() { d := timeutil.AddJitterToDuration(time.Hour) ticker := time.NewTicker(d) @@ -360,6 +384,62 @@ func (s *Storage) watchRetention() { } } +func (s *Storage) watchMaxDiskSpaceUsage() { + d := timeutil.AddJitterToDuration(10 * time.Second) + ticker := time.NewTicker(d) + defer ticker.Stop() + for { + s.partitionsLock.Lock() + var n uint64 + ptws := s.partitions + var ptwsToDelete []*partitionWrapper + for i := len(ptws) - 1; i >= 0; i-- { + ptw := ptws[i] + var ps PartitionStats + ptw.pt.updateStats(&ps) + n += ps.IndexdbSizeBytes + ps.CompressedSmallPartSize + ps.CompressedBigPartSize + if n <= uint64(s.maxDiskSpaceUsageBytes) { + continue + } + if i >= len(ptws)-2 { + // Keep the last two per-day partitions, so logs could be queried for one day time range. + continue + } + + // ptws are sorted by time, so just drop all the partitions until i, including i. + i++ + ptwsToDelete = ptws[:i] + s.partitions = ptws[i:] + + // Remove reference to deleted partitions from s.ptwHot + for _, ptw := range ptwsToDelete { + if ptw == s.ptwHot { + s.ptwHot = nil + break + } + } + + break + } + s.partitionsLock.Unlock() + + for i, ptw := range ptwsToDelete { + logger.Infof("the partition %s is scheduled to be deleted because the total size of partitions exceeds -retention.maxDiskSpaceUsageBytes=%d", + ptw.pt.path, s.maxDiskSpaceUsageBytes) + ptw.mustDrop.Store(true) + ptw.decRef() + + ptwsToDelete[i] = nil + } + + select { + case <-s.stopCh: + return + case <-ticker.C: + } + } +} + func (s *Storage) getMinAllowedDay() int64 { return time.Now().UTC().Add(-s.retention).UnixNano() / nsecPerDay }