mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib: allow to configure cache size by type (#2206)
* lib: allow to configure cache size by type https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940 Signed-off-by: hagen1778 <roman@victoriametrics.com> * Apply suggestions from code review * wip Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
parent
4606d00d7d
commit
bd7837d524
6 changed files with 113 additions and 3 deletions
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
|
@ -48,6 +49,10 @@ var (
|
|||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries")
|
||||
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
|
||||
cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
|
||||
cacheSizeIndexDBIndexBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBIndexBlocks", 0, "Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
|
||||
cacheSizeIndexDBDataBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBDataBlocks", 0, "Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -64,6 +69,9 @@ func main() {
|
|||
storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
|
||||
storage.SetSmallMergeWorkersCount(*smallMergeConcurrency)
|
||||
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
|
||||
storage.SetTSIDCacheSize(cacheSizeStorageTSID.N)
|
||||
mergeset.SetIndexBlocksCacheSize(cacheSizeIndexDBIndexBlocks.N)
|
||||
mergeset.SetDataBlocksCacheSize(cacheSizeIndexDBDataBlocks.N)
|
||||
|
||||
logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
|
||||
startTime := time.Now()
|
||||
|
|
|
@ -14,9 +14,15 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: allow overriding default limits for the following in-memory caches, which usually occupy the most memory:
|
||||
* `storage/tsid` - the cache speeds up lookups of internal metric ids by `metric_name{labels...}` during data ingestion. The size for this cache can be tuned with `-storage.cacheSizeStorageTSID` command-line flag.
|
||||
* `indexdb/dataBlocks` - the cache speeds up data lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBDataBlocks` command-line flag.
|
||||
* `indexdb/indexBlocks` - the cache speeds up index lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBIndexBlocks` command-line flag.
|
||||
See also [cache tuning docs](https://docs.victoriametrics.com/#cache-tuning). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940).
|
||||
* FEATURE: add `-influxDBLabel` command-line flag for overriding `db` label name for the data [imported into VictoriaMetrics via InfluxDB line protocol](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). Thanks to @johnatannvmd for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2203).
|
||||
* FEATURE: return `X-Influxdb-Version` HTTP header in responses to [InfluxDB write requests](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). This is needed for some InfluxDB clients. See, for example, [this comment](https://github.com/ntop/ntopng/issues/5449#issuecomment-1005347597).
|
||||
|
||||
* BUGFIX: reduce memory usage during the first three hours after the upgrade from versions older than v1.73.0. The memory usage spike was related to the need of in-memory caches' re-population after the upgrade because of the fix for [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401). Now cache size limits are reduced in order to occupy less memory during the upgrade.
|
||||
* BUGFIX: fix a bug, which could significantly slow down requests to `/api/v1/labels` and `/api/v1/label/<label_name>/values`. These APIs are used by Grafana for auto-completion of label names and label values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2200).
|
||||
* BUGFIX: vmalert: add support for `$externalLabels` and `$externalURL` template vars in the same way as Prometheus does. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2193).
|
||||
* BUGFIX: update default value for `-promscrape.fileSDCheckInterval`, so it matches default duration used by Prometheus for checking for updates in `file_sd_configs`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2187). Thanks to @corporate-gadfly for the fix.
|
||||
|
|
|
@ -1428,6 +1428,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet
|
|||
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
|
||||
|
||||
|
||||
## Cache tuning
|
||||
|
||||
VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance.
|
||||
The following metrics for each type of cache are exported at [`/metrics` page](#monitoring):
|
||||
- `vm_cache_size_bytes` - the actual cache size
|
||||
- `vm_cache_size_max_bytes` - cache size limit
|
||||
- `vm_cache_requests_total` - the number of requests to the cache
|
||||
- `vm_cache_misses_total` - the number of cache misses
|
||||
- `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
then cache efficiency is already very high and does not need any tuning.
|
||||
The panel `Cache usage %` in `Troubleshooting` section shows the percentage of used cache size
|
||||
from the allowed size by type. If the percentage is below 100%, then no further tuning needed.
|
||||
|
||||
Please note, default cache sizes were carefully adjusted accordingly to the most
|
||||
practical scenarios and workloads. Change the defaults only if you understand the implications.
|
||||
|
||||
To override the default values see command-line flags with `-storage.cacheSize` prefix.
|
||||
See the full description of flags [here](#list-of-command-line-flags).
|
||||
|
||||
|
||||
## Data migration
|
||||
|
||||
Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types:
|
||||
|
@ -1898,6 +1923,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
authKey, which must be passed in query string to /snapshot* pages
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeStorageTSID size
|
||||
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.maxDailySeries int
|
||||
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries
|
||||
-storage.maxHourlySeries int
|
||||
|
|
|
@ -1432,6 +1432,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet
|
|||
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
|
||||
|
||||
|
||||
## Cache tuning
|
||||
|
||||
VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance.
|
||||
The following metrics for each type of cache are exported at [`/metrics` page](#monitoring):
|
||||
- `vm_cache_size_bytes` - the actual cache size
|
||||
- `vm_cache_size_max_bytes` - cache size limit
|
||||
- `vm_cache_requests_total` - the number of requests to the cache
|
||||
- `vm_cache_misses_total` - the number of cache misses
|
||||
- `vm_cache_entries` - the number of entries in the cache
|
||||
|
||||
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
|
||||
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
|
||||
contain `Caches` section with cache metrics visualized. The panels show the current
|
||||
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
|
||||
then cache efficiency is already very high and does not need any tuning.
|
||||
The panel `Cache usage %` in `Troubleshooting` section shows the percentage of used cache size
|
||||
from the allowed size by type. If the percentage is below 100%, then no further tuning needed.
|
||||
|
||||
Please note, default cache sizes were carefully adjusted accordingly to the most
|
||||
practical scenarios and workloads. Change the defaults only if you understand the implications.
|
||||
|
||||
To override the default values see command-line flags with `-storage.cacheSize` prefix.
|
||||
See the full description of flags [here](#list-of-command-line-flags).
|
||||
|
||||
|
||||
## Data migration
|
||||
|
||||
Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types:
|
||||
|
@ -1902,6 +1927,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
|||
authKey, which must be passed in query string to /snapshot* pages
|
||||
-sortLabels
|
||||
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
|
||||
-storage.cacheSizeIndexDBDataBlocks size
|
||||
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeIndexDBIndexBlocks size
|
||||
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.cacheSizeStorageTSID size
|
||||
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
|
||||
-storage.maxDailySeries int
|
||||
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries
|
||||
-storage.maxHourlySeries int
|
||||
|
|
|
@ -15,9 +15,16 @@ import (
|
|||
var idxbCache = blockcache.NewCache(getMaxIndexBlocksCacheSize)
|
||||
var ibCache = blockcache.NewCache(getMaxInmemoryBlocksCacheSize)
|
||||
|
||||
// SetIndexBlocksCacheSize overrides the default size of indexdb/indexBlock cache
|
||||
func SetIndexBlocksCacheSize(size int) {
|
||||
maxIndexBlockCacheSize = size
|
||||
}
|
||||
|
||||
func getMaxIndexBlocksCacheSize() int {
|
||||
maxIndexBlockCacheSizeOnce.Do(func() {
|
||||
maxIndexBlockCacheSize = int(0.15 * float64(memory.Allowed()))
|
||||
if maxIndexBlockCacheSize <= 0 {
|
||||
maxIndexBlockCacheSize = int(0.10 * float64(memory.Allowed()))
|
||||
}
|
||||
})
|
||||
return maxIndexBlockCacheSize
|
||||
}
|
||||
|
@ -27,9 +34,16 @@ var (
|
|||
maxIndexBlockCacheSizeOnce sync.Once
|
||||
)
|
||||
|
||||
// SetDataBlocksCacheSize overrides the default size of indexdb/dataBlocks cache
|
||||
func SetDataBlocksCacheSize(size int) {
|
||||
maxInmemoryBlockCacheSize = size
|
||||
}
|
||||
|
||||
func getMaxInmemoryBlocksCacheSize() int {
|
||||
maxInmemoryBlockCacheSizeOnce.Do(func() {
|
||||
maxInmemoryBlockCacheSize = int(0.4 * float64(memory.Allowed()))
|
||||
if maxInmemoryBlockCacheSize <= 0 {
|
||||
maxIndexBlockCacheSize = int(0.25 * float64(memory.Allowed()))
|
||||
}
|
||||
})
|
||||
return maxInmemoryBlockCacheSize
|
||||
}
|
||||
|
|
|
@ -213,7 +213,7 @@ func OpenStorage(path string, retentionMsecs int64, maxHourlySeries, maxDailySer
|
|||
|
||||
// Load caches.
|
||||
mem := memory.Allowed()
|
||||
s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", int(float64(mem)*0.35))
|
||||
s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", getTSIDCacheSize())
|
||||
s.metricIDCache = s.mustLoadCache("MetricID->TSID", "metricID_tsid", mem/16)
|
||||
s.metricNameCache = s.mustLoadCache("MetricID->MetricName", "metricID_metricName", mem/10)
|
||||
s.dateMetricIDCache = newDateMetricIDCache()
|
||||
|
@ -286,6 +286,20 @@ func (s *Storage) RetentionMsecs() int64 {
|
|||
return s.retentionMsecs
|
||||
}
|
||||
|
||||
var maxTSIDCacheSize int
|
||||
|
||||
// SetTSIDCacheSize overrides the default size of storage/tsid cahce
|
||||
func SetTSIDCacheSize(size int) {
|
||||
maxTSIDCacheSize = size
|
||||
}
|
||||
|
||||
func getTSIDCacheSize() int {
|
||||
if maxTSIDCacheSize <= 0 {
|
||||
return int(float64(memory.Allowed()) * 0.37)
|
||||
}
|
||||
return maxTSIDCacheSize
|
||||
}
|
||||
|
||||
func (s *Storage) getDeletedMetricIDs() *uint64set.Set {
|
||||
return s.deletedMetricIDs.Load().(*uint64set.Set)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue