lib: allow to configure cache size by type (#2206)

* lib: allow to configure cache size by type

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940
Signed-off-by: hagen1778 <roman@victoriametrics.com>

* Apply suggestions from code review

* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-02-21 13:50:34 +02:00 committed by Aliaksandr Valialkin
parent 4606d00d7d
commit bd7837d524
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
6 changed files with 113 additions and 3 deletions

View file

@ -16,6 +16,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
@ -48,6 +49,10 @@ var (
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries")
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
cacheSizeIndexDBIndexBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBIndexBlocks", 0, "Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
cacheSizeIndexDBDataBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBDataBlocks", 0, "Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning")
)
func main() {
@ -64,6 +69,9 @@ func main() {
storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
storage.SetSmallMergeWorkersCount(*smallMergeConcurrency)
storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
storage.SetTSIDCacheSize(cacheSizeStorageTSID.N)
mergeset.SetIndexBlocksCacheSize(cacheSizeIndexDBIndexBlocks.N)
mergeset.SetDataBlocksCacheSize(cacheSizeIndexDBDataBlocks.N)
logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
startTime := time.Now()

View file

@ -14,9 +14,15 @@ The following tip changes can be tested by building VictoriaMetrics components f
## tip
* FEATURE: allow overriding default limits for the following in-memory caches, which usually occupy the most memory:
* `storage/tsid` - the cache speeds up lookups of internal metric ids by `metric_name{labels...}` during data ingestion. The size for this cache can be tuned with `-storage.cacheSizeStorageTSID` command-line flag.
* `indexdb/dataBlocks` - the cache speeds up data lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBDataBlocks` command-line flag.
* `indexdb/indexBlocks` - the cache speeds up index lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBIndexBlocks` command-line flag.
See also [cache tuning docs](https://docs.victoriametrics.com/#cache-tuning). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940).
* FEATURE: add `-influxDBLabel` command-line flag for overriding `db` label name for the data [imported into VictoriaMetrics via InfluxDB line protocol](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). Thanks to @johnatannvmd for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2203).
* FEATURE: return `X-Influxdb-Version` HTTP header in responses to [InfluxDB write requests](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). This is needed for some InfluxDB clients. See, for example, [this comment](https://github.com/ntop/ntopng/issues/5449#issuecomment-1005347597).
* BUGFIX: reduce memory usage during the first three hours after the upgrade from versions older than v1.73.0. The memory usage spike was related to the need of in-memory caches' re-population after the upgrade because of the fix for [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401). Now cache size limits are reduced in order to occupy less memory during the upgrade.
* BUGFIX: fix a bug, which could significantly slow down requests to `/api/v1/labels` and `/api/v1/label/<label_name>/values`. These APIs are used by Grafana for auto-completion of label names and label values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2200).
* BUGFIX: vmalert: add support for `$externalLabels` and `$externalURL` template vars in the same way as Prometheus does. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2193).
* BUGFIX: update default value for `-promscrape.fileSDCheckInterval`, so it matches default duration used by Prometheus for checking for updates in `file_sd_configs`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2187). Thanks to @corporate-gadfly for the fix.

View file

@ -1428,6 +1428,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
## Cache tuning
VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance.
The following metrics for each type of cache are exported at [`/metrics` page](#monitoring):
- `vm_cache_size_bytes` - the actual cache size
- `vm_cache_size_max_bytes` - cache size limit
- `vm_cache_requests_total` - the number of requests to the cache
- `vm_cache_misses_total` - the number of cache misses
- `vm_cache_entries` - the number of entries in the cache
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
contain `Caches` section with cache metrics visualized. The panels show the current
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
then cache efficiency is already very high and does not need any tuning.
The panel `Cache usage %` in `Troubleshooting` section shows the percentage of used cache size
from the allowed size by type. If the percentage is below 100%, then no further tuning needed.
Please note, default cache sizes were carefully adjusted accordingly to the most
practical scenarios and workloads. Change the defaults only if you understand the implications.
To override the default values see command-line flags with `-storage.cacheSize` prefix.
See the full description of flags [here](#list-of-command-line-flags).
## Data migration
Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types:
@ -1898,6 +1923,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
authKey, which must be passed in query string to /snapshot* pages
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.cacheSizeIndexDBIndexBlocks size
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.cacheSizeStorageTSID size
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.maxDailySeries int
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries
-storage.maxHourlySeries int

View file

@ -1432,6 +1432,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet
VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details.
## Cache tuning
VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance.
The following metrics for each type of cache are exported at [`/metrics` page](#monitoring):
- `vm_cache_size_bytes` - the actual cache size
- `vm_cache_size_max_bytes` - cache size limit
- `vm_cache_requests_total` - the number of requests to the cache
- `vm_cache_misses_total` - the number of cache misses
- `vm_cache_entries` - the number of entries in the cache
Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229)
and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176)
contain `Caches` section with cache metrics visualized. The panels show the current
memory usage by each type of cache, and also a cache hit rate. If hit rate is close to 100%
then cache efficiency is already very high and does not need any tuning.
The panel `Cache usage %` in `Troubleshooting` section shows the percentage of used cache size
from the allowed size by type. If the percentage is below 100%, then no further tuning needed.
Please note, default cache sizes were carefully adjusted accordingly to the most
practical scenarios and workloads. Change the defaults only if you understand the implications.
To override the default values see command-line flags with `-storage.cacheSize` prefix.
See the full description of flags [here](#list-of-command-line-flags).
## Data migration
Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types:
@ -1902,6 +1927,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
authKey, which must be passed in query string to /snapshot* pages
-sortLabels
Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit
-storage.cacheSizeIndexDBDataBlocks size
Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.cacheSizeIndexDBIndexBlocks size
Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.cacheSizeStorageTSID size
Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0)
-storage.maxDailySeries int
The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries
-storage.maxHourlySeries int

View file

@ -15,9 +15,16 @@ import (
var idxbCache = blockcache.NewCache(getMaxIndexBlocksCacheSize)
var ibCache = blockcache.NewCache(getMaxInmemoryBlocksCacheSize)
// SetIndexBlocksCacheSize overrides the default size of indexdb/indexBlock cache
func SetIndexBlocksCacheSize(size int) {
maxIndexBlockCacheSize = size
}
func getMaxIndexBlocksCacheSize() int {
maxIndexBlockCacheSizeOnce.Do(func() {
maxIndexBlockCacheSize = int(0.15 * float64(memory.Allowed()))
if maxIndexBlockCacheSize <= 0 {
maxIndexBlockCacheSize = int(0.10 * float64(memory.Allowed()))
}
})
return maxIndexBlockCacheSize
}
@ -27,9 +34,16 @@ var (
maxIndexBlockCacheSizeOnce sync.Once
)
// SetDataBlocksCacheSize overrides the default size of indexdb/dataBlocks cache
func SetDataBlocksCacheSize(size int) {
maxInmemoryBlockCacheSize = size
}
func getMaxInmemoryBlocksCacheSize() int {
maxInmemoryBlockCacheSizeOnce.Do(func() {
maxInmemoryBlockCacheSize = int(0.4 * float64(memory.Allowed()))
if maxInmemoryBlockCacheSize <= 0 {
maxIndexBlockCacheSize = int(0.25 * float64(memory.Allowed()))
}
})
return maxInmemoryBlockCacheSize
}

View file

@ -213,7 +213,7 @@ func OpenStorage(path string, retentionMsecs int64, maxHourlySeries, maxDailySer
// Load caches.
mem := memory.Allowed()
s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", int(float64(mem)*0.35))
s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", getTSIDCacheSize())
s.metricIDCache = s.mustLoadCache("MetricID->TSID", "metricID_tsid", mem/16)
s.metricNameCache = s.mustLoadCache("MetricID->MetricName", "metricID_metricName", mem/10)
s.dateMetricIDCache = newDateMetricIDCache()
@ -286,6 +286,20 @@ func (s *Storage) RetentionMsecs() int64 {
return s.retentionMsecs
}
var maxTSIDCacheSize int
// SetTSIDCacheSize overrides the default size of storage/tsid cahce
func SetTSIDCacheSize(size int) {
maxTSIDCacheSize = size
}
func getTSIDCacheSize() int {
if maxTSIDCacheSize <= 0 {
return int(float64(memory.Allowed()) * 0.37)
}
return maxTSIDCacheSize
}
func (s *Storage) getDeletedMetricIDs() *uint64set.Set {
return s.deletedMetricIDs.Load().(*uint64set.Set)
}