mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
app/vmstorage: add vm_slow_metric_name_loads_total
metric, which could be used as an indicator when more RAM is needed for improving query performance
This commit is contained in:
parent
82ccdfaa91
commit
82ffbcb9a6
4 changed files with 21 additions and 8 deletions
11
README.md
11
README.md
|
@ -911,8 +911,11 @@ The most interesting metrics are:
|
||||||
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
||||||
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
||||||
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
||||||
If this value remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||||
for the current number of active time series.
|
of the current number of active time series.
|
||||||
|
* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
|
||||||
|
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||||
|
of the current number of active time series.
|
||||||
|
|
||||||
|
|
||||||
### Troubleshooting
|
### Troubleshooting
|
||||||
|
@ -925,9 +928,9 @@ The most interesting metrics are:
|
||||||
|
|
||||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||||
then it is likely you have too many active time series for the current amount of RAM.
|
then it is likely you have too many active time series for the current amount of RAM.
|
||||||
See `vm_slow_row_inserts_total` and `vm_slow_per_day_index_inserts_total` [metrics](#monitoring).
|
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
|
||||||
It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve
|
It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||||
ingestion performance in this case.
|
ingestion and query performance in this case.
|
||||||
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
||||||
option, since too large a value for `-memory.allowedPercent` may result in high I/O usage.
|
option, since too large a value for `-memory.allowedPercent` may result in high I/O usage.
|
||||||
|
|
||||||
|
|
|
@ -415,6 +415,9 @@ func registerStorageMetrics() {
|
||||||
metrics.NewGauge(`vm_slow_per_day_index_inserts_total`, func() float64 {
|
metrics.NewGauge(`vm_slow_per_day_index_inserts_total`, func() float64 {
|
||||||
return float64(m().SlowPerDayIndexInserts)
|
return float64(m().SlowPerDayIndexInserts)
|
||||||
})
|
})
|
||||||
|
metrics.NewGauge(`vm_slow_metric_name_loads_total`, func() float64 {
|
||||||
|
return float64(m().SlowMetricNameLoads)
|
||||||
|
})
|
||||||
|
|
||||||
metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
|
metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
|
||||||
return float64(tm().BigRowsCount)
|
return float64(tm().BigRowsCount)
|
||||||
|
|
|
@ -911,8 +911,11 @@ The most interesting metrics are:
|
||||||
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
* `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
|
||||||
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
* `sum(vm_data_size_bytes)` - the total size of data on disk.
|
||||||
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
|
||||||
If this value remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||||
for the current number of active time series.
|
of the current number of active time series.
|
||||||
|
* `increase(vm_slow_metric_name_loads_total[5m])` - the number of slow loads of metric names during the last 5 minutes.
|
||||||
|
If this number remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
|
||||||
|
of the current number of active time series.
|
||||||
|
|
||||||
|
|
||||||
### Troubleshooting
|
### Troubleshooting
|
||||||
|
@ -925,9 +928,9 @@ The most interesting metrics are:
|
||||||
|
|
||||||
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
|
||||||
then it is likely you have too many active time series for the current amount of RAM.
|
then it is likely you have too many active time series for the current amount of RAM.
|
||||||
See `vm_slow_row_inserts_total` and `vm_slow_per_day_index_inserts_total` [metrics](#monitoring).
|
VictoriaMetrics [exposes](#monitoring) `vm_slow_*` metrics, which could be used as an indicator of low amounts of RAM.
|
||||||
It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve
|
It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve
|
||||||
ingestion performance in this case.
|
ingestion and query performance in this case.
|
||||||
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
|
||||||
option, since too large a value for `-memory.allowedPercent` may result in high I/O usage.
|
option, since too large a value for `-memory.allowedPercent` may result in high I/O usage.
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,7 @@ type Storage struct {
|
||||||
|
|
||||||
slowRowInserts uint64
|
slowRowInserts uint64
|
||||||
slowPerDayIndexInserts uint64
|
slowPerDayIndexInserts uint64
|
||||||
|
slowMetricNameLoads uint64
|
||||||
|
|
||||||
path string
|
path string
|
||||||
cachePath string
|
cachePath string
|
||||||
|
@ -328,6 +329,7 @@ type Metrics struct {
|
||||||
|
|
||||||
SlowRowInserts uint64
|
SlowRowInserts uint64
|
||||||
SlowPerDayIndexInserts uint64
|
SlowPerDayIndexInserts uint64
|
||||||
|
SlowMetricNameLoads uint64
|
||||||
|
|
||||||
TSIDCacheSize uint64
|
TSIDCacheSize uint64
|
||||||
TSIDCacheSizeBytes uint64
|
TSIDCacheSizeBytes uint64
|
||||||
|
@ -385,6 +387,7 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
|
||||||
|
|
||||||
m.SlowRowInserts += atomic.LoadUint64(&s.slowRowInserts)
|
m.SlowRowInserts += atomic.LoadUint64(&s.slowRowInserts)
|
||||||
m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
|
m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
|
||||||
|
m.SlowMetricNameLoads += atomic.LoadUint64(&s.slowMetricNameLoads)
|
||||||
|
|
||||||
var cs fastcache.Stats
|
var cs fastcache.Stats
|
||||||
s.tsidCache.UpdateStats(&cs)
|
s.tsidCache.UpdateStats(&cs)
|
||||||
|
@ -814,6 +817,7 @@ func (s *Storage) prefetchMetricNames(tsids []TSID) error {
|
||||||
}
|
}
|
||||||
metricIDs = append(metricIDs, metricID)
|
metricIDs = append(metricIDs, metricID)
|
||||||
}
|
}
|
||||||
|
atomic.AddUint64(&s.slowMetricNameLoads, uint64(len(metricIDs)))
|
||||||
if len(metricIDs) < 500 {
|
if len(metricIDs) < 500 {
|
||||||
// It is cheaper to skip pre-fetching and obtain metricNames inline.
|
// It is cheaper to skip pre-fetching and obtain metricNames inline.
|
||||||
return nil
|
return nil
|
||||||
|
|
Loading…
Reference in a new issue