From 91533531f56d32206b711a15ebaadddf310ddd34 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Mon, 20 Mar 2023 13:28:33 -0700
Subject: [PATCH] docs/Troubleshooting.md: document an additional case, which could result in slow inserts

If `-cacheExpireDuration` is lower than the interval between ingested samples
for the same time series, then the `vm_slow_row_inserts_total` metric is increased.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183
---
 dashboards/victoriametrics-cluster.json | 2 +-
 dashboards/victoriametrics.json         | 2 +-
 deployment/docker/alerts-cluster.yml    | 3 ++-
 deployment/docker/alerts.yml            | 3 ++-
 docs/Troubleshooting.md                 | 5 +++++
 5 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/dashboards/victoriametrics-cluster.json b/dashboards/victoriametrics-cluster.json
index 2cd8e7225..5bb50de31 100644
--- a/dashboards/victoriametrics-cluster.json
+++ b/dashboards/victoriametrics-cluster.json
@@ -2746,7 +2746,7 @@
         "type": "prometheus",
         "uid": "$ds"
       },
-      "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on may other factors such as the number of labels per time series and the length of label values.",
+      "description": "The percentage of slow inserts compared to the total insertion rate during the last 5 minutes. \n\nThe lower the value, the better. If the percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB of RAM per active time series, so it should be easy to calculate the required amount of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amount of memory depends on many other factors such as the number of labels per time series and the length of label values. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183",
       "fieldConfig": {
         "defaults": {
           "color": {
diff --git a/dashboards/victoriametrics.json b/dashboards/victoriametrics.json
index 01068ca4e..b64e344a7 100644
--- a/dashboards/victoriametrics.json
+++ b/dashboards/victoriametrics.json
@@ -2803,7 +2803,7 @@
         "type": "prometheus",
         "uid": "$ds"
       },
-      "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on may other factors such as the number of labels per time series and the length of label values.",
+      "description": "The percentage of slow inserts compared to the total insertion rate during the last 5 minutes. \n\nThe lower the value, the better. If the percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB of RAM per active time series, so it should be easy to calculate the required amount of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amount of memory depends on many other factors such as the number of labels per time series and the length of label values. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183",
       "fieldConfig": {
         "defaults": {
           "color": {
diff --git a/deployment/docker/alerts-cluster.yml b/deployment/docker/alerts-cluster.yml
index f1161a48b..72dcd75d0 100644
--- a/deployment/docker/alerts-cluster.yml
+++ b/deployment/docker/alerts-cluster.yml
@@ -152,7 +152,8 @@ groups:
           dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=108"
           summary: "Percentage of slow inserts is more than 5% for the last 15m"
           description: "High rate of slow inserts may be a sign of resource exhaustion
-            for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series."
+            for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
+            See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183"
 
       - alert: ProcessNearFDLimits
         expr: (process_max_fds - process_open_fds) < 100
diff --git a/deployment/docker/alerts.yml b/deployment/docker/alerts.yml
index 954f037b8..49cb4317e 100644
--- a/deployment/docker/alerts.yml
+++ b/deployment/docker/alerts.yml
@@ -132,7 +132,8 @@ groups:
           dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance }}"
           summary: "Percentage of slow inserts is more than 5% on \"{{ $labels.instance }}\" for the last 15m"
          description: "High rate of slow inserts on \"{{ $labels.instance }}\" may be a sign of resource exhaustion
-            for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series."
+            for the current load. It is likely more RAM is needed for optimal handling of the current number of active time series.
+            See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183"
 
       - alert: LabelsLimitExceededOnIngestion
         expr: sum(increase(vm_metrics_with_dropped_labels_total[5m])) by (instance) > 0
diff --git a/docs/Troubleshooting.md b/docs/Troubleshooting.md
index b6030bf24..c7a369a97 100644
--- a/docs/Troubleshooting.md
+++ b/docs/Troubleshooting.md
@@ -186,6 +186,11 @@ There are the following most commons reasons for slow data ingestion in Victoria
    Issues like this are very hard to catch via [official Grafana dashboard for cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#monitoring)
    and proper diagnosis would require checking resource usage on the instances where VictoriaMetrics runs.
 
+6. If you see `TooHighSlowInsertsRate` [alert](https://docs.victoriametrics.com/#monitoring) when single-node VictoriaMetrics or `vmstorage` has enough
+   free CPU and RAM, then increase the `-cacheExpireDuration` command-line flag at single-node VictoriaMetrics or at `vmstorage` to a value
+   which exceeds the interval between ingested samples for the same time series (aka `scrape_interval`).
+   See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for more details.
+
 ## Slow queries
 
 Some queries may take more time and resources (CPU, RAM, network bandwidth) than others.
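
As an illustrative sketch of the remedy added to docs/Troubleshooting.md above: when samples for the same time series arrive less frequently than `-cacheExpireDuration` allows cache entries to live (30m by default), the entries expire between samples and inserts become slow, so the flag can be raised above the ingestion interval. The Docker Compose layout below is an assumption for illustration only: the service name, image tag, the `2h` value and the extra flags are not part of the patch; only `-cacheExpireDuration` and its relation to the sample interval come from the text above.

```yaml
# Sketch only: single-node VictoriaMetrics whose series receive new samples
# roughly once per hour, so -cacheExpireDuration is raised above that interval
# as suggested in the Troubleshooting entry above. Image tag and flag values
# are illustrative assumptions, not taken from the patch.
services:
  victoriametrics:
    image: victoriametrics/victoria-metrics:latest
    command:
      - "-storageDataPath=/victoria-metrics-data"
      # Samples for the same time series arrive every ~1h, so keep cache
      # entries alive for 2h instead of the default 30m.
      - "-cacheExpireDuration=2h"
    ports:
      - "8428:8428"
    volumes:
      - vmdata:/victoria-metrics-data
volumes:
  vmdata: {}
```

For the cluster version, the same flag would be passed to the `vmstorage` containers instead, as the new Troubleshooting entry notes.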