diff --git a/deployment/docker/alerts-cluster.yml b/deployment/docker/alerts-cluster.yml index 5f2587cf21..c4918c2a10 100644 --- a/deployment/docker/alerts-cluster.yml +++ b/deployment/docker/alerts-cluster.yml @@ -89,17 +89,6 @@ groups: description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU." - - alert: TooManyLogs - expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0 - for: 15m - labels: - severity: warning - annotations: - dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}" - summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})" - description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n - Worth to check logs for specific error messages." - - alert: RowsRejectedOnIngestion expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0 for: 15m diff --git a/deployment/docker/alerts-health.yml b/deployment/docker/alerts-health.yml index 489a7035fe..4668718e64 100644 --- a/deployment/docker/alerts-health.yml +++ b/deployment/docker/alerts-health.yml @@ -51,4 +51,14 @@ groups: annotations: summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m" description: "Too high CPU usage may be a sign of insufficient resources and make process unstable. - Consider to either increase available CPU resources or decrease the load on the process." \ No newline at end of file + Consider to either increase available CPU resources or decrease the load on the process." + + - alert: TooManyLogs + expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0 + for: 15m + labels: + severity: warning + annotations: + summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})" + description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n + Worth to check logs for specific error messages." diff --git a/deployment/docker/alerts.yml b/deployment/docker/alerts.yml index 7cf31ff8ae..bdcf625ec1 100644 --- a/deployment/docker/alerts.yml +++ b/deployment/docker/alerts.yml @@ -71,17 +71,6 @@ groups: description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU." - - alert: TooManyLogs - expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0 - for: 15m - labels: - severity: warning - annotations: - dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}" - summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})" - description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n - Worth to check logs for specific error messages." - - alert: RowsRejectedOnIngestion expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0 for: 15m @@ -154,4 +143,4 @@ groups: summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit" description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n This prevents from ingesting metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured - correctly or that clients which send these metrics aren't misbehaving." \ No newline at end of file + correctly or that clients which send these metrics aren't misbehaving."