docker-compose: move TooManyLogs into vm-health alerts set (#3199)

This commit is contained in:
Zakhar Bessarab 2022-10-05 20:23:36 +03:00 committed by Aliaksandr Valialkin
parent 8469670fb2
commit 434b00cee8
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
3 changed files with 12 additions and 24 deletions

View file

@@ -89,17 +89,6 @@ groups:
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."
# Warns when a job/instance keeps emitting error-level log messages.
# The expression sums the 5m increase of vm_log_messages_total{level="error"}
# per (job, instance); the rule fires once that sum has stayed above 0 for 15m.
- alert: TooManyLogs
  expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
  for: 15m
  labels:
    severity: warning
  annotations:
    # Dashboard link; the instance label is passed via the var-instance query parameter.
    dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}"
    summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
    # $value is the result of expr, i.e. the increase over the 5m range window;
    # `for: 15m` is only the pending duration, so the message reports 5m.
    description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 5m.\n
      Worth to check logs for specific error messages."
- alert: RowsRejectedOnIngestion
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
for: 15m

View file

@@ -52,3 +52,13 @@ groups:
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
Consider to either increase available CPU resources or decrease the load on the process."
# Warns when a job/instance keeps emitting error-level log messages.
# The expression sums the 5m increase of vm_log_messages_total{level="error"}
# per (job, instance); the rule fires once that sum has stayed above 0 for 15m.
- alert: TooManyLogs
  expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
  for: 15m
  labels:
    severity: warning
  annotations:
    summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
    # $value is the result of expr, i.e. the increase over the 5m range window;
    # `for: 15m` is only the pending duration, so the message reports 5m.
    description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 5m.\n
      Worth to check logs for specific error messages."

View file

@@ -71,17 +71,6 @@ groups:
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."
# Warns when a job/instance keeps emitting error-level log messages.
# The expression sums the 5m increase of vm_log_messages_total{level="error"}
# per (job, instance); the rule fires once that sum has stayed above 0 for 15m.
- alert: TooManyLogs
  expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
  for: 15m
  labels:
    severity: warning
  annotations:
    # Dashboard link; the instance label is passed via the var-instance query parameter.
    dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
    summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
    # $value is the result of expr, i.e. the increase over the 5m range window;
    # `for: 15m` is only the pending duration, so the message reports 5m.
    description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 5m.\n
      Worth to check logs for specific error messages."
- alert: RowsRejectedOnIngestion
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
for: 15m