docker-compose: move TooManyLogs into vm-health alerts set (#3199)

This commit is contained in:
Zakhar Bessarab 2022-10-05 20:23:36 +03:00 committed by GitHub
parent e0ea76db62
commit 6711eec109
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 24 deletions

View file

@ -89,17 +89,6 @@ groups:
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."
- alert: TooManyLogs
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
for: 15m
labels:
severity: warning
annotations:
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}"
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
Worth to check logs for specific error messages."
- alert: RowsRejectedOnIngestion
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
for: 15m

View file

@ -51,4 +51,14 @@ groups:
annotations:
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
Consider to either increase available CPU resources or decrease the load on the process."
Consider to either increase available CPU resources or decrease the load on the process."
- alert: TooManyLogs
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
Worth to check logs for specific error messages."

View file

@ -71,17 +71,6 @@ groups:
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."
- alert: TooManyLogs
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
for: 15m
labels:
severity: warning
annotations:
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
Worth to check logs for specific error messages."
- alert: RowsRejectedOnIngestion
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
for: 15m
@ -154,4 +143,4 @@ groups:
summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit"
description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n
This prevents from ingesting metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured
correctly or that clients which send these metrics aren't misbehaving."
correctly or that clients which send these metrics aren't misbehaving."