mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
docker-compose: move TooManyLogs
into vm-health
alerts set (#3199)
This commit is contained in:
parent
e0ea76db62
commit
6711eec109
3 changed files with 12 additions and 24 deletions
|
@ -89,17 +89,6 @@ groups:
|
|||
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
||||
When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."
|
||||
|
||||
- alert: TooManyLogs
|
||||
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
||||
Worth to check logs for specific error messages."
|
||||
|
||||
- alert: RowsRejectedOnIngestion
|
||||
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
||||
for: 15m
|
||||
|
|
|
@ -51,4 +51,14 @@ groups:
|
|||
annotations:
|
||||
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
|
||||
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
|
||||
Consider to either increase available CPU resources or decrease the load on the process."
|
||||
Consider to either increase available CPU resources or decrease the load on the process."
|
||||
|
||||
- alert: TooManyLogs
|
||||
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
||||
Worth to check logs for specific error messages."
|
||||
|
|
|
@ -71,17 +71,6 @@ groups:
|
|||
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
||||
When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."
|
||||
|
||||
- alert: TooManyLogs
|
||||
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
|
||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
||||
Worth to check logs for specific error messages."
|
||||
|
||||
- alert: RowsRejectedOnIngestion
|
||||
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
||||
for: 15m
|
||||
|
@ -154,4 +143,4 @@ groups:
|
|||
summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit"
|
||||
description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n
|
||||
This prevents from ingesting metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured
|
||||
correctly or that clients which send these metrics aren't misbehaving."
|
||||
correctly or that clients which send these metrics aren't misbehaving."
|
||||
|
|
Loading…
Reference in a new issue