mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-03-11 15:34:56 +00:00
docker-compose: move TooManyLogs into vm-health alerts set (#3199)
This commit is contained in:
parent 8469670fb2
commit 434b00cee8
3 changed files with 12 additions and 24 deletions
|
@ -89,17 +89,6 @@ groups:
|
||||||
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
||||||
When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."
|
When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."
|
||||||
|
|
||||||
- alert: TooManyLogs
|
|
||||||
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}"
|
|
||||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
|
||||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
|
||||||
Worth to check logs for specific error messages."
|
|
||||||
|
|
||||||
- alert: RowsRejectedOnIngestion
|
- alert: RowsRejectedOnIngestion
|
||||||
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
|
|
|
@ -51,4 +51,14 @@ groups:
|
||||||
annotations:
|
annotations:
|
||||||
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
|
summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
|
||||||
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
|
description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
|
||||||
Consider to either increase available CPU resources or decrease the load on the process."
|
Consider to either increase available CPU resources or decrease the load on the process."
|
||||||
|
|
||||||
|
- alert: TooManyLogs
|
||||||
|
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
||||||
|
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
||||||
|
Worth to check logs for specific error messages."
|
||||||
|
|
|
@ -71,17 +71,6 @@ groups:
|
||||||
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
|
||||||
When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."
|
When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."
|
||||||
|
|
||||||
- alert: TooManyLogs
|
|
||||||
expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
|
|
||||||
for: 15m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
|
|
||||||
summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
|
|
||||||
description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
|
|
||||||
Worth to check logs for specific error messages."
|
|
||||||
|
|
||||||
- alert: RowsRejectedOnIngestion
|
- alert: RowsRejectedOnIngestion
|
||||||
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
|
@ -154,4 +143,4 @@ groups:
|
||||||
summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit"
|
summary: "Metrics ingested in ({{ $labels.instance }}) are exceeding labels limit"
|
||||||
description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n
|
description: "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n
|
||||||
This prevents from ingesting metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured
|
This prevents from ingesting metrics with too many labels. Please verify that `-maxLabelsPerTimeseries` is configured
|
||||||
correctly or that clients which send these metrics aren't misbehaving."
|
correctly or that clients which send these metrics aren't misbehaving."
|
||||||
|
|
Loading…
Reference in a new issue