docker-compose: move TooManyLogs into vm-health alerts set (#3199)

2025-03-11 15:34:56 +00:00 · 2022-10-05 20:23:36 +03:00 · 2022-10-05 20:23:36 +03:00 · 434b00cee8
commit 434b00cee8
parent 8469670fb2
3 changed files with 12 additions and 24 deletions
--- a/deployment/docker/alerts-cluster.yml
+++ b/deployment/docker/alerts-cluster.yml
@@ -89,17 +89,6 @@ groups:
          description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
            When vmstorage constantly hits the limit it means that storage is overloaded and requires more CPU."

-      - alert: TooManyLogs
-        expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
-        for: 15m
-        labels:
-          severity: warning
-        annotations:
-          dashboard: "http://localhost:3000/d/oS7Bi_0Wz?viewPanel=104&var-instance={{ $labels.instance }}"
-          summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
-          description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
-           Worth to check logs for specific error messages."
-
      - alert: RowsRejectedOnIngestion
        expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
        for: 15m
--- a/deployment/docker/alerts-health.yml
+++ b/deployment/docker/alerts-health.yml
@@ -52,3 +52,13 @@ groups:
          summary: "More than 90% of CPU is used by \"{{ $labels.job }}\"(\"{{ $labels.instance }}\") during the last 5m"
          description: "Too high CPU usage may be a sign of insufficient resources and make process unstable.
               Consider to either increase available CPU resources or decrease the load on the process."
+
+      - alert: TooManyLogs
+        expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
+          description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
+            Worth to check logs for specific error messages."
--- a/deployment/docker/alerts.yml
+++ b/deployment/docker/alerts.yml
@@ -71,17 +71,6 @@ groups:
          description: "The limit of concurrent flushes on instance {{ $labels.instance }} is equal to number of CPUs.\n
            When VictoriaMetrics constantly hits the limit it means that storage is overloaded and requires more CPU."

-      - alert: TooManyLogs
-        expr: sum(increase(vm_log_messages_total{level="error"}[5m])) by (job, instance) > 0
-        for: 15m
-        labels:
-          severity: warning
-        annotations:
-          dashboard: "http://localhost:3000/d/wNf0q_kZk?viewPanel=67&var-instance={{ $labels.instance }}"
-          summary: "Too many logs printed for job \"{{ $labels.job }}\" ({{ $labels.instance }})"
-          description: "Logging rate for job \"{{ $labels.job }}\" ({{ $labels.instance }}) is {{ $value }} for last 15m.\n
-           Worth to check logs for specific error messages."
-
      - alert: RowsRejectedOnIngestion
        expr: sum(rate(vm_rows_ignored_total[5m])) by (instance, reason) > 0
        for: 15m