From 3538869942f816f0e64ee0d508f66bbb7e31280c Mon Sep 17 00:00:00 2001
From: Andrii Chubatiuk
Date: Thu, 17 Oct 2024 12:33:06 +0300
Subject: [PATCH] vlogs: added basic alerts (#7252)

### Describe Your Changes

Added basic VLogs alerts

Signed-off-by: hagen1778
Co-authored-by: hagen1778
---
 deployment/docker/README.md                   |  2 +
 deployment/docker/alerts-vlogs.yml            | 43 +++++++++++++++++++
 .../docker/docker-compose-victorialogs.yml    | 42 ++++++++++++++++++
 deployment/docker/prometheus-victorialogs.yml |  3 ++
 docs/VictoriaLogs/CHANGELOG.md                |  2 +
 docs/VictoriaLogs/README.md                   |  3 ++
 6 files changed, 95 insertions(+)
 create mode 100644 deployment/docker/alerts-vlogs.yml

diff --git a/deployment/docker/README.md b/deployment/docker/README.md
index ef9318f98..cd012c09a 100644
--- a/deployment/docker/README.md
+++ b/deployment/docker/README.md
@@ -165,6 +165,8 @@ The list of alerting rules is the following:
   alerting rules related to [vmalert](https://docs.victoriametrics.com/vmalert/) component;
 * [alerts-vmauth.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml):
   alerting rules related to [vmauth](https://docs.victoriametrics.com/vmauth/) component;
+* [alerts-vlogs.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml):
+  alerting rules related to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/);
 
 Please, also see [how to monitor](https://docs.victoriametrics.com/single-server-victoriametrics/#monitoring)
 VictoriaMetrics installations.
diff --git a/deployment/docker/alerts-vlogs.yml b/deployment/docker/alerts-vlogs.yml
new file mode 100644
index 000000000..44c8032c7
--- /dev/null
+++ b/deployment/docker/alerts-vlogs.yml
@@ -0,0 +1,43 @@
+# File contains default list of alerts for VictoriaLogs single server.
+# The alerts below are just recommendations and may require some updates
+# and threshold calibration according to every specific setup.
+groups:
+  - name: vlogs
+    interval: 30s  # how often vmalert evaluates the rules of this group
+    concurrency: 2  # how many rules of this group vmalert evaluates at once
+    rules:
+      - alert: DiskRunsOutOfSpace
+        expr: |
+          sum(vl_data_size_bytes) by(job, instance) /
+          (
+            sum(vl_free_disk_space_bytes) by(job, instance) +
+            sum(vl_data_size_bytes) by(job, instance)
+          ) > 0.8
+        for: 30m  # the ratio must stay above 0.8 for 30 minutes before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon"
+          description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
+            Having less than 20% of free disk space could cripple merge processes and overall performance.
+            Consider to limit the ingestion rate, decrease retention or scale the disk space if possible."
+
+      - alert: RequestErrorsToAPI
+        expr: increase(vl_http_errors_total[5m]) > 0  # the error counter grew within the last 5 minutes
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Too many errors served for path {{ $labels.path }} (instance {{ $labels.instance }})"
+          description: "Requests to path {{ $labels.path }} are receiving errors.
+            Please verify if clients are sending correct requests."
+
+      - alert: RowsRejectedOnIngestion
+        expr: rate(vl_rows_dropped_total[5m]) > 0  # rows were dropped within the last 5 minutes
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Some rows are rejected on \"{{ $labels.instance }}\" on ingestion attempt"
+          description: "VictoriaLogs is rejecting to ingest rows on \"{{ $labels.instance }}\" due to the
+            following reason: \"{{ $labels.reason }}\""
diff --git a/deployment/docker/docker-compose-victorialogs.yml b/deployment/docker/docker-compose-victorialogs.yml
index 591cd7f11..9f3192f0f 100644
--- a/deployment/docker/docker-compose-victorialogs.yml
+++ b/deployment/docker/docker-compose-victorialogs.yml
@@ -69,6 +69,48 @@ services:
       - vm_net
     restart: always
 
+  # vmalert executes alerting and recording rules
+  vmalert:
+    container_name: vmalert
+    image: victoriametrics/vmalert:v1.104.0
+    depends_on:
+      - "victoriametrics"
+      - "alertmanager"
+    ports:
+      - 8880:8880
+    volumes:
+      - ./alerts.yml:/etc/alerts/alerts.yml
+      - ./alerts-health.yml:/etc/alerts/alerts-health.yml
+      - ./alerts-vlogs.yml:/etc/alerts/alerts-vlogs.yml
+      - ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
+    command:
+      - "--datasource.url=http://victoriametrics:8428/"
+      - "--remoteRead.url=http://victoriametrics:8428/"
+      - "--remoteWrite.url=http://victoriametrics:8428/"
+      - "--notifier.url=http://alertmanager:9093/"
+      - "--rule=/etc/alerts/*.yml"  # glob matches every rules file mounted into /etc/alerts above
+      # display source of alerts in grafana
+      - "--external.url=http://127.0.0.1:3000" #grafana outside container
+      - '--external.alert.source=explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{.Expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'
+    networks:
+      - vm_net
+    restart: always
+
+  # alertmanager receives alerting notifications from vmalert
+  # and distributes them according to --config.file.
+  alertmanager:
+    container_name: alertmanager
+    image: prom/alertmanager:v0.27.0
+    volumes:
+      - ./alertmanager.yml:/config/alertmanager.yml
+    command:
+      - "--config.file=/config/alertmanager.yml"  # path of the file mounted via the volume above
+    ports:
+      - 9093:9093
+    networks:
+      - vm_net
+    restart: always
+
 volumes:
   vmdata: {}
   vldata: {}
diff --git a/deployment/docker/prometheus-victorialogs.yml b/deployment/docker/prometheus-victorialogs.yml
index 0bbcafe8d..7a20550d5 100644
--- a/deployment/docker/prometheus-victorialogs.yml
+++ b/deployment/docker/prometheus-victorialogs.yml
@@ -5,6 +5,9 @@ scrape_configs:
   - job_name: 'victoriametrics'
     static_configs:
       - targets: ['victoriametrics:8428']
+  - job_name: 'vmalert'
+    static_configs:
+      - targets: ['vmalert:8880']
   - job_name: 'victorialogs'
     static_configs:
       - targets: ['victorialogs:9428']
diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md
index 8d65d811a..a16a6056b 100644
--- a/docs/VictoriaLogs/CHANGELOG.md
+++ b/docs/VictoriaLogs/CHANGELOG.md
@@ -15,6 +15,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
 
 ## tip
 
+* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs process. See details at [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
+
 ## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
 
 Released at 2024-10-16
diff --git a/docs/VictoriaLogs/README.md b/docs/VictoriaLogs/README.md
index e2bc665e8..79975ceb4 100644
--- a/docs/VictoriaLogs/README.md
+++ b/docs/VictoriaLogs/README.md
@@ -38,6 +38,9 @@ It is recommended to set up monitoring of these metrics via VictoriaMetrics
 (see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)),
 vmagent (see [these docs](https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format)) or via Prometheus.
 
+We recommend setting up [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml)
+via [vmalert](https://docs.victoriametrics.com/vmalert/) or via Prometheus.
+
 VictoriaLogs emits its own logs to stdout. It is recommended to investigate these logs during troubleshooting.
 
 ## Upgrading