mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
vlogs: added basic alerts (#7252)
### Describe Your Changes Added basic VLogs alerts Signed-off-by: hagen1778 <roman@victoriametrics.com> Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
952fce152a
commit
7b49d4f5dc
6 changed files with 95 additions and 0 deletions
|
@ -165,6 +165,8 @@ The list of alerting rules is the following:
|
||||||
alerting rules related to [vmalert](https://docs.victoriametrics.com/vmalert/) component;
|
alerting rules related to [vmalert](https://docs.victoriametrics.com/vmalert/) component;
|
||||||
* [alerts-vmauth.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml):
|
* [alerts-vmauth.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml):
|
||||||
alerting rules related to [vmauth](https://docs.victoriametrics.com/vmauth/) component;
|
alerting rules related to [vmauth](https://docs.victoriametrics.com/vmauth/) component;
|
||||||
|
* [alerts-vlogs.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml):
|
||||||
|
alerting rules related to [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/).
|
||||||
|
|
||||||
Please, also see [how to monitor](https://docs.victoriametrics.com/single-server-victoriametrics/#monitoring)
|
Please, also see [how to monitor](https://docs.victoriametrics.com/single-server-victoriametrics/#monitoring)
|
||||||
VictoriaMetrics installations.
|
VictoriaMetrics installations.
|
||||||
|
|
43
deployment/docker/alerts-vlogs.yml
Normal file
43
deployment/docker/alerts-vlogs.yml
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# File contains default list of alerts for VictoriaLogs single server.
|
||||||
|
# The alerts below are just recommendations and may require some updates
|
||||||
|
# and threshold calibration according to every specific setup.
|
||||||
|
groups:
  - name: vlogs
    interval: 30s
    concurrency: 2
    rules:
      # Fires when stored data occupies more than 80% of (stored data + free
      # disk space) per job/instance for 30 minutes in a row.
      - alert: DiskRunsOutOfSpace
        expr: |
          sum(vl_data_size_bytes) by(job, instance) /
          (
          sum(vl_free_disk_space_bytes) by(job, instance) +
          sum(vl_data_size_bytes) by(job, instance)
          ) > 0.8
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: 'Instance {{ $labels.instance }} (job={{ $labels.job }}) will run out of disk space soon'
          # Double quotes are required here: the value relies on the \n escape.
          description: "Disk utilisation on instance {{ $labels.instance }} is more than 80%.\n
            Having less than 20% of free disk space could cripple merge processes and overall performance.
            Consider to limit the ingestion rate, decrease retention or scale the disk space if possible."

      # Fires when the HTTP error counter keeps growing over 5m windows
      # for 15 minutes in a row.
      - alert: RequestErrorsToAPI
        expr: increase(vl_http_errors_total[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: 'Too many errors served for path {{ $labels.path }} (instance {{ $labels.instance }})'
          description: 'Requests to path {{ $labels.path }} are receiving errors.
            Please verify if clients are sending correct requests.'

      # Fires when the dropped-rows counter has a non-zero rate over 5m windows
      # for 15 minutes in a row; the `reason` label is surfaced in the description.
      - alert: RowsRejectedOnIngestion
        expr: rate(vl_rows_dropped_total[5m]) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: 'Some rows are rejected on "{{ $labels.instance }}" on ingestion attempt'
          description: 'VictoriaLogs is rejecting to ingest rows on "{{ $labels.instance }}" due to the
            following reason: "{{ $labels.reason }}"'
|
|
@ -69,6 +69,48 @@ services:
|
||||||
- vm_net
|
- vm_net
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
|
# vmalert evaluates the alerting and recording rules mounted under /etc/alerts
vmalert:
  container_name: vmalert
  image: victoriametrics/vmalert:v1.104.0
  depends_on:
    - victoriametrics
    - alertmanager
  ports:
    - 8880:8880
  volumes:
    # every rule file mounted here is matched by the --rule glob below
    - ./alerts.yml:/etc/alerts/alerts.yml
    - ./alerts-health.yml:/etc/alerts/alerts-health.yml
    - ./alerts-vlogs.yml:/etc/alerts/alerts-vlogs.yml
    - ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
  command:
    - '--datasource.url=http://victoriametrics:8428/'
    - '--remoteRead.url=http://victoriametrics:8428/'
    - '--remoteWrite.url=http://victoriametrics:8428/'
    - '--notifier.url=http://alertmanager:9093/'
    - '--rule=/etc/alerts/*.yml'
    # display source of alerts in grafana
    - '--external.url=http://127.0.0.1:3000'  # grafana outside container
    - '--external.alert.source=explore?orgId=1&left={"datasource":"VictoriaMetrics","queries":[{"expr":{{.Expr|jsonEscape|queryEscape}},"refId":"A"}],"range":{"from":"{{ .ActiveAt.UnixMilli }}","to":"now"}}'
  networks:
    - vm_net
  restart: always
|
||||||
|
|
||||||
|
# alertmanager accepts alert notifications sent by vmalert
# and routes them as configured in the file passed via --config.file.
alertmanager:
  container_name: alertmanager
  image: prom/alertmanager:v0.27.0
  volumes:
    - ./alertmanager.yml:/config/alertmanager.yml
  command:
    - '--config.file=/config/alertmanager.yml'
  ports:
    - 9093:9093
  networks:
    - vm_net
  restart: always
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
vmdata: {}
|
vmdata: {}
|
||||||
vldata: {}
|
vldata: {}
|
||||||
|
|
|
@ -5,6 +5,9 @@ scrape_configs:
|
||||||
- job_name: 'victoriametrics'
|
- job_name: 'victoriametrics'
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['victoriametrics:8428']
|
- targets: ['victoriametrics:8428']
|
||||||
|
# scrape target for the vmalert container (port 8880)
- job_name: 'vmalert'
  static_configs:
    - targets: ['vmalert:8880']
|
||||||
- job_name: 'victorialogs'
|
- job_name: 'victorialogs'
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['victorialogs:9428']
|
- targets: ['victorialogs:9428']
|
||||||
|
|
|
@ -15,6 +15,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||||
|
|
||||||
## tip
|
## tip
|
||||||
|
|
||||||
|
* FEATURE: add basic [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml) for VictoriaLogs. See details in the [monitoring docs](https://docs.victoriametrics.com/victorialogs/index.html#monitoring).
|
||||||
|
|
||||||
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
|
## [v0.36.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.36.0-victorialogs)
|
||||||
|
|
||||||
Released at 2024-10-16
|
Released at 2024-10-16
|
||||||
|
|
|
@ -38,6 +38,9 @@ It is recommended to set up monitoring of these metrics via VictoriaMetrics
|
||||||
(see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)),
|
(see [these docs](https://docs.victoriametrics.com/#how-to-scrape-prometheus-exporters-such-as-node-exporter)),
|
||||||
vmagent (see [these docs](https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format)) or via Prometheus.
|
vmagent (see [these docs](https://docs.victoriametrics.com/vmagent/#how-to-collect-metrics-in-prometheus-format)) or via Prometheus.
|
||||||
|
|
||||||
|
We recommend setting up [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vlogs.yml)
|
||||||
|
via [vmalert](https://docs.victoriametrics.com/vmalert/) or via Prometheus.
|
||||||
|
|
||||||
VictoriaLogs emits its own logs to stdout. It is recommended to investigate these logs during troubleshooting.
|
VictoriaLogs emits its own logs to stdout. It is recommended to investigate these logs during troubleshooting.
|
||||||
|
|
||||||
## Upgrading
|
## Upgrading
|
||||||
|
|
Loading…
Reference in a new issue