mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-31 15:06:26 +00:00
9616814728
Addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6706.
See
https://github.com/VictoriaMetrics/VictoriaMetrics/blob/vmalert-support-vlog-ds/docs/VictoriaLogs/vmalert.md.
Related fix
https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7254.
Note: in this pull request, vmalert doesn't support
[backfilling](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/vmalert-support-vlog-ds/docs/VictoriaLogs/vmalert.md#rules-backfilling)
for rules with a customized time filter. It might be added in the
future, see [this
issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7289)
for details.
Feature can be tested with image
`victoriametrics/vmalert:heads-vmalert-support-vlog-ds-0-g420629c-scratch`.
---------
Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
(cherry picked from commit 68bad22fd2)
29 lines
1.4 KiB
Text
29 lines
1.4 KiB
Text
# vmalert rule groups evaluated against a VictoriaLogs datasource
# (every group sets `type: vlogs`, so each `expr` is a LogsQL query).
groups:
  # Recording rules that count requests over the 5m evaluation interval.
  - name: RequestCount
    type: vlogs
    interval: 5m
    rules:
      # Total number of log entries with env "test" and service "nginx".
      # NOTE(review): annotations are set on recording rules here; confirm
      # that vmalert actually uses them for `record` rules.
      - record: nginxRequestCount
        expr: 'env: "test" AND service: "nginx" | stats count(*) as requests'
        annotations:
          description: "Service nginx on env test accepted {{$labels.requests}} requests in the last 5 minutes"
      # Per-service number of log entries with env "prod".
      - record: prodRequestCount
        expr: 'env: "prod" | stats by (service) count(*) as requests'
        annotations:
          description: "Service {{$labels.service}} on env prod accepted {{$labels.requests}} requests in the last 5 minutes"

  # Alerts on error/warn log entries in prod, evaluated every 5m.
  - name: ServiceLog
    type: vlogs
    interval: 5m
    rules:
      # Counts entries per service whose status matches "error|warn" and
      # keeps only services with a non-zero count.
      - alert: HasErrorLog
        expr: 'env: "prod" AND status:~"error|warn" | stats by (service) count(*) as errorLog | filter errorLog:>0'
        annotations:
          description: "Service {{$labels.service}} generated {{$labels.errorLog}} error logs in the last 5 minutes"

  # Alerts on a high ratio of failed requests per client IP, evaluated every 10m.
  - name: ServiceRequest
    type: vlogs
    interval: 10m
    rules:
      # Extracts `ip` and `code` from each entry, computes per-ip
      # failed / total (failed = code not matching 200), and keeps the
      # ips whose ratio exceeds 0.01.
      - alert: TooManyFailedRequest
        expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:!~200) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
        annotations:
          description: "Connection from address {{$labels.ip}} has {{$value}} failed requests ratio in last 10 minutes"