Vmanomaly Guide dashboard provisioning (#5679)
* dashboard provisioning * delete dashboard filter, new query * dashboard screens, guide fixes
|
@ -1,9 +0,0 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: true
|
||||
|
|
@ -41,7 +41,9 @@ services:
|
|||
- 3000:3000
|
||||
volumes:
|
||||
- grafanadata-guide-vmanomaly-vmalert:/var/lib/grafana
|
||||
- ./datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml
|
||||
- ./provisioning/datasources:/etc/grafana/provisioning/datasources
|
||||
- ./provisioning/dashboards:/etc/grafana/provisioning/dashboards
|
||||
- ./vmanomaly_guide_dashboard.json:/var/lib/grafana/dashboards/vmanomaly_guide_dashboard.json
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
|
@ -71,21 +73,21 @@ services:
|
|||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.7.2
|
||||
image: victoriametrics/vmanomaly:v1.8.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
- "8500:8500"
|
||||
- "8490:8490"
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
volumes:
|
||||
- ./vmanomaly_config.yml:/config.yaml
|
||||
- ./vmanomaly_license.txt:/license.txt
|
||||
- ./vmanomaly_license:/license
|
||||
platform: "linux/amd64"
|
||||
command:
|
||||
- "/config.yaml"
|
||||
- "--license-file=/license.txt"
|
||||
- "--license-file=/license"
|
||||
alertmanager:
|
||||
container_name: alertmanager
|
||||
image: prom/alertmanager:v0.25.0
|
||||
|
|
|
@ -16,4 +16,4 @@ scrape_configs:
|
|||
- targets: ['node-exporter:9100']
|
||||
- job_name: 'vmanomaly'
|
||||
static_configs:
|
||||
- targets: [ 'vmanomaly:8500' ]
|
||||
- targets: [ 'vmanomaly:8490' ]
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: Prometheus
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
|
@ -0,0 +1,11 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: true
|
||||
jsonData:
|
||||
prometheusType: Prometheus
|
||||
prometheusVersion: 2.24.0
|
|
@ -1,6 +1,6 @@
|
|||
scheduler:
|
||||
infer_every: "1m"
|
||||
fit_every: "2h"
|
||||
fit_every: "2m"
|
||||
fit_window: "14d"
|
||||
|
||||
model:
|
||||
|
@ -10,8 +10,9 @@ model:
|
|||
|
||||
reader:
|
||||
datasource_url: "http://victoriametrics:8428/"
|
||||
sampling_period: "60s"
|
||||
queries:
|
||||
node_cpu_rate: "rate(node_cpu_seconds_total)"
|
||||
node_cpu_rate: "quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))"
|
||||
|
||||
writer:
|
||||
datasource_url: "http://victoriametrics:8428/"
|
||||
|
@ -20,4 +21,4 @@ writer:
|
|||
monitoring:
|
||||
pull: # Enable /metrics endpoint.
|
||||
addr: "0.0.0.0"
|
||||
port: 8500
|
||||
port: 8490
|
||||
|
|
|
@ -0,0 +1,482 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"description": "",
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 19,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 33,
|
||||
"options": {
|
||||
"code": {
|
||||
"language": "plaintext",
|
||||
"showLineNumbers": false,
|
||||
"showMiniMap": false
|
||||
},
|
||||
"content": "If you don't see any data, please wait a few minutes. \n\nYou will see a lot of false positive anomalies when you run the guide for the first time. \nThe prediction must be more accurate if you provide vmanomaly 2w of data.\n\nEvery row represents information for one specific mode. \nThe query for anomaly detection is `quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))`\nThis is a median (or 50% quantileof `rate` function over `node_cpu_seconds_total`)",
|
||||
"mode": "markdown"
|
||||
},
|
||||
"pluginVersion": "10.2.1",
|
||||
"title": "Overview",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 5
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [],
|
||||
"repeat": "mode",
|
||||
"repeatDirection": "h",
|
||||
"title": "CPU Mode: $mode",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"description": "quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"__systemRef": "hideSeriesFrom",
|
||||
"matcher": {
|
||||
"id": "byNames",
|
||||
"options": {
|
||||
"mode": "exclude",
|
||||
"names": [
|
||||
"Instance: node-exporter:9100, Job: node-exporter"
|
||||
],
|
||||
"prefix": "All except:",
|
||||
"readOnly": true
|
||||
}
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 6
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "quantile by (mode, instance,job) (0.5, rate(node_cpu_seconds_total{mode=~\"$mode\"}[5m]))",
|
||||
"instant": false,
|
||||
"legendFormat": "Instance: {{instance}}, Job {{job}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU median for $mode mode",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "dashed"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 6
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"min",
|
||||
"max",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "anomaly_score{mode=~\"$mode\"}",
|
||||
"instant": false,
|
||||
"legendFormat": "Instance: {{instance}}, Job: {{job}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Anomaly Scores for $mode mode",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Predicted Value: yhat"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.fillBelowTo",
|
||||
"value": "yhat_lower"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Predicted Upper Boundary"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.fillBelowTo",
|
||||
"value": "yhat"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "yhat{mode=~\"$mode\"}",
|
||||
"instant": false,
|
||||
"legendFormat": "Predicted Value: yhat",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "yhat_lower{mode=~\"$mode\"}",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "Predicted Lower Boundary",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "yhat_upper{mode=~\"$mode\"}",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "Predicted Upper Boundary",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Predicted Value and Boundaries for $mode mode",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "",
|
||||
"schemaVersion": 38,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "VictoriaMetrics"
|
||||
},
|
||||
"definition": "label_values(node_cpu_seconds_total,mode)",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "Mode",
|
||||
"multi": true,
|
||||
"name": "mode",
|
||||
"options": [],
|
||||
"query": {
|
||||
"qryType": 1,
|
||||
"query": "label_values(node_cpu_seconds_total,mode)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Vmanomaly Guide",
|
||||
"uid": "cfa61e6a-6074-4626-8e54-ea33e08746b9",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
After Width: | Height: | Size: 91 KiB |
After Width: | Height: | Size: 86 KiB |
BIN
docs/anomaly-detection/guides/guide-vmanomaly-vmalert-query.webp
Normal file
After Width: | Height: | Size: 133 KiB |
|
@ -26,7 +26,7 @@ aliases:
|
|||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter)(v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/)(v0.25.0)
|
||||
|
||||
<img width="800" alt="vmanomaly typical setup diagramm" src="guide-vmanomaly-vmalert_overview.webp">
|
||||
<img max-width="1000" alt="vmanomaly typical setup diagram" src="guide-vmanomaly-vmalert_overview.webp">
|
||||
|
||||
> **Note: Configurations used throughout this guide can be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/vmanomaly/vmanomaly-integration/)**
|
||||
|
||||
|
@ -101,9 +101,9 @@ node_cpu_seconds_total{cpu="1",mode="iowait"} 51.22
|
|||
Here, metric `node_cpu_seconds_total` tells us how many seconds each CPU spent in different modes: _user_, _system_, _iowait_, _idle_, _irq&softirq_, _guest_, or _steal_.
|
||||
These modes are mutually exclusive. A high _iowait_ means that you are disk or network bound, high _user_ or _system_ means that you are CPU bound.
|
||||
|
||||
The metric `node_cpu_seconds_total` is a [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) type of metric. If we'd like to see how much time CPU spent in each of the nodes, we need to calculate the per-second values change via [rate function](https://docs.victoriametrics.com/MetricsQL.html#rate): `rate(node_cpu_seconds_total)`.
|
||||
The metric `node_cpu_seconds_total` is a [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) type of metric. If we'd like to see how much time CPU spent in each of the modes, we need to calculate the per-second values change via [rate function](https://docs.victoriametrics.com/MetricsQL.html#rate): `rate(node_cpu_seconds_total)`. To aggregate data by mode we'll use the median (50% quantile) function. The resulting query will look like this: `quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))`
|
||||
Here is what this query may look like in Grafana:
|
||||
<img alt="node_cpu_rate_graph" src="guide-vmanomaly-vmalert_node-cpu-rate-graph.webp">
|
||||
<img max-width="1000" alt="node_cpu_rate_graph" src="guide-vmanomaly-vmalert-query.webp">
|
||||
|
||||
This query result will generate 8 time series per each cpu, and we will use them as an input for our VM Anomaly Detection. vmanomaly will start learning configured model type separately for each of the time series.
|
||||
|
||||
|
@ -146,7 +146,7 @@ Here is an example of the config file `vmanomaly_config.yml`.
|
|||
``` yaml
|
||||
scheduler:
|
||||
infer_every: "1m"
|
||||
fit_every: "2h"
|
||||
fit_every: "2m"
|
||||
fit_window: "14d"
|
||||
|
||||
model:
|
||||
|
@ -157,15 +157,16 @@ model:
|
|||
reader:
|
||||
datasource_url: "http://victoriametrics:8428/"
|
||||
queries:
|
||||
node_cpu_rate: "rate(node_cpu_seconds_total)"
|
||||
node_cpu_rate: "quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))"
|
||||
|
||||
writer:
|
||||
datasource_url: "http://victoriametrics:8428/"
|
||||
|
||||
monitoring:
|
||||
|
||||
monitoring:
|
||||
pull: # Enable /metrics endpoint.
|
||||
addr: "0.0.0.0"
|
||||
port: 8500
|
||||
port: 8490
|
||||
```
|
||||
|
||||
</div>
|
||||
|
@ -221,9 +222,12 @@ Here are all services we are going to run:
|
|||
* alertmanager - Notification services that handles alerts from vmalert.
|
||||
|
||||
### Grafana setup
|
||||
To enable VictoriaMetrics datasource as the default in Grafana we need to create a file `datasource.yml`
|
||||
|
||||
The default username/password pair is `admin:admin`
|
||||
#### Create a data source manifest
|
||||
In the `provisioning/datasources/` directory, create a file called `datasource.yml` with the following content:
|
||||
|
||||
|
||||
> The default username/password pair is `admin:admin`
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
|
@ -236,11 +240,35 @@ datasources:
|
|||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: true
|
||||
jsonData:
|
||||
prometheusType: Prometheus
|
||||
prometheusVersion: 2.24.0
|
||||
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
#### Define a dashboard provider
|
||||
In the `provisioning/dashboards/` directory, create a file called `dashboard.yml` with the following content:
|
||||
|
||||
<div class="with-copy" markdown="1">
|
||||
|
||||
``` yaml
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: Prometheus
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
### Scrape config
|
||||
|
||||
Let's create `prometheus.yml` file for `vmagent` configuration.
|
||||
|
@ -266,14 +294,14 @@ scrape_configs:
|
|||
- targets: ['node-exporter:9100']
|
||||
- job_name: 'vmanomaly'
|
||||
static_configs:
|
||||
- targets: [ 'vmanomaly:8500' ]
|
||||
- targets: [ 'vmanomaly:8490' ]
|
||||
```
|
||||
|
||||
</div>
|
||||
|
||||
### vmanomaly licensing
|
||||
|
||||
We will utilize the license key stored locally in the file `vmanomaly_license.txt`.
|
||||
We will utilize the license key stored locally in the file `vmanomaly_license`.
|
||||
|
||||
For additional licensing options, please refer to the [VictoriaMetrics Anomaly Detection documentation on licensing](https://docs.victoriametrics.com/anomaly-detection/Overview#licensing).
|
||||
|
||||
|
@ -340,7 +368,9 @@ services:
|
|||
- 3000:3000
|
||||
volumes:
|
||||
- grafanadata-guide-vmanomaly-vmalert:/var/lib/grafana
|
||||
- ./datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml
|
||||
- ./provisioning/datasources:/etc/grafana/provisioning/datasources
|
||||
- ./provisioning/dashboards:/etc/grafana/provisioning/dashboards
|
||||
- ./vmanomaly_guide_dashboard.json:/var/lib/grafana/dashboards/vmanomaly_guide_dashboard.json
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
|
@ -370,21 +400,21 @@ services:
|
|||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.7.2
|
||||
image: victoriametrics/vmanomaly:v1.8.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
- "8500:8500"
|
||||
- "8490:8490"
|
||||
networks:
|
||||
- vm_net
|
||||
restart: always
|
||||
volumes:
|
||||
- ./vmanomaly_config.yml:/config.yaml
|
||||
- ./vmanomaly_license.txt:/license.txt
|
||||
- ./vmanomaly_license:/license
|
||||
platform: "linux/amd64"
|
||||
command:
|
||||
- "/config.yaml"
|
||||
- "--license-file=/license.txt"
|
||||
- "--license-file=/license"
|
||||
alertmanager:
|
||||
container_name: alertmanager
|
||||
image: prom/alertmanager:v0.25.0
|
||||
|
@ -414,6 +444,7 @@ volumes:
|
|||
grafanadata-guide-vmanomaly-vmalert: {}
|
||||
networks:
|
||||
vm_net:
|
||||
|
||||
```
|
||||
|
||||
</div>
|
||||
|
@ -421,7 +452,7 @@ networks:
|
|||
Before running our docker-compose make sure that your directory contains all required files:
|
||||
|
||||
<p align="center">
|
||||
<img src="guide-vmanomaly-vmalert_files.webp" width="400" alt="all files">
|
||||
<img src="guide-vmanomaly-vmalert_files.webp" max-width="1000" alt="all files">
|
||||
</p>
|
||||
|
||||
This docker-compose file will pull docker images, set up each service and run them all together with the command:
|
||||
|
@ -451,36 +482,30 @@ docker logs vmanomaly
|
|||
To look at model results we need to go to Grafana on `localhost:3000`.
|
||||
vmanomaly needs some time to generate enough data to visualize.
|
||||
Let's investigate model output visualization in Grafana.
|
||||
In the Grafana Explore tab enter queries:
|
||||
On the Grafana Dashboard `Vmanomaly Guide` for each mode of CPU you can investigate:
|
||||
* initial query result - `quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))`
|
||||
* `anomaly_score`
|
||||
* `yhat`
|
||||
* `yhat_lower`
|
||||
* `yhat_upper`
|
||||
* `yhat` - Predicted value
|
||||
* `yhat_lower` - Predicted lower boundary
|
||||
* `yhat_upper` - Predicted upper boundary
|
||||
|
||||
Each of these metrics will contain same labels our query `rate(node_cpu_seconds_total)` returns.
|
||||
Each of these metrics will contain the same labels as our query `quantile by (mode) (0.5, rate(node_cpu_seconds_total[5m]))` returns.
|
||||
|
||||
### Anomaly scores for each metric with its according labels.
|
||||
|
||||
Query: `anomaly_score`
|
||||
<img alt="Anomaly score graph" src="guide-vmanomaly-vmalert_anomaly-score.webp">
|
||||
<img max-width="1000" alt="Anomaly score graph" src="guide-vmanomaly-vmalert-anomaly-score.webp">
|
||||
|
||||
<br>Check whether the anomaly score is high for the datapoints you consider anomalous. If not, you can try other parameters in the config file or another model type.
|
||||
|
||||
As you may notice a lot of data shows anomaly score greater than 1. It is expected as we just started to scrape and store data and there are not enough datapoints to train on. Just wait for some more time for gathering more data to see how well this particular model can find anomalies. In our configs we put 2 days of data required.
|
||||
As you may notice, a lot of data shows an anomaly score greater than 1. This is expected, as we have just started to scrape and store data and there are not enough datapoints to train on. Just wait some more time for more data to be gathered to see how well this particular model can find anomalies. In our config the model is fitted on a 2-week window of data (`fit_window: "14d"`), so it needs about that much history to fit properly.
|
||||
|
||||
### Actual value from input query with predicted `yhat` metric.
|
||||
|
||||
Query: `yhat`
|
||||
### Lower and upper boundaries and predicted values.
|
||||
|
||||
<img alt="yhat" src="guide-vmanomaly-vmalert_yhat.webp">
|
||||
Queries: `yhat_lower`, `yhat_upper` and `yhat`
|
||||
|
||||
Here we are using one particular set of metrics for visualization. Check out the difference between model prediction and actual values. If values are very different from prediction, it can be considered as anomalous.
|
||||
|
||||
### Lower and upper boundaries that model predicted.
|
||||
|
||||
Queries: `yhat_lower` and `yhat_upper`
|
||||
|
||||
<img alt="yhat lower and yhat upper" src="guide-vmanomaly-vmalert_yhat-lower-upper.webp">
|
||||
<img max-width="1000" alt="yhat lower and yhat upper" src="guide-vmanomaly-vmalert-boundaries.webp">
|
||||
|
||||
Boundaries of 'normal' metric values according to model inference.
|
||||
|
||||
|
@ -488,10 +513,10 @@ Boundaries of 'normal' metric values according to model inference.
|
|||
|
||||
On the page `http://localhost:8880/vmalert/groups` you can find our configured Alerting rule:
|
||||
|
||||
<img alt="alert rule" src="guide-vmanomaly-vmalert_alert-rule.webp">
|
||||
<img max-width="1000" alt="alert rule" src="guide-vmanomaly-vmalert_alert-rule.webp">
|
||||
|
||||
According to the rule configured for vmalert we will see Alert when anomaly score exceed 1. You will see an alert on Alert tab. `http://localhost:8880/vmalert/alerts`
|
||||
<img alt="alerts firing" src="guide-vmanomaly-vmalert_alerts-firing.webp">
|
||||
According to the rule configured for vmalert, we will see an alert when the anomaly score exceeds 1. You will see the alert on the Alerts tab at `http://localhost:8880/vmalert/alerts`:
|
||||
<img max-width="1000" alt="alerts firing" src="guide-vmanomaly-vmalert_alerts-firing.webp">
|
||||
|
||||
## 10. Conclusion
|
||||
|
||||
|
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 75 KiB |
Before Width: | Height: | Size: 184 KiB |
Before Width: | Height: | Size: 90 KiB |
Before Width: | Height: | Size: 105 KiB |