docs/vmanomaly: preset guide (#6151)

- added instructions of how to run presets - description of Node Exporter indicators covered
2024-11-21 14:44:00 +00:00 · 2024-06-13 21:11:54 +02:00 · 2024-06-13 21:11:54 +02:00 · c32b8d39dd
commit c32b8d39dd
parent a43823774b
12 changed files with 2487 additions and 0 deletions
--- a/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/awesome_alerts.yml
+++ b/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/awesome_alerts.yml
@ -0,0 +1,237 @@
 # This is a reduced version of awesome alerts 2023-12-01.1
 groups:
 - name: AwesomeAlerts
  rules:
  - alert: HostOutOfMemory
    expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host out of memory (instance {{ $labels.instance }})
      description: "Node memory is filling up (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostMemoryIsUnderutilized
    expr: (100 - (rate(node_memory_MemAvailable_bytes[30m]) / node_memory_MemTotal_bytes * 100) < 20) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 1w
    labels:
      severity: info
    annotations:
      summary: Host Memory is underutilized (instance {{ $labels.instance }})
      description: "Node memory is < 20% for 1 week. Consider reducing memory space. (instance {{ $labels.instance }})\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostUnusualDiskReadRate
    expr: (sum by (instance) (rate(node_disk_read_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: Host unusual disk read rate (instance {{ $labels.instance }})
      description: "Disk is probably reading too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostUnusualDiskWriteRate
    expr: (sum by (instance) (rate(node_disk_written_bytes_total[2m])) / 1024 / 1024 > 50) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host unusual disk write rate (instance {{ $labels.instance }})
      description: "Disk is probably writing too much data (> 50 MB/s)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostOutOfDiskSpace
    expr: ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host out of disk space (instance {{ $labels.instance }})
      description: "Disk is almost full (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostDiskWillFillIn24Hours
    expr: ((node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host disk will fill in 24 hours (instance {{ $labels.instance }})
      description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostOutOfInodes
    expr: (node_filesystem_files_free / node_filesystem_files * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host out of inodes (instance {{ $labels.instance }})
      description: "Disk is almost running out of available inodes (< 10% left)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostFilesystemDeviceError
    expr: node_filesystem_device_error == 1
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Host filesystem device error (instance {{ $labels.instance }})
      description: "{{ $labels.instance }}: Device error with the {{ $labels.mountpoint }} filesystem\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostInodesWillFillIn24Hours
    expr: (node_filesystem_files_free / node_filesystem_files * 100 < 10 and predict_linear(node_filesystem_files_free[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host inodes will fill in 24 hours (instance {{ $labels.instance }})
      description: "Filesystem is predicted to run out of inodes within the next 24 hours at current write rate\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostUnusualDiskIo
    expr: (rate(node_disk_io_time_seconds_total[1m]) > 0.5) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: Host unusual disk IO (instance {{ $labels.instance }})
      description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostSwapIsFillingUp
    expr: ((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host swap is filling up (instance {{ $labels.instance }})
      description: "Swap is filling up (>80%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostSystemdServiceCrashed
    expr: (node_systemd_unit_state{state="failed"} == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host systemd service crashed (instance {{ $labels.instance }})
      description: "systemd service crashed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostPhysicalComponentTooHot
    expr: ((node_hwmon_temp_celsius * ignoring(label) group_left(instance, job, node, sensor) node_hwmon_sensor_label{label!="tctl"} > 75)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: Host physical component too hot (instance {{ $labels.instance }})
      description: "Physical hardware component too hot\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostNodeOvertemperatureAlarm
    expr: (node_hwmon_temp_crit_alarm_celsius == 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Host node overtemperature alarm (instance {{ $labels.instance }})
      description: "Physical node temperature alarm triggered\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostRaidArrayGotInactive
    expr: (node_md_state{state="inactive"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Host RAID array got inactive (instance {{ $labels.instance }})
      description: "RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostRaidDiskFailure
    expr: (node_md_disks{state="failed"} > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host RAID disk failure (instance {{ $labels.instance }})
      description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostKernelVersionDeviations
    expr: (count(sum(label_replace(node_uname_info, "kernel", "$1", "release", "([0-9]+.[0-9]+.[0-9]+).*")) by (kernel)) > 1) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 6h
    labels:
      severity: warning
    annotations:
      summary: Host kernel version deviations (instance {{ $labels.instance }})
      description: "Different kernel versions are running\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostOomKillDetected
    expr: (increase(node_vmstat_oom_kill[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host OOM kill detected (instance {{ $labels.instance }})
      description: "OOM kill detected\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostEdacCorrectableErrorsDetected
    expr: (increase(node_edac_correctable_errors_total[1m]) > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: info
    annotations:
      summary: Host EDAC Correctable Errors detected (instance {{ $labels.instance }})
      description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostEdacUncorrectableErrorsDetected
    expr: (node_edac_uncorrectable_errors_total > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 0m
    labels:
      severity: warning
    annotations:
      summary: Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})
      description: "Host {{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostNetworkInterfaceSaturated
    expr: ((rate(node_network_receive_bytes_total{device!~"^tap.*|^vnet.*|^veth.*|^tun.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^tap.*|^vnet.*|^veth.*|^tun.*"}[1m])) / node_network_speed_bytes{device!~"^tap.*|^vnet.*|^veth.*|^tun.*"} > 0.8 < 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Host Network Interface Saturated (instance {{ $labels.instance }})
      description: "The network interface \"{{ $labels.device }}\" on \"{{ $labels.instance }}\" is getting overloaded.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostNetworkBondDegraded
    expr: ((node_bonding_active - node_bonding_slaves) != 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host Network Bond Degraded (instance {{ $labels.instance }})
      description: "Bond \"{{ $labels.device }}\" degraded on \"{{ $labels.instance }}\".\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostConntrackLimit
    expr: (node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: Host conntrack limit (instance {{ $labels.instance }})
      description: "The number of conntrack is approaching limit\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostClockSkew
    expr: ((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0)) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: Host clock skew (instance {{ $labels.instance }})
      description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostClockNotSynchronising
    expr: (min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Host clock not synchronising (instance {{ $labels.instance }})
      description: "Clock not synchronising. Ensure NTP is configured on this host.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
  - alert: HostRequiresReboot
    expr: (node_reboot_required > 0) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
    for: 4h
    labels:
      severity: info
    annotations:
      summary: Host requires reboot (instance {{ $labels.instance }})
      description: "{{ $labels.instance }} requires a reboot.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
--- a/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/dashboard.json
+++ b/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/dashboard.json
--- a/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/user_input_example.yml
+++ b/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/user_input_example.yml
@ -0,0 +1,41 @@
 # an example of what config subset user should input to make node_exporter preset fully functional
 # commented fields are optional for the user, as they are already defined in the preset: 
 # ./presets/node_exporter/vmanomaly_config.yml
 preset: "node-exporter"
 # schedulers:  # section is already defined
 # models:  # section is already defined
 reader:
  # queries:  # already defined
  # sampling_period: # already defined
  datasource_url: "http://victoriametrics:8428/" # your datasource url
  # tenant_id: '0:0'  # specify for cluster version
  # add any other field - https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader
 writer:
  # metric_format:  # already defined
  #   __name__: "node_exporter_$VAR"
  #   for: "$QUERY_KEY"
  # but you can override it or add other labels
  datasource_url: "http://victoriametrics:8428/" # your datasource url
  # tenant_id: '0:0'  # specify for cluster version
  # add any other field - https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer
 # monitoring:
 #   # pull section is already defined
 #   # pull:
 #   #   addr: "0.0.0.0"
 #   #   port: 8080
 #   push:  # most of the fields are already defined
 #     # extra_labels:  # some labels are already defined, but you can add more
 #     #   job: "vmanomaly"
 #     #   config: "node_exporter"
 #     url: "http://victoriametrics:8428/"  # your datasource url to push self-monitoring metrics
 #     # tenant_id: '0:0'  # specify for cluster version
 #   # add any other field - https://docs.victoriametrics.com/anomaly-detection/components/monitoring
--- a/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/vmanomaly_alerts.yml
+++ b/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/vmanomaly_alerts.yml
@ -0,0 +1,74 @@
 groups:
 - name: VMAnomaly
  rules:
  - alert: PageFaults
    expr: min(anomaly_score{preset="node-exporter", for="page_faults"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Abnormal Page Faults (instance {{ $labels.instance }}).
  - alert: ReceiveBytes
    expr: min(anomaly_score{preset="node-exporter", for="receive_bytes"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: Abnormal amount of Received Bytes (instance {{ $labels.instance }}, device {{$labels.device}}).
  - alert: TransmitBytes
    expr: min(anomaly_score{preset="node-exporter", for="transmit_bytes"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal amount of Transmit bytes (instance {{ $labels.instance }}, device {{$labels.device}}).
  - alert: ReadLatency
    expr: min(anomaly_score{preset="node-exporter", for="read_latency"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal Read latency (instance {{ $labels.instance }}, device {{$labels.device}}).
  - alert: WriteLatency
    expr: min(anomaly_score{preset="node-exporter", for="write_latency"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal Write latency (instance {{ $labels.instance }}, device {{$labels.device}}).
  - alert: CpuSecondsTotal
    expr: min(anomaly_score{preset="node-exporter", for="cpu_seconds_total"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal amount of Node CPU seconds (instance {{ $labels.instance }}).
  - alert: ContextSwitch
    expr: min(anomaly_score{preset="node-exporter", for="context_switch"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal amount of Context Switches (instance {{ $labels.instance }}).
  - alert: HostNetworkReceiveErrors
    expr: min(anomaly_score{preset="node-exporter", for="host_network_receive_errors"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal amount of Host Network Receive Errors (instance {{ $labels.instance }}, device {{$labels.device}}).
  - alert: HostNetworkTransmitErrors
    expr: min(anomaly_score{preset="node-exporter", for="host_network_transmit_errors"}) without (model_alias, scheduler_alias)>=1.0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary:  Abnormal amount of Host Network Transmit Errors (instance {{ $labels.instance }}, device {{$labels.device}}).
--- a/docs/anomaly-detection/Overview.md
+++ b/docs/anomaly-detection/Overview.md
@ -35,6 +35,8 @@ In addition to that, setting up alerting rules manually has been proven to be te
 error-prone, while anomaly detection can be easier to set up, and use the same model for different
 metrics.
 `vmanomaly` can be used as a helper to set up your own alerting. You can rely on the spikes you see in anomaly scores to form the metric queries for alerting rules.
 > **Note: `vmanomaly` is a part of [enterprise package](https://docs.victoriametrics.com/enterprise/). You need to get a [free trial license](https://victoriametrics.com/products/enterprise/trial/) for evaluation.**
 ## How?
--- a/docs/anomaly-detection/Presets.md
+++ b/docs/anomaly-detection/Presets.md
@ -0,0 +1,153 @@
 ---
 sort: 3
 weight: 1
 title: Presets
 menu:
  docs:
    parent: "anomaly-detection"
    weight: 1
    title: Presets
 ---
 # Anomaly Detection Presets
 > Please check the [Quick Start Guide](/anomaly-detection/quickstart/) to install and run `vmanomaly`
 > Presets are available from v1.13.0
 Presets enable anomaly detection in indicators that are hard to monitor using alerts based on static thresholds.
 So, the anomaly detection alerting rules based on the [`anomaly_scores`](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score) stay the same over time, and we generate the anomaly scores using predefined machine learning models.
 Models are constantly retraining on different time frames which helps to keep alerts up to date and to consider underlying data patterns.
 You can set up the simplified configuration file for `vmanomaly` just specifying the type of preset and data sources in [`reader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/) and [`writer`](https://docs.victoriametrics.com/anomaly-detection/components/writer/) sections of the config.
 The rest of the parameters are already set up for you.
 Available presets:
 - [Node-Exporter](#node-exporter)
 Here is an example config file to enable Node-Exporter preset:
 ```yaml
 preset: "node-exporter"
 reader:
  datasource_url: "http://victoriametrics:8428/" # your datasource url
  # tenant_id: '0:0'  # specify for cluster version
 writer:
  datasource_url: "http://victoriametrics:8428/" # your datasource url
  # tenant_id: '0:0'  # specify for cluster version
 ```
 Run a service using config file with one of the [available options](/anomaly-detection/quickstart/#how-to-install-and-run-vmanomaly).
 After you run `vmanomaly`, the available assets can be found here: `http://localhost:8490/presets/`
 <img alt="preset-localhost" src="presets-localhost.webp">
 ## Node-Exporter
 > **Note: Configurations for presets can be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/vmanomaly/vmanomaly-node-exporter-preset/)**
 For enabling Node-Exporter in config file use `preset` parameter:
 ```yaml
 preset: "node-exporter"
 ```
 ### Generated Anomaly scores
 Machine learning models will be fit for each timeseries, returned by underlying [MetricsQL](https://docs.victoriametrics.com/metricsql/).
 Anomaly score metric labels will also contain underlying [model classes](/anomaly-detection/components/models/) and [schedulers](/anomaly-detection/components/scheduler/).
 Here's an example of produced metrics:
 ```shell
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="system", model_alias="holt-winters", scheduler_alias="1d_1m"} 0.23451242720277776
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="user", model_alias="holt-winters", scheduler_alias="1d_1m"} 0.2637952255694444
 anomaly_score{for="page_faults", instance="node-exporter:9100", job="node-exporter", preset="node-exporter", model_alias="holt-winters", scheduler_alias="1d_1m"} 0.00593712535
 anomaly_score{for="read_latency", instance="node-exporter:9100", preset="node-exporter", model_alias="mad", scheduler_alias="1d_1m"} 0.27773362795333334
 anomaly_score{for="receive_bytes", instance="node-exporter:9100", preset="node-exporter", model_alias="mad", scheduler_alias="1d_1m"} 0.037753486136666674
 anomaly_score{for="transmit_bytes", instance="node-exporter:9100", preset="node-exporter", model_alias="mad", scheduler_alias="1d_1m"} 0.17633085235
 anomaly_score{for="write_latency", instance="node-exporter:9100", preset="node-exporter", model_alias="mad", scheduler_alias="1d_1m"} 0.019314370926666668
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="idle", model_alias="mad", scheduler_alias="1d_1m"} 4.2323617935
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="idle", model_alias="mad", scheduler_alias="2w_1m"} 1.5261359215
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="idle", model_alias="prophet", scheduler_alias="2w_1m"} 0.5850743651
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="idle", model_alias="z-score", scheduler_alias="1d_1m"} 1.6496064663
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="idle", model_alias="z-score", scheduler_alias="2w_1m"} 0.924392581
 anomaly_score{for="cpu_seconds_total", instance="node-exporter:9100", preset="node-exporter", mode="iowait", model_alias="mad", scheduler_alias="1d_1m"} 0.8571428657
 ...
 ```
 ### Alerts
 > We recommend to use [Awesome Prometheus alerts](https://github.com/samber/awesome-prometheus-alerts) for alerts not covered by presets.
 Provided alerts are set to fire every time all models vote that the datapoint is anomalous.
 You can find alerting rules here:
 - `vmanomaly` [Anomaly Detection alerts](http://localhost:8490/presets/vmanomaly_alerts.yml): `http://localhost:8490/presets/vmanomaly_alerts.yml`
 - [Modified Awesome Alerts](http://localhost:8490/presets/awesome_alerts.yml): `http://localhost:8490/presets/awesome_alerts.yml`
 #### Awesome Alerts replaced by Machine Learning alerts
 - HostMemoryUnderMemoryPressure
 - HostContextSwitching
 - HostHighCpuLoad
 - HostCpuIsUnderutilized
 - HostCpuStealNoisyNeighbor
 - HostCpuHighIowait
 - HostNetworkReceiveErrors
 - HostNetworkTransmitErrors
 - HostUnusualNetworkThroughputIn
 - HostUnusualNetworkThroughputOut
 ### Grafana dashboard
 Grafana dashboard `.json` file can be found [here](http://localhost:8490/presets/dashboard.json): `http://localhost:8490/presets/dashboard.json`
 ### Indicators monitored by preset
 The produced anomaly scores will contain label `for` with the name of corresponding indicator.
 <table>
    <thead>
        <tr>
            <th>Indicator</th>
            <th>Based on metrics</th>
            <th>Description</th>  
        </tr>
    </thead>
    <tbody>
        <tr>
            <td><code>page_faults</code></td>
            <td><code>node_vmstat_pgmajfault</code></td>
            <td>Number of major faults that have occurred since the last update. Major faults occur when a process tries to access a page in memory that is not currently mapped in the process's address space, and it requires loading data from the disk.</td>
        </tr>
        <tr>
            <td><code>context_switch</code></td>
            <td><code>node_context_switches_total</code></td>
            <td>This metric represents the total number of context switches across all CPUs.</td>
        </tr>
        <tr>
            <td><code>cpu_seconds_total</code></td>
            <td><code>node_cpu_seconds_total</code></td>
            <td>Total amount of CPU time consumed by the system in seconds by CPU processing mode (e.g., user, system, idle).</td>
        </tr>
        <tr>
            <td><code>host_network_receive_errors</code> & <code>host_network_transmit_errors</code></td>
            <td><code>node_network_receive_errs_total</code>, <code>node_network_receive_packets_total</code>, <code>node_network_transmit_errs_total</code>, <code>node_network_transmit_packets_total</code>
            <td>Total number of errors encountered while receiving/transmitting packets on the network interfaces of a node.</td>
        </tr>
        <tr>
            <td><code>receive_bytes</code> & <code>transmit_bytes</code></td>
            <td><code>node_network_receive_bytes_total</code>, <code>node_network_transmit_bytes_total</code></td>
            <td>Total number of bytes received/transmitted on network interfaces of a node.</td>
        </tr>
        <tr>
            <td><code>read_latency</code> & <code>write_latency</code></td>
            <td><code>node_disk_read_time_seconds_total</code>, <code>node_disk_reads_completed_total</code>, <code>node_disk_write_time_seconds_total</code>, <code>node_disk_writes_completed_total</code></td>
            <td>Disk latency. The total read/write time spent in seconds. / The total number of reads/writes completed successfully.</td>
        </tr>
    </tbody>
 </table>
 ## Example
 On the graph 'Percentage of Anomalies,' you can see a spike to 8.75% of anomalies at the timestamp '2024-06-03 10:35:00'. The graph 'Anomalies per Indicator' shows the indicators that were anomalous at the corresponding time.
 <img alt="global" src="presets_global_percentage.webp">
 At this timestamp on the 'Number of Anomalous Indicators by Node,' we can identify the node that had the most anomalies: `10.142.0.27`
 <img alt="by_node" src="presets_anomalies_by_node.webp">
 For this node from the timestamp `2024-06-03 10:35:00` CPU time spent handling software interrupts started to grow.
 (`cpu_seconds_total{mode="softirq"}`)
 <img alt="irq" src="presets_cpu_seconds_softirq.webp">
 At the same time `cpu_seconds_total` for `steal` mode started to grow as well.
 <img alt="steal" src="presets_cpu_seconds_steal.webp">
--- a/docs/anomaly-detection/README.md
+++ b/docs/anomaly-detection/README.md
@ -18,6 +18,7 @@ Begin your VictoriaMetrics Anomaly Detection journey with ease using our guides
 - **Quickstart**: Check out how to get `vmanomaly` up and running [here](/anomaly-detection/QuickStart.html).
 - **Overview**: Find out how `vmanomaly` service operates [here](/anomaly-detection/Overview.html)
 - **Integration**: Integrate anomaly detection into your observability ecosystem. Get started [here](/anomaly-detection/guides/guide-vmanomaly-vmalert.html).
 - **Anomaly Detection Presets**: Enable anomaly detection on predefined set of indicators, that require frequently changing static thresholds for alerting. Find more information [here](/anomaly-detection/presets/).
 - **Installation Options**: Select the method that aligns with your technical requirements:
    - **Docker Installation**: Suitable for containerized environments. See [Docker guide](/anomaly-detection/Overview.html#run-vmanomaly-docker-container).
--- a/docs/anomaly-detection/presets-localhost.webp
+++ b/docs/anomaly-detection/presets-localhost.webp
--- a/docs/anomaly-detection/presets_anomalies_by_node.webp
+++ b/docs/anomaly-detection/presets_anomalies_by_node.webp
--- a/docs/anomaly-detection/presets_cpu_seconds_softirq.webp
+++ b/docs/anomaly-detection/presets_cpu_seconds_softirq.webp
--- a/docs/anomaly-detection/presets_cpu_seconds_steal.webp
+++ b/docs/anomaly-detection/presets_cpu_seconds_steal.webp
--- a/docs/anomaly-detection/presets_global_percentage.webp
+++ b/docs/anomaly-detection/presets_global_percentage.webp