diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 3d57b38a3..11ff12aef 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,7 +7,7 @@ updates: - package-ecosystem: "gomod" directory: "/" schedule: - interval: "daily" + interval: "weekly" - package-ecosystem: "bundler" directory: "/docs" schedule: @@ -15,7 +15,7 @@ updates: - package-ecosystem: "gomod" directory: "/app/vmui/packages/vmui/web" schedule: - interval: "daily" + interval: "weekly" - package-ecosystem: "docker" directory: "/" schedule: @@ -23,4 +23,4 @@ updates: - package-ecosystem: "npm" directory: "/app/vmui" schedule: - interval: "daily" + interval: "weekly" diff --git a/README.md b/README.md index f9cd08f1e..f6149a481 100644 --- a/README.md +++ b/README.md @@ -1757,6 +1757,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.openstackSDCheckInterval duration Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s) + -promscrape.seriesLimitPerTarget int + Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info -promscrape.streamParse Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.suppressDuplicateScrapeTargetErrors diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 0d95d837e..acd48f8ea 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -36,7 +36,7 @@ to `vmagent` such as the ability to push metrics instead of pulling them. We did * Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared with Prometheus. * Scrape targets can be spread among multiple `vmagent` instances when big number of targets must be scraped. See [these docs](#scraping-big-number-of-targets). * Can efficiently scrape targets that expose millions of time series such as [/federate endpoint in Prometheus](https://prometheus.io/docs/prometheus/latest/federation/). See [these docs](#stream-parsing-mode). -* Can deal with [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues by limiting the number of unique time series sent to remote storage systems. See [these docs](#cardinality-limiter). +* Can deal with [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues by limiting the number of unique time series at scrape time and before sending them to remote storage systems. See [these docs](#cardinality-limiter). * Can load scrape configs from multiple files. See [these docs](#loading-scrape-configs-from-multiple-files). 
## Quick Start @@ -196,7 +196,12 @@ Please file feature requests to [our issue tracker](https://github.com/VictoriaM to save network bandwidth. * `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis. By default, `vmagent` uses keep-alive connections to scrape targets to reduce overhead on connection re-establishing. +* `series_limit: N` - for limiting the number of unique time series a single scrape target can expose. See [these docs](#cardinality-limiter). * `stream_parse: true` - for scraping targets in a streaming manner. This may be useful for targets exporting big number of metrics. See [these docs](#stream-parsing-mode). +* `scrape_align_interval: duration` - for aligning scrapes to the given interval instead of using a random offset in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps spread scrapes evenly in time. +* `scrape_offset: duration` - for specifying the exact offset for scraping instead of using a random offset in the range `[0 ... scrape_interval]`. +* `relabel_debug: true` - for enabling debug logging during relabeling of the discovered targets. See [these docs](#relabeling). +* `metric_relabel_debug: true` - for enabling debug logging during relabeling of the scraped metrics. See [these docs](#relabeling). Note that `vmagent` doesn't support `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval` command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs` @@ -359,6 +364,10 @@ scrape_configs: ## Cardinality limiter +By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced for all the scrape targets via the `-promscrape.seriesLimitPerTarget` command-line option or on a per-`scrape_config` basis via the `series_limit` option. All the samples for time series exceeding the given limit are dropped. Such drops can be [monitored](#monitoring) via the `promscrape_series_limit_rows_dropped_total` metric, which counts the samples dropped because of the limit. + +See also the `sample_limit` option at [scrape_config section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). + By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`. The limit can be enforced by setting the following command-line flags: * `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour. Useful for limiting the number of active time series. @@ -672,8 +681,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.disableKeepAlive Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection.
It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets - -promscrape.noStaleMarkers - Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.discovery.concurrency int The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100) -promscrape.discovery.concurrentWaitTime duration @@ -705,8 +712,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . -promscrape.maxScrapeSize size The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216) + -promscrape.noStaleMarkers + Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.openstackSDCheckInterval duration Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s) + -promscrape.seriesLimitPerTarget int + Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info -promscrape.streamParse Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.suppressDuplicateScrapeTargetErrors @@ -737,12 +748,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . The maximum size in bytes of unpacked request to send to remote storage. It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608) -remoteWrite.maxDailySeries int - The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -remoteWrite.maxHourlySeries + The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter -remoteWrite.maxDiskUsagePerURL size The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. 
Disk usage is unlimited if the value is set to 0 Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) -remoteWrite.maxHourlySeries int - The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -remoteWrite.maxDailySeries + The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter -remoteWrite.multitenantURL array Base path for multitenant remote storage URL to write data to. See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://:8480 . Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url Supports an array of values separated by comma or specified via multiple flags. diff --git a/app/vmagent/remotewrite/remotewrite.go b/app/vmagent/remotewrite/remotewrite.go index 2bbcc2204..c4ea41c4e 100644 --- a/app/vmagent/remotewrite/remotewrite.go +++ b/app/vmagent/remotewrite/remotewrite.go @@ -53,9 +53,9 @@ var ( `For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+ `Enabled sorting for labels can slow down ingestion performance a bit`) maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+ - "Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -remoteWrite.maxDailySeries") + "Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter") maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+ - "Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -remoteWrite.maxHourlySeries") + "Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter") ) var ( @@ -218,6 +218,13 @@ func Stop() { } } rwctxsMap = nil + + if sl := hourlySeriesLimiter; sl != nil { + sl.MustStop() + } + if sl := dailySeriesLimiter; sl != nil { + sl.MustStop() + } } // Push sends wr to remote storage systems set via `-remoteWrite.url`. diff --git a/dashboards/vmagent.json b/dashboards/vmagent.json index 2c3fe6ec8..b35429c2c 100644 --- a/dashboards/vmagent.json +++ b/dashboards/vmagent.json @@ -57,7 +57,7 @@ } ] }, - "description": "Overview for VictoriaMetrics vmagent v1.57.0 or higher", + "description": "Overview for VictoriaMetrics vmagent v1.64.0 or higher", "editable": true, "gnetId": null, "graphTooltip": 1, diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4db267fa2..060b4c94e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -8,6 +8,7 @@ sort: 15 * FEATURE: vmagent: add ability to read scrape configs from multiple files specified in `scrape_config_files` section. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1559). 
* FEATURE: vmagent: reduce memory usage and CPU usage when [Prometheus staleness tracking](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) is enabled for metrics exported from the deleted or disappeared scrape targets. +* FEATURE: vmagent: add the ability to limit the number of unique time series scraped per each target. This can be done either globally via the `-promscrape.seriesLimitPerTarget` command-line option or on a per-target basis via the `series_limit` option in the `scrape_config` section. See [the updated docs on cardinality limiter](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1561). * FEATURE: vmagent: discover `role: ingress` and `role: endpointslice` in [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) via v1 API instead of v1beta1 API if Kubernetes supports it. This fixes service discovery in Kubernetes v1.22 and newer versions. See [these docs](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#ingress-v122). * FEATURE: take into account failed queries in `vm_request_duration_seconds` summary at `/metrics`. Previously only successful queries were taken into account. This could result in skewed summary. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1537). * FEATURE: vmalert: add an official dashboard for vmalert. See [these docs](https://docs.victoriametrics.com/vmalert.html#monitoring). diff --git a/docs/README.md index f9cd08f1e..f6149a481 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1757,6 +1757,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.openstackSDCheckInterval duration Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s) + -promscrape.seriesLimitPerTarget int + Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info -promscrape.streamParse Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.suppressDuplicateScrapeTargetErrors diff --git a/docs/Single-server-VictoriaMetrics.md index ada3760b4..0cda761c1 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1761,6 +1761,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup.
See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.openstackSDCheckInterval duration Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s) + -promscrape.seriesLimitPerTarget int + Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info -promscrape.streamParse Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.suppressDuplicateScrapeTargetErrors diff --git a/docs/vmagent.md index d6ae0550c..6606733aa 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -40,7 +40,7 @@ to `vmagent` such as the ability to push metrics instead of pulling them. We did * Uses lower amounts of RAM, CPU, disk IO and network bandwidth compared with Prometheus. * Scrape targets can be spread among multiple `vmagent` instances when big number of targets must be scraped. See [these docs](#scraping-big-number-of-targets). * Can efficiently scrape targets that expose millions of time series such as [/federate endpoint in Prometheus](https://prometheus.io/docs/prometheus/latest/federation/). See [these docs](#stream-parsing-mode). -* Can deal with [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues by limiting the number of unique time series sent to remote storage systems. See [these docs](#cardinality-limiter). +* Can deal with [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) and [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues by limiting the number of unique time series at scrape time and before sending them to remote storage systems. See [these docs](#cardinality-limiter). * Can load scrape configs from multiple files. See [these docs](#loading-scrape-configs-from-multiple-files). ## Quick Start @@ -200,7 +200,12 @@ Please file feature requests to [our issue tracker](https://github.com/VictoriaM to save network bandwidth. * `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis. By default, `vmagent` uses keep-alive connections to scrape targets to reduce overhead on connection re-establishing. +* `series_limit: N` - for limiting the number of unique time series a single scrape target can expose. See [these docs](#cardinality-limiter). * `stream_parse: true` - for scraping targets in a streaming manner. This may be useful for targets exporting big number of metrics. See [these docs](#stream-parsing-mode). +* `scrape_align_interval: duration` - for aligning scrapes to the given interval instead of using a random offset in the range `[0 ... scrape_interval]` for scraping each target. The random offset helps spread scrapes evenly in time. +* `scrape_offset: duration` - for specifying the exact offset for scraping instead of using a random offset in the range `[0 ...
scrape_interval]`. +* `relabel_debug: true` - for enabling debug logging during relabeling of the discovered targets. See [these docs](#relabeling). +* `metric_relabel_debug: true` - for enabling debug logging during relabeling of the scraped metrics. See [these docs](#relabeling). Note that `vmagent` doesn't support `refresh_interval` option for these scrape configs. Use the corresponding `-promscrape.*CheckInterval` command-line flag instead. For example, `-promscrape.consulSDCheckInterval=60s` sets `refresh_interval` for all the `consul_sd_configs` @@ -363,6 +368,10 @@ scrape_configs: ## Cardinality limiter +By default `vmagent` doesn't limit the number of time series each scrape target can expose. The limit can be enforced for all the scrape targets via the `-promscrape.seriesLimitPerTarget` command-line option or on a per-`scrape_config` basis via the `series_limit` option. All the samples for time series exceeding the given limit are dropped. Such drops can be [monitored](#monitoring) via the `promscrape_series_limit_rows_dropped_total` metric, which counts the samples dropped because of the limit. + +See also the `sample_limit` option at [scrape_config section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). + By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`. The limit can be enforced by setting the following command-line flags: * `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour. Useful for limiting the number of active time series. @@ -676,8 +685,6 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.disableKeepAlive Whether to disable HTTP keep-alive connections when scraping all the targets. This may be useful when targets has no support for HTTP keep-alive connection. It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets - -promscrape.noStaleMarkers - Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.discovery.concurrency int The maximum number of concurrent requests to Prometheus autodiscovery API (Consul, Kubernetes, etc.) (default 100) -promscrape.discovery.concurrentWaitTime duration @@ -709,8 +716,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . -promscrape.maxScrapeSize size The maximum size of scrape response in bytes to process from Prometheus targets. Bigger responses are rejected Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16777216) + -promscrape.noStaleMarkers + Whether to disable seding Prometheus stale markers for metrics when scrape target disappears.
This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode -promscrape.openstackSDCheckInterval duration Interval for checking for changes in openstack API server. This works only if openstack_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config for details (default 30s) + -promscrape.seriesLimitPerTarget int + Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info -promscrape.streamParse Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful for reducing memory usage when millions of metrics are exposed per each scrape target. It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control -promscrape.suppressDuplicateScrapeTargetErrors @@ -741,12 +752,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . The maximum size in bytes of unpacked request to send to remote storage. It shouldn't exceed -maxInsertRequestSize from VictoriaMetrics Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 8388608) -remoteWrite.maxDailySeries int - The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -remoteWrite.maxHourlySeries + The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter -remoteWrite.maxDiskUsagePerURL size The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks, so the minimum practical value for this flag is 500000000. Disk usage is unlimited if the value is set to 0 Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) -remoteWrite.maxHourlySeries int - The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See also -remoteWrite.maxDailySeries + The maximum number of unique series vmagent can send to remote storage systems during the last hour. Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter -remoteWrite.multitenantURL array Base path for multitenant remote storage URL to write data to. See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://:8480 . Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url Supports an array of values separated by comma or specified via multiple flags. 
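To make the new per-`scrape_config` options above concrete, here is a minimal, hedged `-promscrape.config` sketch. The job names, targets and limit values are made up for illustration; only the option names (`series_limit`, `scrape_align_interval`, `scrape_offset`, `relabel_debug`, `metric_relabel_debug`) come from this diff:

```yaml
scrape_configs:
  # Hypothetical job: cap each target at 5000 unique series and align scrapes to the interval.
  - job_name: node-exporter
    scrape_interval: 30s
    scrape_align_interval: 30s
    series_limit: 5000
    static_configs:
      - targets: ["node-exporter:9100"]
  # Hypothetical job: fixed scrape offset plus debug logging for both relabeling stages.
  - job_name: snmp
    scrape_interval: 1m
    scrape_offset: 10s
    relabel_debug: true
    metric_relabel_debug: true
    static_configs:
      - targets: ["192.168.1.2"]
```

Per the `updateSeriesAdded` change further below, a per-job `series_limit` overrides the global `-promscrape.seriesLimitPerTarget` flag, which applies to targets whose `scrape_config` sets no explicit limit.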
diff --git a/go.mod b/go.mod index 484f0ac75..c869ce92c 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,6 @@ module github.com/VictoriaMetrics/VictoriaMetrics require ( - cloud.google.com/go v0.94.0 // indirect cloud.google.com/go/storage v1.16.1 github.com/VictoriaMetrics/fastcache v1.6.0 diff --git a/go.sum b/go.sum index 5e17b05b0..aea462b59 100644 --- a/go.sum +++ b/go.sum @@ -24,9 +24,8 @@ cloud.google.com/go v0.83.0/go.mod h1:Z7MJUsANfY0pYPdw0lbnivPx4/vhy/e2FEkSkF7vAV cloud.google.com/go v0.84.0/go.mod h1:RazrYuxIK6Kb7YrzzhPoLmCVzl7Sup4NrbKPg8KHSUM= cloud.google.com/go v0.87.0/go.mod h1:TpDYlFy7vuLzZMMZ+B6iRiELaY7z/gJPaqbMx6mlWcY= cloud.google.com/go v0.90.0/go.mod h1:kRX0mNRHe0e2rC6oNakvwQqzyDmg57xJ+SZU1eT2aDQ= +cloud.google.com/go v0.93.3 h1:wPBktZFzYBcCZVARvwVKqH1uEj+aLXofJEtrb4oOsio= cloud.google.com/go v0.93.3/go.mod h1:8utlLll2EF5XMAV15woO4lSbWQlk8rer9aLOfLh7+YI= -cloud.google.com/go v0.94.0 h1:QDB2MZHqjTt0hGKnoEWyG/iWykue/lvkLdogLgrg10U= -cloud.google.com/go v0.94.0/go.mod h1:qAlAugsXlC+JWO+Bke5vCtc9ONxjQT3drlTTnAplMW4= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= @@ -1373,7 +1372,6 @@ google.golang.org/api v0.48.0/go.mod h1:71Pr1vy+TAZRPkPs/xlCf5SsU8WjuAWv1Pfjbtuk google.golang.org/api v0.50.0/go.mod h1:4bNT5pAuq5ji4SRZm+5QIkjny9JAyVD/3gaSihNefaw= google.golang.org/api v0.51.0/go.mod h1:t4HdrdoNgyN5cbEfm7Lum0lcLDLiise1F8qDKX00sOU= google.golang.org/api v0.54.0/go.mod h1:7C4bFFOvVDGXjfDTAsgGwDgAxRDeQ4X8NvUedIt6z3k= -google.golang.org/api v0.55.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/api v0.56.0 h1:08F9XVYTLOGeSQb3xI9C0gXMuQanhdGed0cWFhDozbI= google.golang.org/api v0.56.0/go.mod h1:38yMfeP1kfjsl8isn0tliTjIb1rJXcQi4UXlbqivdVE= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= @@ -1442,9 +1440,8 @@ google.golang.org/genproto v0.0.0-20210805201207-89edb61ffb67/go.mod h1:ob2IJxKr google.golang.org/genproto v0.0.0-20210813162853-db860fec028c/go.mod h1:cFeNkxwySK631ADgubI+/XFU/xp8FD5KIVV4rj8UC5w= google.golang.org/genproto v0.0.0-20210821163610-241b8fcbd6c8/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210825212027-de86158e7fda/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= +google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71 h1:z+ErRPu0+KS02Td3fOAgdX+lnPDh/VyaABEJPD4JRQs= google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= -google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2 h1:NHN4wOCScVzKhPenJ2dt+BTs3X/XkBVI/Rh4iDt55T8= -google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= diff --git a/lib/bloomfilter/limiter.go b/lib/bloomfilter/limiter.go index 152d999b0..1d047414b 100644 --- a/lib/bloomfilter/limiter.go +++ b/lib/bloomfilter/limiter.go @@ -1,6 +1,7 @@ package bloomfilter import ( + "sync" "sync/atomic" "time" ) @@ -11,23 +12,42 @@ import ( type Limiter struct { maxItems int v atomic.Value + + wg sync.WaitGroup + stopCh chan struct{} 
} // NewLimiter creates new Limiter, which can hold up to maxItems unique items during the given refreshInterval. func NewLimiter(maxItems int, refreshInterval time.Duration) *Limiter { l := &Limiter{ maxItems: maxItems, + stopCh: make(chan struct{}), } l.v.Store(newLimiter(maxItems)) + l.wg.Add(1) go func() { + defer l.wg.Done() + t := time.NewTicker(refreshInterval) + defer t.Stop() for { - time.Sleep(refreshInterval) - l.v.Store(newLimiter(maxItems)) + select { + case <-t.C: + l.v.Store(newLimiter(maxItems)) + case <-l.stopCh: + return + } } }() return l } +// MustStop stops the given limiter. +// It is expected that nobody access the limiter at MustStop call. +func (l *Limiter) MustStop() { + close(l.stopCh) + l.wg.Wait() +} + // MaxItems returns the maxItems passed to NewLimiter. func (l *Limiter) MaxItems() int { return l.maxItems diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 80c59ad40..1210485d1 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -151,6 +151,7 @@ type ScrapeConfig struct { StreamParse bool `yaml:"stream_parse,omitempty"` ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"` ScrapeOffset time.Duration `yaml:"scrape_offset,omitempty"` + SeriesLimit int `yaml:"series_limit,omitempty"` ProxyClientConfig promauth.ProxyClientConfig `yaml:",inline"` // This is set in loadConfig @@ -773,6 +774,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf streamParse: sc.StreamParse, scrapeAlignInterval: sc.ScrapeAlignInterval, scrapeOffset: sc.ScrapeOffset, + seriesLimit: sc.SeriesLimit, } return swc, nil } @@ -799,6 +801,7 @@ type scrapeWorkConfig struct { streamParse bool scrapeAlignInterval time.Duration scrapeOffset time.Duration + seriesLimit int } type targetLabelsGetter interface { @@ -1066,6 +1069,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel StreamParse: swc.streamParse, ScrapeAlignInterval: swc.scrapeAlignInterval, ScrapeOffset: swc.scrapeOffset, + SeriesLimit: swc.seriesLimit, jobNameOriginal: swc.jobName, } diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 5ce96b4fd..00a732e4a 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -1344,6 +1344,7 @@ scrape_configs: stream_parse: true scrape_align_interval: 1s scrape_offset: 0.5s + series_limit: 123 static_configs: - targets: - 192.168.1.2 # SNMP device. @@ -1400,6 +1401,7 @@ scrape_configs: StreamParse: true, ScrapeAlignInterval: time.Second, ScrapeOffset: 500 * time.Millisecond, + SeriesLimit: 123, jobNameOriginal: "snmp", }, }) diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index a62459c5f..8e0a1fe8a 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/leveledbytebufferpool" @@ -26,7 +27,8 @@ import ( var ( suppressScrapeErrors = flag.Bool("promscrape.suppressScrapeErrors", false, "Whether to suppress scrape errors logging. "+ "The last error for each target is always available at '/targets' page even if scrape errors logging is suppressed") - noStaleMarkers = flag.Bool("promscrape.noStaleMarkers", false, "Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. 
This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode") + noStaleMarkers = flag.Bool("promscrape.noStaleMarkers", false, "Whether to disable seding Prometheus stale markers for metrics when scrape target disappears. This option may reduce memory usage if stale markers aren't needed for your setup. See also https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode") + seriesLimitPerTarget = flag.Int("promscrape.seriesLimitPerTarget", 0, "Optional limit on the number of unique time series a single scrape target can expose. See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter for more info") ) // ScrapeWork represents a unit of work for scraping Prometheus metrics. @@ -103,6 +105,9 @@ type ScrapeWork struct { // The offset for the first scrape. ScrapeOffset time.Duration + // Optional limit on the number of unique series the scrape target can expose. + SeriesLimit int + // The original 'job_name' jobNameOriginal string } @@ -114,11 +119,11 @@ func (sw *ScrapeWork) key() string { // Do not take into account OriginalLabels. key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, DenyRedirects=%v, Labels=%s, "+ "ProxyURL=%s, ProxyAuthConfig=%s, AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, "+ - "ScrapeAlignInterval=%s, ScrapeOffset=%s", + "ScrapeAlignInterval=%s, ScrapeOffset=%s, SeriesLimit=%d", sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.DenyRedirects, sw.LabelsString(), sw.ProxyURL.String(), sw.ProxyAuthConfig.String(), sw.AuthConfig.String(), sw.MetricRelabelConfigs.String(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, - sw.ScrapeAlignInterval, sw.ScrapeOffset) + sw.ScrapeAlignInterval, sw.ScrapeOffset, sw.SeriesLimit) return key } @@ -178,6 +183,9 @@ type scrapeWork struct { seriesAdded int labelsHashBuf []byte + // Optional limiter on the number of unique series per scrape target. + seriesLimiter *bloomfilter.Limiter + // prevBodyLen contains the previous response body length for the given scrape work. // It is used as a hint in order to reduce memory usage for body buffers. prevBodyLen int @@ -241,6 +249,9 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) { case <-stopCh: t := time.Now().UnixNano() / 1e6 sw.sendStaleMarkersForLastScrape(t, true) + if sw.seriesLimiter != nil { + sw.seriesLimiter.MustStop() + } return case tt := <-ticker.C: t := tt.UnixNano() / 1e6 @@ -481,13 +492,31 @@ func (sw *scrapeWork) updateSeriesAdded(wc *writeRequestCtx) { sw.seriesMap = make(map[uint64]struct{}, len(wc.writeRequest.Timeseries)) } m := sw.seriesMap + seriesLimit := *seriesLimitPerTarget + if sw.Config.SeriesLimit > 0 { + seriesLimit = sw.Config.SeriesLimit + } + if sw.seriesLimiter == nil && seriesLimit > 0 { + sw.seriesLimiter = bloomfilter.NewLimiter(seriesLimit, 24*time.Hour) + } + hsl := sw.seriesLimiter + dstSeries := wc.writeRequest.Timeseries[:0] for _, ts := range wc.writeRequest.Timeseries { h := sw.getLabelsHash(ts.Labels) + if hsl != nil && !hsl.Add(h) { + // The limit on the number of unique series per scrape target has been exceeded. + // Drop the metric.
+ metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{job=%q,target=%q}`, + sw.Config.jobNameOriginal, sw.Config.ScrapeURL)).Inc() + continue + } + dstSeries = append(dstSeries, ts) if _, ok := m[h]; !ok { m[h] = struct{}{} sw.seriesAdded++ } } + wc.writeRequest.Timeseries = dstSeries } func (sw *scrapeWork) updateLastScrape(response string) { diff --git a/lib/promscrape/scrapework_test.go b/lib/promscrape/scrapework_test.go index c98bb0fb9..b19093c7d 100644 --- a/lib/promscrape/scrapework_test.go +++ b/lib/promscrape/scrapework_test.go @@ -333,6 +333,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) { `, &ScrapeWork{ HonorLabels: true, SampleLimit: 1, + SeriesLimit: 123, }, ` up 0 123 scrape_samples_scraped 2 123 diff --git a/lib/storage/storage.go b/lib/storage/storage.go index f6829cee3..dd4d70ed2 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -699,6 +699,14 @@ func (s *Storage) MustClose() { if err := s.flockF.Close(); err != nil { logger.Panicf("FATAL: cannot close lock file %q: %s", s.flockF.Name(), err) } + + // Stop series limiters. + if sl := s.hourlySeriesLimiter; sl != nil { + sl.MustStop() + } + if sl := s.dailySeriesLimiter; sl != nil { + sl.MustStop() + } } func (s *Storage) mustLoadNextDayMetricIDs(date uint64) *byDateMetricIDEntry {
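For completeness, a minimal sketch of the `bloomfilter.Limiter` lifecycle as extended by this diff. The `NewLimiter`, `Add`, `MaxItems` and `MustStop` calls are taken from the code above; the item count, the 1000-series limit and the use of plain counters instead of real label hashes are illustrative, and the snippet assumes it is compiled inside the VictoriaMetrics module:

```go
package main

import (
	"fmt"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
)

func main() {
	// Allow up to 1000 unique series hashes per 24-hour window,
	// mirroring the refresh interval used in scrapework.go above.
	sl := bloomfilter.NewLimiter(1000, 24*time.Hour)
	// MustStop terminates the background refresher goroutine added in this diff.
	// Call it only once nothing else uses the limiter.
	defer sl.MustStop()

	dropped := 0
	for i := uint64(0); i < 2000; i++ {
		// In scrapework.go the argument is the label hash of a scraped time series;
		// plain counters stand in for those hashes here.
		if !sl.Add(i) {
			// In scrapework.go this branch drops the series and increments
			// promscrape_series_limit_rows_dropped_total.
			dropped++
		}
	}
	fmt.Printf("maxItems=%d, dropped=%d\n", sl.MaxItems(), dropped)
}
```

The same `MustStop` pattern is what `remotewrite.Stop` and `Storage.MustClose` rely on above: create the limiter once, share it, and stop it only after all users of it are done.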