From b33b620af66f1199b93dc63324b3a32f0a16c402 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 6 Mar 2024 13:25:49 +0200 Subject: [PATCH] app/vmselect/prometheus: do not drop `match[]` filters if `-search.ignoreExtraFiltersAtLabelsAPI` flag is set The `match[]` filter is mandatory at /api/v1/series, so it mustn't be dropped here. There is no sense in dropping `match[]` filter together with `extra_label` and `extra_filters[]` at /api/v1/labels and /api/v1/label/.../values if -search.ignoreExtraFiltersAtLabelsAPI command-line flag is set, since: - the `match[]` filter triggers slow path at these APIs; - the `extra_label` and `extra_filters[]` filters narrow down the number of matched time series, so they improve performance compared to the case when only `match[]` filter is left, while `extra_label` and `extra_filters[]` filters are dropped. This is a follow-up for 0b7a23a91d5f29535c3c485f0c1d92f87ae91fef --- README.md | 15 ++++++++------- app/vmselect/prometheus/prometheus.go | 17 ++++++++++------- docs/CHANGELOG.md | 2 ++ docs/Cluster-VictoriaMetrics.md | 14 +++++++------- docs/url-examples.md | 19 +++++++++---------- 5 files changed, 36 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 266d1d3ad..f84c60268 100644 --- a/README.md +++ b/README.md @@ -1675,8 +1675,14 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical This allows saving CPU and RAM when executing unexpected heavy queries. - `-search.maxConcurrentRequests` limits the number of concurrent requests VictoriaMetrics can process. Bigger number of concurrent requests usually means bigger memory usage. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` - of additional memory. So it is better to limit the number of concurrent queries, while suspending additional incoming queries if the concurrency limit is reached. 
- VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for suspended queries. See also `-search.maxMemoryPerQuery` command-line flag. + of additional memory. So it is better to limit the number of concurrent queries, while pausing additional incoming queries if the concurrency limit is reached. + VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for paused queries. See also `-search.maxMemoryPerQuery` command-line flag. +- `-search.maxQueueDuration` limits the maximum duration queries may wait for execution when `-search.maxConcurrentRequests` concurrent queries are executed. +- `-search.ignoreExtraFiltersAtLabelsAPI` enables ignoring of `match[]`, [`extra_filters[]` and `extra_label`](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements) + query args at [/api/v1/labels](https://docs.victoriametrics.com/url-examples/#apiv1labels) and + [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues). + This may be useful for reducing the load on VictoriaMetrics if the provided extra filters match too many time series. + The downside is that the endpoints can return labels and series, which do not match the provided extra filters. - `-search.maxSamplesPerSeries` limits the number of raw samples the query can process per each time series. VictoriaMetrics sequentially processes raw samples per each found time series during the query. It unpacks raw samples on the selected time range per each time series into memory and then applies the given [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). 
The `-search.maxSamplesPerSeries` command-line flag @@ -1719,11 +1725,6 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxLabelsAPIDuration` to quite low value in order to limit CPU and memory usage. See also `-search.maxLabelsAPISeries` and `-search.ignoreExtraFiltersAtLabelsAPI`. -- `-search.ignoreExtraFiltersAtLabelsAPI` enables ignoring of `match[]`, [`extra_filters[]` and `extra_label`](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements) - query args at [/api/v1/labels](https://docs.victoriametrics.com/url-examples/#apiv1labels) and - [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues). - This may be useful for reducing the load on VictoriaMetrics if the provided extra filters match too many time series. - The downside is that the endpoints can return labels and series, which do not match the provided extra filters. - `-search.maxTagValueSuffixesPerSearch` limits the number of entries, which may be returned from `/metrics/find` endpoint. See [Graphite Metrics API usage docs](#graphite-metrics-api-usage). 
See also [resource usage limits at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#resource-usage-limits), diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index a897bdf7b..f74ebcc1e 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -1181,18 +1181,21 @@ func getCommonParamsInternal(r *http.Request, startTime time.Time, requireNonEmp if requireNonEmptyMatch && len(matches) == 0 { return nil, fmt.Errorf("missing `match[]` arg") } + filterss, err := getTagFilterssFromMatches(matches) + if err != nil { + return nil, err + } - var filterss [][]storage.TagFilter - if !isLabelsAPI || !*ignoreExtraFiltersAtLabelsAPI { - tagFilterss, err := getTagFilterssFromMatches(matches) - if err != nil { - return nil, err - } + if len(filterss) > 0 || !isLabelsAPI || !*ignoreExtraFiltersAtLabelsAPI { + // If matches isn't empty, then there is no sense in ignoring extra filters + // even if ignoreExtraFiltersAtLabelsAPI is set, since extra filters won't slow down + // the query - they can only improve query performance by reducing the number + // of matching series at the storage level. etfs, err := searchutils.GetExtraTagFilters(r) if err != nil { return nil, err } - filterss = searchutils.JoinTagFilterss(tagFilterss, etfs) + filterss = searchutils.JoinTagFilterss(filterss, etfs) } cp := &commonParams{ diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3b0d45570..2ae996c64 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -41,6 +41,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). 
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose `vm_streamaggr_flush_timeouts_total` and `vm_streamaggr_dedup_flush_timeouts_total` [counters](https://docs.victoriametrics.com/keyconcepts/#counter) at [`/metrics` page](https://docs.victoriametrics.com/#monitoring), which can be used for detecting flush timeouts for stream aggregation states. Expose also `vm_streamaggr_flush_duration_seconds` and `vm_streamaggr_dedup_flush_duration_seconds` [histograms](https://docs.victoriametrics.com/keyconcepts/#histogram) for monitoring the real flush durations of stream aggregation states. * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926). +* BUGFIX: do not drop `match[]` filter at [`/api/v1/series`](https://docs.victoriametrics.com/url-examples/#apiv1series) if `-search.ignoreExtraFiltersAtLabelsAPI` command-line flag is set, since this broke the `/api/v1/series` requests. + ## [v1.99.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.99.0) Released at 2024-03-01 diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index f739a0229..737b25640 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -658,11 +658,16 @@ Some workloads may need fine-grained resource usage limits. In these cases the f Bigger number of concurrent requests usually require bigger amounts of memory at both `vmselect` and `vmstorage`. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` of additional memory. So it is better to limit the number of concurrent queries, - while suspending additional incoming queries if the concurrency limit is reached. 
- `vmselect` and `vmstorage` provides `-search.maxQueueDuration` command-line flag for limiting the maximum wait time for suspended queries. + while pausing additional incoming queries if the concurrency limit is reached. + `vmselect` and `vmstorage` provides `-search.maxQueueDuration` command-line flag for limiting the maximum wait time for paused queries. See also `-search.maxMemoryPerQuery` command-line flag at `vmselect`. - `-search.maxQueueDuration` at `vmselect` and `vmstorage` limits the maximum duration queries may wait for execution when `-search.maxConcurrentRequests` concurrent queries are executed. +- `-search.ignoreExtraFiltersAtLabelsAPI` at `vmselect` enables ignoring of `match[]`, [`extra_filters[]` and `extra_label`](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements) + query args at [/api/v1/labels](https://docs.victoriametrics.com/url-examples/#apiv1labels) and + [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues). + This may be useful for reducing the load on `vmstorage` if the provided extra filters match too many time series. + The downside is that the endpoints can return labels and series, which do not match the provided extra filters. - `-search.maxSamplesPerSeries` at `vmselect` limits the number of raw samples the query can process per each time series. `vmselect` processes raw samples sequentially per each found time series during the query. It unpacks raw samples on the selected time range per each time series into memory and then applies the given [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). @@ -709,11 +714,6 @@ Some workloads may need fine-grained resource usage limits. In these cases the f when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). 
In this case it might be useful to set the `-search.maxLabelsAPIDuration` to quite low value in order to limit CPU and memory usage. See also `-search.maxLabelsAPISeries` and `-search.ignoreExtraFiltersAtLabelsAPI`. -- `-search.ignoreExtraFiltersAtLabelsAPI` at `vmselect` enables ignoring of `match[]`, [`extra_filters[]` and `extra_label`](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements) - query args at [/api/v1/labels](https://docs.victoriametrics.com/url-examples/#apiv1labels) and - [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues). - This may be useful for reducing the load on `vmstorage` if the provided extra filters match too many time series. - The downside is that the endpoints can return labels and series, which do not match the provided extra filters. - `-storage.maxDailySeries` at `vmstorage` can be used for limiting the number of time series seen per day aka [time series churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). See [cardinality limiter docs](#cardinality-limiter). - `-storage.maxHourlySeries` at `vmstorage` can be used for limiting the number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series). diff --git a/docs/url-examples.md b/docs/url-examples.md index ccf5521f2..788deb14f 100644 --- a/docs/url-examples.md +++ b/docs/url-examples.md @@ -261,9 +261,9 @@ Cluster version of VictoriaMetrics: curl http://:8481/select/0/prometheus/api/v1/labels ``` - -By default, VictoriaMetrics returns labels seen during the last day starting at 00:00 UTC. An arbitrary time range can be set via [`start` and `end` query args](https://docs.victoriametrics.com/#timestamp-formats). -The specified `start..end` time range is rounded to day granularity because of performance optimization concerns. +By default, VictoriaMetrics returns labels seen during the last day starting at 00:00 UTC because of performance reasons. 
+An arbitrary time range can be set via [`start` and `end` query args](https://docs.victoriametrics.com/#timestamp-formats). +The specified `start..end` time range is rounded to UTC day granularity because of performance reasons. Additional information: * [Getting label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) @@ -280,16 +280,15 @@ Single-node VictoriaMetrics: curl http://localhost:8428/prometheus/api/v1/label/job/values ``` - Cluster version of VictoriaMetrics: ```sh curl http://:8481/select/0/prometheus/api/v1/label/job/values ``` - -By default, VictoriaMetrics returns labels values seen during the last day starting at 00:00 UTC. An arbitrary time range can be set via `start` and `end` query args. -The specified `start..end` time range is rounded to day granularity because of performance optimization concerns. +By default, VictoriaMetrics returns labels values seen during the last day starting at 00:00 UTC because of performance reasons. +An arbitrary time range can be set via `start` and `end` query args. +The specified `start..end` time range is rounded to UTC day granularity because of performance reasons. Additional information: * [Querying label values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) @@ -361,9 +360,9 @@ Cluster version of VictoriaMetrics: curl http://:8481/select/0/prometheus/api/v1/series -d 'match[]=vm_http_request_errors_total' ``` - -By default, VictoriaMetrics returns time series seen during the last day starting at 00:00 UTC. An arbitrary time range can be set via `start` and `end` query args. -The specified `start..end` time range is rounded to day granularity because of performance optimization concerns. +By default, VictoriaMetrics returns time series seen during the last day starting at 00:00 UTC because of performance reasons. +An arbitrary time range can be set via `start` and `end` query args. 
+The specified `start..end` time range is rounded to UTC day granularity because of performance reasons. Additional information: * [Finding series by label matchers](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)