From 6a1e0692f67aa0c9f4f6867a64ebbf72292d7a6b Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 22 Jun 2022 13:42:07 +0300 Subject: [PATCH 1/5] docs/Cluster-VictoriaMetrics.md: small fixes --- docs/Cluster-VictoriaMetrics.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 950ab50e3..3e86c1bbf 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -273,7 +273,7 @@ Cluster performance and capacity can be scaled up in two ways: General recommendations for cluster scalability: -- Adding more CPU and RAM to existing `vmselect` nodes improves the performance for heavy queries, which process big number of time series with big number of raw samples. +- Adding more CPU and RAM to existing `vmselect` nodes improves the performance for heavy queries, which process big number of time series with big number of raw samples. See [this article on how to detect and optimize heavy queries](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986). - Adding more `vmstorage` nodes increases the number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) the cluster can handle. This also increases query performance over time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). The cluster stability is also improved with the number of `vmstorage` nodes, since active `vmstorage` nodes need to handle lower additional workload when some of `vmstorage` nodes become unavailable. - Adding more CPU and RAM to existing `vmstorage` nodes increases the number of [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) the cluster can handle. 
It is preferred to add more `vmstorage` nodes over adding more CPU and RAM to existing `vmstorage` nodes, since higher number of `vmstorage` nodes increases cluster stability and improves query performance over time series with [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). - Adding more `vminsert` nodes increases the maximum possible data ingestion speed, since the ingested data may be split among bigger number of `vminsert` nodes. @@ -294,8 +294,6 @@ with new configs. Cluster should remain in working state if at least a single node of each type remains available during the update process. See [cluster availability](#cluster-availability) section for details. -See also more advanced [cardinality limiter in vmagent](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). - ## Cluster availability - HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes. @@ -348,7 +346,7 @@ By default cluster components of VictoriaMetrics are tuned for an optimal resour - `-search.maxTagKeys` at `vmselect` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory at `vmstorage` and `vmselect` when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage. - `-search.maxTagValues` at `vmselect` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. 
Queries to this endpoint may take big amounts of CPU time and memory at `vmstorage` and `vmselect` when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage. -See also [capacity planning docs](#capacity-planning). +See also [capacity planning docs](#capacity-planning) and [cardinality limiter in vmagent](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). ## High availability @@ -398,7 +396,7 @@ When the replication is enabled, `-dedup.minScrapeInterval=1ms` command-line fla Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883), so it is recommended performing regular backups. See [these docs](#backups) for details. -Note that the replication increases resource usage - CPU, RAM, disk space, network bandwidth - by up to `-replicationFactor=N` times, because `vminsert` stores `N` copies of incoming data to distinct `vmstorage` nodes and `vmselect` needs to de-duplicate the replicated data obtained from `vmstorage` nodes during querying. So it is more cost-effective to offload the replication to underlying replicated durable storage pointed by `-storageDataPath` such as [Google Compute Engine persistent disk](https://cloud.google.com/compute/docs/disks/#pdspecs), which is protected from data loss and data corruption. It also provide consistently high performance and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime. HDD-based persistent disks should be enough for the majority of use cases. It is recommended using durable replicated persistent volumes in Kubernetes. 
+Note that the replication increases resource usage - CPU, RAM, disk space, network bandwidth - by up to `-replicationFactor=N` times, because `vminsert` stores `N` copies of incoming data to distinct `vmstorage` nodes and `vmselect` needs to de-duplicate the replicated data obtained from `vmstorage` nodes during querying. So it is more cost-effective to offload the replication to underlying replicated durable storage pointed by `-storageDataPath` such as [Google Compute Engine persistent disk](https://cloud.google.com/compute/docs/disks/#pdspecs), which is protected from data loss and data corruption. It also provides consistently high performance and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime. HDD-based persistent disks should be enough for the majority of use cases. It is recommended using durable replicated persistent volumes in Kubernetes. ## Deduplication From 51362f9333f77c194b8a04ac3d6cae281d77d67b Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 22 Jun 2022 14:17:02 +0300 Subject: [PATCH 2/5] app/vmselect: add `-search.setLookbackToStep` command-line flag for making the gap filling algorithm similar to InfluxDB data model This option should override `-search.maxStalenessInterval` for most cases when users migrate from InfluxDB to VictoriaMetrics --- README.md | 8 +++++--- app/vmselect/prometheus/prometheus.go | 18 ++++++++++++++++-- docs/CHANGELOG.md | 2 ++ docs/Cluster-VictoriaMetrics.md | 4 +++- docs/README.md | 8 +++++--- docs/Single-server-VictoriaMetrics.md | 8 +++++--- docs/guides/migrate-from-influx.md | 6 ++++-- 7 files changed, 40 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 3e8735138..1d507f477 100644 --- a/README.md +++ b/README.md @@ -1599,8 +1599,8 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` 
command-line flag to value close to the maximum interval between samples. -* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag. - It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes +* If you are switching from InfluxDB or TimescaleDB, then it may be needed to set `-search.setLookbackToStep` command-line flag. + This suppresses default gap filling algorithm used by VictoriaMetrics - by default it assumes each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals. * Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined via [cardinality explorer](#cardinality-explorer) and via [/api/v1/status/tsdb](#tsdb-stats) endpoint. @@ -2108,7 +2108,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li -search.maxSeries int The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 100000) -search.maxStalenessInterval duration - The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons + The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. 
See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag -search.maxStatusRequestDuration duration The maximum duration for /api/v1/status/* requests (default 5m0s) -search.maxStepForPointsAdjustment duration @@ -2133,6 +2133,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats (default 1ms) -search.resetCacheAuthKey string Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call + -search.setLookbackToStep + Whether to fix lookback interval to 'step' query arg value. If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored -search.treatDotsAsIsInRegexps Whether to treat dots as is in regexp label filters used in queries. For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter -selfScrapeInstance string diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 1c86588d9..b30bdc304 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -38,7 +38,9 @@ var ( maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+ "By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning "+ "Prometheus data model closer to Influx-style data model. 
See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. "+ - "See also '-search.maxLookback' flag, which has the same meaning due to historical reasons") + "See also '-search.setLookbackToStep' flag") + setLookbackToStep = flag.Bool("search.setLookbackToStep", false, "Whether to fix lookback interval to 'step' query arg value. "+ + "If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored") maxStepForPointsAdjustment = flag.Duration("search.maxStepForPointsAdjustment", time.Minute, "The maximum step when /api/v1/query_range handler adjusts "+ "points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data") @@ -981,7 +983,19 @@ func getMaxLookback(r *http.Request) (int64, error) { if d == 0 { d = maxStalenessInterval.Milliseconds() } - return searchutils.GetDuration(r, "max_lookback", d) + maxLookback, err := searchutils.GetDuration(r, "max_lookback", d) + if err != nil { + return 0, err + } + d = maxLookback + if *setLookbackToStep { + step, err := searchutils.GetDuration(r, "step", d) + if err != nil { + return 0, err + } + d = step + } + return d, nil } func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error) { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4266addda..f0ff8d37e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -15,6 +15,8 @@ The following tip changes can be tested by building VictoriaMetrics components f ## tip +* FEATURE: add `-search.setLookbackToStep` command-line flag, which enables InfluxDB-like gap filling during querying. See [these docs](https://docs.victoriametrics.com/guides/migrate-from-influx.html) for details. 
+ ## [v1.78.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.78.0) diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 3e86c1bbf..f50323ae2 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -774,7 +774,7 @@ Below is the output for `/path/to/vmselect -help`: -search.maxSeries int The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 100000) -search.maxStalenessInterval duration - The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons + The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag -search.maxStatusRequestDuration duration The maximum duration for /api/v1/status/* requests (default 5m0s) -search.maxStepForPointsAdjustment duration @@ -793,6 +793,8 @@ Below is the output for `/path/to/vmselect -help`: The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats (default 1ms) -search.resetCacheAuthKey string Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call + -search.setLookbackToStep + Whether to fix lookback interval to 'step' query arg value. If set to true, the query model becomes closer to InfluxDB data model. 
If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored -search.treatDotsAsIsInRegexps Whether to treat dots as is in regexp label filters used in queries. For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter -selectNode array diff --git a/docs/README.md b/docs/README.md index 3e8735138..1d507f477 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1599,8 +1599,8 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag to value close to the maximum interval between samples. -* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag. - It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes +* If you are switching from InfluxDB or TimescaleDB, then it may be needed to set `-search.setLookbackToStep` command-line flag. + This suppresses default gap filling algorithm used by VictoriaMetrics - by default it assumes each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals. * Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined via [cardinality explorer](#cardinality-explorer) and via [/api/v1/status/tsdb](#tsdb-stats) endpoint. 
@@ -2108,7 +2108,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li -search.maxSeries int The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 100000) -search.maxStalenessInterval duration - The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons + The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag -search.maxStatusRequestDuration duration The maximum duration for /api/v1/status/* requests (default 5m0s) -search.maxStepForPointsAdjustment duration @@ -2133,6 +2133,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats (default 1ms) -search.resetCacheAuthKey string Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call + -search.setLookbackToStep + Whether to fix lookback interval to 'step' query arg value. If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored -search.treatDotsAsIsInRegexps Whether to treat dots as is in regexp label filters used in queries. 
For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter -selfScrapeInstance string diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 882bf2029..eb302a089 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1603,8 +1603,8 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet If the gaps are related to irregular intervals between samples, then try adjusting `-search.minStalenessInterval` command-line flag to value close to the maximum interval between samples. -* If you are switching from InfluxDB or TimescaleDB, then take a look at `-search.maxStalenessInterval` command-line flag. - It may be needed in order to suppress default gap filling algorithm used by VictoriaMetrics - by default it assumes +* If you are switching from InfluxDB or TimescaleDB, then it may be needed to set `-search.setLookbackToStep` command-line flag. + This suppresses default gap filling algorithm used by VictoriaMetrics - by default it assumes each time series is continuous instead of discrete, so it fills gaps between real samples with regular intervals. * Metrics and labels leading to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality) or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) can be determined via [cardinality explorer](#cardinality-explorer) and via [/api/v1/status/tsdb](#tsdb-stats) endpoint. 
@@ -2112,7 +2112,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li -search.maxSeries int The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage (default 100000) -search.maxStalenessInterval duration - The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons + The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.setLookbackToStep' flag -search.maxStatusRequestDuration duration The maximum duration for /api/v1/status/* requests (default 5m0s) -search.maxStepForPointsAdjustment duration @@ -2137,6 +2137,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats (default 1ms) -search.resetCacheAuthKey string Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call + -search.setLookbackToStep + Whether to fix lookback interval to 'step' query arg value. If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored -search.treatDotsAsIsInRegexps Whether to treat dots as is in regexp label filters used in queries. 
For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter -selfScrapeInstance string diff --git a/docs/guides/migrate-from-influx.md b/docs/guides/migrate-from-influx.md index a4965f91f..ccd3ba656 100644 --- a/docs/guides/migrate-from-influx.md +++ b/docs/guides/migrate-from-influx.md @@ -202,8 +202,10 @@ detail [here](https://docs.victoriametrics.com/keyConcepts.html#range-query). In behavior by adding `fill(previous)` to the query. VictoriaMetrics fills the gaps on the graph assuming time series are always continuous and not discrete. -To limit the interval on which VictoriaMetrics will try to fill the gaps try setting `-search.maxStalenessInterval` -command-line flag to the value equal to actual resolution between data points (for example, to `10s`). +To limit the interval on which VictoriaMetrics will try to fill the gaps, set `-search.setLookbackToStep` +command-line flag. This limits the gap filling to a single `step` interval passed to +[/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries). +This behavior is close to InfluxDB data model. 
### Advanced usage From 1c4f67c5d2483d8d2b059b3768b7de68a048a849 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 22 Jun 2022 20:38:43 +0300 Subject: [PATCH 3/5] lib/promauth: add ability to send additional http headers in requests to scrape targets This solves https://stackoverflow.com/questions/66032498/prometheus-scrape-metric-with-custom-header --- README.md | 5 +- app/vmagent/README.md | 18 +++- app/vmagent/remotewrite/client.go | 6 +- app/vmalert/datasource/vm.go | 4 +- app/vmalert/datasource/vm_test.go | 6 +- app/vmalert/notifier/alertmanager.go | 4 +- app/vmalert/remotewrite/remotewrite.go | 4 +- docs/CHANGELOG.md | 10 ++ docs/README.md | 5 +- docs/Single-server-VictoriaMetrics.md | 5 +- docs/vmagent.md | 18 +++- lib/promauth/config.go | 92 ++++++++++++++++-- lib/promauth/config_test.go | 93 ++++++++++++++++++- lib/promscrape/client.go | 38 ++++---- lib/promscrape/config_test.go | 24 ++++- lib/promscrape/discovery/kubernetes/api.go | 7 +- .../discovery/kubernetes/api_watcher.go | 14 ++- lib/promscrape/discovery/openstack/api.go | 2 +- lib/promscrape/discoveryutils/client.go | 40 ++++---- lib/proxy/proxy.go | 26 +++++- 20 files changed, 329 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 1d507f477..d8be3e812 100644 --- a/README.md +++ b/README.md @@ -1134,6 +1134,8 @@ to a file containing a list of [relabel_config](https://prometheus.io/docs/prome The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`. See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2). +The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values. 
Example contents for `-relabelConfig` file: ```yml @@ -1147,8 +1149,7 @@ Example contents for `-relabelConfig` file: regex: true ``` -VictoriaMetrics components provide additional relabeling features such as Graphite-style relabeling. -See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. +VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. ## Federation diff --git a/app/vmagent/README.md b/app/vmagent/README.md index da787f5cc..6ce496659 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -183,6 +183,16 @@ Please file feature requests to [our issue tracker](https://github.com/VictoriaM `vmagent` also supports the following additional options in `scrape_configs` section: +* `headers` - a list of HTTP headers to send to scrape target with each scrape request. This can be used when the scrape target needs custom authorization and authentication. For example: + +```yaml +scrape_configs: +- job_name: custom_headers + headers: + - "TenantID: abc" + - "My-Auth: TopSecret" +``` + * `disable_compression: true` - to disable response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets to save network bandwidth. * `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis. @@ -297,6 +307,8 @@ The relabeling can be defined in the following places: * At the `-remoteWrite.relabelConfig` file. This relabeling is applied to all the collected metrics before sending them to remote storage. This relabeling can be debugged by passing `-remoteWrite.relabelDebug` command-line option to `vmagent`. In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to remote storage. 
* At the `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`. This relabeling can be debugged by passing `-remoteWrite.urlRelabelDebug` command-line options to `vmagent`. In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to the corresponding `-remoteWrite.url`. +All the files with relabeling configs can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values. + You can read more about relabeling in the following articles: * [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2) @@ -424,9 +436,11 @@ scrape_configs: Proxy can be configured with the following optional settings: * `proxy_authorization` for generic token authorization. See [Prometheus docs for details on authorization section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) -* `proxy_bearer_token` and `proxy_bearer_token_file` for Bearer token authorization * `proxy_basic_auth` for Basic authorization. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). +* `proxy_bearer_token` and `proxy_bearer_token_file` for Bearer token authorization +* `proxy_oauth2` for OAuth2 config. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#oauth2). * `proxy_tls_config` for TLS config. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config). +* `proxy_headers` for passing additional HTTP headers in requests to proxy. 
For example: @@ -443,6 +457,8 @@ scrape_configs: key_file: /path/to/key ca_file: /path/to/ca server_name: real-server-name + proxy_headers: + - "Proxy-Auth: top-secret" ``` ## Cardinality limiter diff --git a/app/vmagent/remotewrite/client.go b/app/vmagent/remotewrite/client.go index aa5f98c79..da6a8960b 100644 --- a/app/vmagent/remotewrite/client.go +++ b/app/vmagent/remotewrite/client.go @@ -219,7 +219,7 @@ func getAuthConfig(argIdx int) (*promauth.Config, error) { InsecureSkipVerify: tlsInsecureSkipVerify.GetOptionalArg(argIdx), } - authCfg, err := promauth.NewConfig(".", nil, basicAuthCfg, token, tokenFile, oauth2Cfg, tlsCfg) + authCfg, err := promauth.NewConfig(".", nil, basicAuthCfg, token, tokenFile, oauth2Cfg, tlsCfg, nil) if err != nil { return nil, fmt.Errorf("cannot populate OAuth2 config for remoteWrite idx: %d, err: %w", argIdx, err) } @@ -306,9 +306,7 @@ again: h.Set("Content-Type", "application/x-protobuf") h.Set("Content-Encoding", "snappy") h.Set("X-Prometheus-Remote-Write-Version", "0.1.0") - if ah := c.authCfg.GetAuthHeader(); ah != "" { - req.Header.Set("Authorization", ah) - } + c.authCfg.SetHeaders(req, true) if c.awsCfg != nil { if err := c.awsCfg.SignRequest(req, sigv4Hash); err != nil { // there is no need in retry, request will be rejected by client.Do and retried by code below diff --git a/app/vmalert/datasource/vm.go b/app/vmalert/datasource/vm.go index 0b60bbcde..df415dd66 100644 --- a/app/vmalert/datasource/vm.go +++ b/app/vmalert/datasource/vm.go @@ -146,9 +146,7 @@ func (s *VMStorage) newRequestPOST() (*http.Request, error) { } req.Header.Set("Content-Type", "application/json") if s.authCfg != nil { - if auth := s.authCfg.GetAuthHeader(); auth != "" { - req.Header.Set("Authorization", auth) - } + s.authCfg.SetHeaders(req, true) } return req, nil } diff --git a/app/vmalert/datasource/vm_test.go b/app/vmalert/datasource/vm_test.go index dba689550..1cd433cee 100644 --- a/app/vmalert/datasource/vm_test.go +++ 
b/app/vmalert/datasource/vm_test.go @@ -83,7 +83,7 @@ func TestVMInstantQuery(t *testing.T) { srv := httptest.NewServer(mux) defer srv.Close() - authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil) + authCfg, err := baCfg.NewConfig(".") if err != nil { t.Fatalf("unexpected: %s", err) } @@ -206,7 +206,7 @@ func TestVMRangeQuery(t *testing.T) { srv := httptest.NewServer(mux) defer srv.Close() - authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil) + authCfg, err := baCfg.NewConfig(".") if err != nil { t.Fatalf("unexpected: %s", err) } @@ -247,7 +247,7 @@ func TestVMRangeQuery(t *testing.T) { } func TestRequestParams(t *testing.T) { - authCfg, err := promauth.NewConfig(".", nil, baCfg, "", "", nil, nil) + authCfg, err := baCfg.NewConfig(".") if err != nil { t.Fatalf("unexpected: %s", err) } diff --git a/app/vmalert/notifier/alertmanager.go b/app/vmalert/notifier/alertmanager.go index c64387877..de6d5c226 100644 --- a/app/vmalert/notifier/alertmanager.go +++ b/app/vmalert/notifier/alertmanager.go @@ -79,9 +79,7 @@ func (am *AlertManager) send(ctx context.Context, alerts []Alert) error { req = req.WithContext(ctx) if am.authCfg != nil { - if auth := am.authCfg.GetAuthHeader(); auth != "" { - req.Header.Set("Authorization", auth) - } + am.authCfg.SetHeaders(req, true) } resp, err := am.client.Do(req) if err != nil { diff --git a/app/vmalert/remotewrite/remotewrite.go b/app/vmalert/remotewrite/remotewrite.go index 08e60a5a5..ec5ba98e6 100644 --- a/app/vmalert/remotewrite/remotewrite.go +++ b/app/vmalert/remotewrite/remotewrite.go @@ -245,9 +245,7 @@ func (c *Client) send(ctx context.Context, data []byte) error { req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0") if c.authCfg != nil { - if auth := c.authCfg.GetAuthHeader(); auth != "" { - req.Header.Set("Authorization", auth) - } + c.authCfg.SetHeaders(req, true) } if !*disablePathAppend { req.URL.Path = path.Join(req.URL.Path, "/api/v1/write") diff --git a/docs/CHANGELOG.md 
b/docs/CHANGELOG.md index f0ff8d37e..b6c612c59 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,7 +16,17 @@ The following tip changes can be tested by building VictoriaMetrics components f ## tip * FEATURE: add `-search.setLookbackToStep` command-line flag, which enables InfluxDB-like gap filling during querying. See [these docs](https://docs.victoriametrics.com/guides/migrate-from-influx.html) for details. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to specify additional HTTP headers to send to scrape targets via `headers` section in `scrape_configs`. This can be used when the scrape target requires custom authorization and authentication like in [this stackoverflow question](https://stackoverflow.com/questions/66032498/prometheus-scrape-metric-with-custom-header). For example, the following config instructs sending `My-Auth: top-secret` and `TenantID: FooBar` headers with each request to `http://host123:8080/metrics`: +```yaml +scrape_configs: +- job_name: foo + headers: + - "My-Auth: top-secret" + - "TenantID: FooBar" + static_configs: + - targets: ["host123:8080"] +``` ## [v1.78.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.78.0) diff --git a/docs/README.md b/docs/README.md index 1d507f477..d8be3e812 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1134,6 +1134,8 @@ to a file containing a list of [relabel_config](https://prometheus.io/docs/prome The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`. See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2). +The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values. 
+ Example contents for `-relabelConfig` file: ```yml @@ -1147,8 +1149,7 @@ Example contents for `-relabelConfig` file: regex: true ``` -VictoriaMetrics components provide additional relabeling features such as Graphite-style relabeling. -See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. +VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. ## Federation diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index eb302a089..45147f2e4 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1138,6 +1138,8 @@ to a file containing a list of [relabel_config](https://prometheus.io/docs/prome The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`. See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2). +The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values. + Example contents for `-relabelConfig` file: ```yml @@ -1151,8 +1153,7 @@ Example contents for `-relabelConfig` file: regex: true ``` -VictoriaMetrics components provide additional relabeling features such as Graphite-style relabeling. -See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. +VictoriaMetrics provides additional relabeling features such as Graphite-style relabeling. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling) for more details. 
## Federation diff --git a/docs/vmagent.md b/docs/vmagent.md index 9ad84df6e..e3e97534c 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -187,6 +187,16 @@ Please file feature requests to [our issue tracker](https://github.com/VictoriaM `vmagent` also support the following additional options in `scrape_configs` section: +* `headers` - a list of HTTP headers to send to scrape target with each scrape request. This can be used when the scrape target needs custom authorization and authentication. For example: + +```yaml +scrape_configs: +- job_name: custom_headers + headers: + - "TenantID: abc" + - "My-Auth: TopSecret" +``` + * `disable_compression: true` - to disable response compression on a per-job basis. By default `vmagent` requests compressed responses from scrape targets to save network bandwidth. * `disable_keepalive: true` - to disable [HTTP keep-alive connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) on a per-job basis. @@ -301,6 +311,8 @@ The relabeling can be defined in the following places: * At the `-remoteWrite.relabelConfig` file. This relabeling is applied to all the collected metrics before sending them to remote storage. This relabeling can be debugged by passing `-remoteWrite.relabelDebug` command-line option to `vmagent`. In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to remote storage. * At the `-remoteWrite.urlRelabelConfig` files. This relabeling is applied to metrics before sending them to the corresponding `-remoteWrite.url`. This relabeling can be debugged by passing `-remoteWrite.urlRelabelDebug` command-line options to `vmagent`. In this case `vmagent` logs metrics before and after the relabeling and then drops all the logged metrics instead of sending them to the corresponding `-remoteWrite.url`. 
+All the files with relabeling configs can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values. + You can read more about relabeling in the following articles: * [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2) @@ -428,9 +440,11 @@ scrape_configs: Proxy can be configured with the following optional settings: * `proxy_authorization` for generic token authorization. See [Prometheus docs for details on authorization section](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) -* `proxy_bearer_token` and `proxy_bearer_token_file` for Bearer token authorization * `proxy_basic_auth` for Basic authorization. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). +* `proxy_bearer_token` and `proxy_bearer_token_file` for Bearer token authorization +* `proxy_oauth2` for OAuth2 config. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#oauth2). * `proxy_tls_config` for TLS config. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tls_config). +* `proxy_headers` for passing additional HTTP headers in requests to proxy. 
For example: @@ -447,6 +461,8 @@ scrape_configs: key_file: /path/to/key ca_file: /path/to/ca server_name: real-server-name + proxy_headers: + - "Proxy-Auth: top-secret" ``` ## Cardinality limiter diff --git a/lib/promauth/config.go b/lib/promauth/config.go index 3f7ee1178..020f6af25 100644 --- a/lib/promauth/config.go +++ b/lib/promauth/config.go @@ -15,6 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/fasthttp" "github.com/cespare/xxhash/v2" "golang.org/x/oauth2" "golang.org/x/oauth2/clientcredentials" @@ -116,6 +117,9 @@ type HTTPClientConfig struct { BearerTokenFile string `yaml:"bearer_token_file,omitempty"` OAuth2 *OAuth2Config `yaml:"oauth2,omitempty"` TLSConfig *TLSConfig `yaml:"tls_config,omitempty"` + + // Headers contains optional HTTP headers, which must be sent in the request to the server + Headers []string `yaml:"headers,omitempty"` } // ProxyClientConfig represents proxy client config. 
@@ -124,7 +128,11 @@ type ProxyClientConfig struct { BasicAuth *BasicAuthConfig `yaml:"proxy_basic_auth,omitempty"` BearerToken *Secret `yaml:"proxy_bearer_token,omitempty"` BearerTokenFile string `yaml:"proxy_bearer_token_file,omitempty"` + OAuth2 *OAuth2Config `yaml:"proxy_oauth2,omitempty"` TLSConfig *TLSConfig `yaml:"proxy_tls_config,omitempty"` + + // Headers contains optional HTTP headers, which must be sent in the request to the proxy + Headers []string `yaml:"proxy_headers,omitempty"` } // OAuth2Config represent OAuth2 configuration @@ -257,9 +265,71 @@ type Config struct { authHeader string authHeaderDeadline uint64 + headers []keyValue + authDigest string } +type keyValue struct { + key string + value string +} + +func parseHeaders(headers []string) ([]keyValue, error) { + if len(headers) == 0 { + return nil, nil + } + kvs := make([]keyValue, len(headers)) + for i, h := range headers { + n := strings.IndexByte(h, ':') + if n < 0 { + return nil, fmt.Errorf(`missing ':' in header %q; expecting "key: value" format`, h) + } + kv := &kvs[i] + kv.key = strings.TrimSpace(h[:n]) + kv.value = strings.TrimSpace(h[n+1:]) + } + return kvs, nil +} + +// HeadersNoAuthString returns string representation of ac headers. +func (ac *Config) HeadersNoAuthString() string { + if len(ac.headers) == 0 { + return "" + } + a := make([]string, len(ac.headers)) + for i, h := range ac.headers { + a[i] = h.key + ": " + h.value + "\r\n" + } + return strings.Join(a, "") +} + +// SetHeaders sets the configured ac headers to req. +func (ac *Config) SetHeaders(req *http.Request, setAuthHeader bool) { + reqHeaders := req.Header + for _, h := range ac.headers { + reqHeaders.Set(h.key, h.value) + } + if setAuthHeader { + if ah := ac.GetAuthHeader(); ah != "" { + reqHeaders.Set("Authorization", ah) + } + } +} + +// SetFasthttpHeaders sets the configured ac headers to req. 
+func (ac *Config) SetFasthttpHeaders(req *fasthttp.Request, setAuthHeader bool) { + reqHeaders := &req.Header + for _, h := range ac.headers { + reqHeaders.Set(h.key, h.value) + } + if setAuthHeader { + if ah := ac.GetAuthHeader(); ah != "" { + reqHeaders.Set("Authorization", ah) + } + } +} + // GetAuthHeader returns optional `Authorization: ...` http header. func (ac *Config) GetAuthHeader() string { f := ac.getAuthHeader @@ -281,8 +351,8 @@ func (ac *Config) GetAuthHeader() string { // It is also used for comparing Config objects for equality. If two Config // objects have the same string representation, then they are considered equal. func (ac *Config) String() string { - return fmt.Sprintf("AuthDigest=%s, TLSRootCA=%s, TLSCertificate=%s, TLSServerName=%s, TLSInsecureSkipVerify=%v, TLSMinVersion=%d", - ac.authDigest, ac.tlsRootCAString(), ac.tlsCertDigest, ac.TLSServerName, ac.TLSInsecureSkipVerify, ac.TLSMinVersion) + return fmt.Sprintf("AuthDigest=%s, Headers=%s, TLSRootCA=%s, TLSCertificate=%s, TLSServerName=%s, TLSInsecureSkipVerify=%v, TLSMinVersion=%d", + ac.authDigest, ac.headers, ac.tlsRootCAString(), ac.tlsCertDigest, ac.TLSServerName, ac.TLSInsecureSkipVerify, ac.TLSMinVersion) } func (ac *Config) tlsRootCAString() string { @@ -330,21 +400,26 @@ func (ac *Config) NewTLSConfig() *tls.Config { // NewConfig creates auth config for the given hcc. func (hcc *HTTPClientConfig) NewConfig(baseDir string) (*Config, error) { - return NewConfig(baseDir, hcc.Authorization, hcc.BasicAuth, hcc.BearerToken.String(), hcc.BearerTokenFile, hcc.OAuth2, hcc.TLSConfig) + return NewConfig(baseDir, hcc.Authorization, hcc.BasicAuth, hcc.BearerToken.String(), hcc.BearerTokenFile, hcc.OAuth2, hcc.TLSConfig, hcc.Headers) } // NewConfig creates auth config for the given pcc. 
func (pcc *ProxyClientConfig) NewConfig(baseDir string) (*Config, error) { - return NewConfig(baseDir, pcc.Authorization, pcc.BasicAuth, pcc.BearerToken.String(), pcc.BearerTokenFile, nil, pcc.TLSConfig) + return NewConfig(baseDir, pcc.Authorization, pcc.BasicAuth, pcc.BearerToken.String(), pcc.BearerTokenFile, pcc.OAuth2, pcc.TLSConfig, pcc.Headers) } // NewConfig creates auth config for the given o. func (o *OAuth2Config) NewConfig(baseDir string) (*Config, error) { - return NewConfig(baseDir, nil, nil, "", "", nil, o.TLSConfig) + return NewConfig(baseDir, nil, nil, "", "", nil, o.TLSConfig, nil) +} + +// NewConfig creates auth config for the given ba. +func (ba *BasicAuthConfig) NewConfig(baseDir string) (*Config, error) { + return NewConfig(baseDir, nil, ba, "", "", nil, nil, nil) } // NewConfig creates auth config from the given args. -func NewConfig(baseDir string, az *Authorization, basicAuth *BasicAuthConfig, bearerToken, bearerTokenFile string, o *OAuth2Config, tlsConfig *TLSConfig) (*Config, error) { +func NewConfig(baseDir string, az *Authorization, basicAuth *BasicAuthConfig, bearerToken, bearerTokenFile string, o *OAuth2Config, tlsConfig *TLSConfig, headers []string) (*Config, error) { var getAuthHeader func() string authDigest := "" if az != nil { @@ -517,6 +592,10 @@ func NewConfig(baseDir string, az *Authorization, basicAuth *BasicAuthConfig, be tlsMinVersion = v } } + parsedHeaders, err := parseHeaders(headers) + if err != nil { + return nil, err + } ac := &Config{ TLSRootCA: tlsRootCA, TLSServerName: tlsServerName, @@ -527,6 +606,7 @@ func NewConfig(baseDir string, az *Authorization, basicAuth *BasicAuthConfig, be tlsCertDigest: tlsCertDigest, getAuthHeader: getAuthHeader, + headers: parsedHeaders, authDigest: authDigest, } return ac, nil diff --git a/lib/promauth/config_test.go b/lib/promauth/config_test.go index 04795710a..dcbbd2a30 100644 --- a/lib/promauth/config_test.go +++ b/lib/promauth/config_test.go @@ -4,6 +4,8 @@ import ( "net/http" 
"net/http/httptest" "testing" + + "github.com/VictoriaMetrics/fasthttp" ) func TestNewConfig(t *testing.T) { @@ -116,18 +118,103 @@ func TestNewConfig(t *testing.T) { mock := httptest.NewServer(r) tt.args.oauth.TokenURL = mock.URL } - got, err := NewConfig(tt.args.baseDir, tt.args.az, tt.args.basicAuth, tt.args.bearerToken, tt.args.bearerTokenFile, tt.args.oauth, tt.args.tlsConfig) + got, err := NewConfig(tt.args.baseDir, tt.args.az, tt.args.basicAuth, tt.args.bearerToken, tt.args.bearerTokenFile, tt.args.oauth, tt.args.tlsConfig, nil) if (err != nil) != tt.wantErr { t.Errorf("NewConfig() error = %v, wantErr %v", err, tt.wantErr) return } if got != nil { - ah := got.GetAuthHeader() + req, err := http.NewRequest("GET", "http://foo", nil) + if err != nil { + t.Fatalf("unexpected error in http.NewRequest: %s", err) + } + got.SetHeaders(req, true) + ah := req.Header.Get("Authorization") if ah != tt.expectHeader { - t.Fatalf("unexpected auth header; got %q; want %q", ah, tt.expectHeader) + t.Fatalf("unexpected auth header from net/http request; got %q; want %q", ah, tt.expectHeader) + } + var fhreq fasthttp.Request + got.SetFasthttpHeaders(&fhreq, true) + ahb := fhreq.Header.Peek("Authorization") + if string(ahb) != tt.expectHeader { + t.Fatalf("unexpected auth header from fasthttp request; got %q; want %q", ahb, tt.expectHeader) } } }) } } + +func TestParseHeadersSuccess(t *testing.T) { + f := func(headers []string) { + t.Helper() + headersParsed, err := parseHeaders(headers) + if err != nil { + t.Fatalf("unexpected error when parsing %s: %s", headers, err) + } + for i, h := range headersParsed { + s := h.key + ": " + h.value + if s != headers[i] { + t.Fatalf("unexpected header parsed; got %q; want %q", s, headers[i]) + } + } + } + f(nil) + f([]string{"foo: bar"}) + f([]string{"Foo: bar", "A-b-c: d-e-f"}) +} + +func TestParseHeadersFailure(t *testing.T) { + f := func(headers []string) { + t.Helper() + headersParsed, err := parseHeaders(headers) + if err == nil { + 
t.Fatalf("expecting non-nil error from parseHeaders(%s)", headers) + } + if headersParsed != nil { + t.Fatalf("expecting nil result from parseHeaders(%s)", headers) + } + } + f([]string{"foo"}) + f([]string{"foo bar baz"}) +} + +func TestConfigHeaders(t *testing.T) { + f := func(headers []string, resultExpected string) { + t.Helper() + headersParsed, err := parseHeaders(headers) + if err != nil { + t.Fatalf("cannot parse headers: %s", err) + } + c, err := NewConfig("", nil, nil, "", "", nil, nil, headers) + if err != nil { + t.Fatalf("cannot create config: %s", err) + } + req, err := http.NewRequest("GET", "http://foo", nil) + if err != nil { + t.Fatalf("unexpected error in http.NewRequest: %s", err) + } + result := c.HeadersNoAuthString() + if result != resultExpected { + t.Fatalf("unexpected result from HeadersNoAuthString; got\n%s\nwant\n%s", result, resultExpected) + } + c.SetHeaders(req, false) + for _, h := range headersParsed { + v := req.Header.Get(h.key) + if v != h.value { + t.Fatalf("unexpected value for net/http header %q; got %q; want %q", h.key, v, h.value) + } + } + var fhreq fasthttp.Request + c.SetFasthttpHeaders(&fhreq, false) + for _, h := range headersParsed { + v := fhreq.Header.Peek(h.key) + if string(v) != h.value { + t.Fatalf("unexpected value for fasthttp header %q; got %q; want %q", h.key, v, h.value) + } + } + } + f(nil, "") + f([]string{"foo: bar"}, "foo: bar\r\n") + f([]string{"Foo-Bar: Baz s:sdf", "A:b", "X-Forwarded-For: A-B:c"}, "Foo-Bar: Baz s:sdf\r\nA: b\r\nX-Forwarded-For: A-B:c\r\n") +} diff --git a/lib/promscrape/client.go b/lib/promscrape/client.go index 1ad31af1d..651001bba 100644 --- a/lib/promscrape/client.go +++ b/lib/promscrape/client.go @@ -48,8 +48,10 @@ type client struct { scrapeTimeoutSecondsStr string host string requestURI string - getAuthHeader func() string - getProxyAuthHeader func() string + setHeaders func(req *http.Request) + setProxyHeaders func(req *http.Request) + setFasthttpHeaders func(req 
*fasthttp.Request) + setFasthttpProxyHeaders func(req *fasthttp.Request) denyRedirects bool disableCompression bool disableKeepAlive bool @@ -65,7 +67,8 @@ func newClient(sw *ScrapeWork) *client { if isTLS { tlsCfg = sw.AuthConfig.NewTLSConfig() } - getProxyAuthHeader := func() string { return "" } + setProxyHeaders := func(req *http.Request) {} + setFasthttpProxyHeaders := func(req *fasthttp.Request) {} proxyURL := sw.ProxyURL if !isTLS && proxyURL.IsHTTPOrHTTPS() { // Send full sw.ScrapeURL in requests to a proxy host for non-TLS scrape targets @@ -79,8 +82,11 @@ func newClient(sw *ScrapeWork) *client { tlsCfg = sw.ProxyAuthConfig.NewTLSConfig() } proxyURLOrig := proxyURL - getProxyAuthHeader = func() string { - return proxyURLOrig.GetAuthHeader(sw.ProxyAuthConfig) + setProxyHeaders = func(req *http.Request) { + proxyURLOrig.SetHeaders(sw.ProxyAuthConfig, req) + } + setFasthttpProxyHeaders = func(req *fasthttp.Request) { + proxyURLOrig.SetFasthttpHeaders(sw.ProxyAuthConfig, req) } proxyURL = &proxy.URL{} } @@ -148,8 +154,10 @@ func newClient(sw *ScrapeWork) *client { scrapeTimeoutSecondsStr: fmt.Sprintf("%.3f", sw.ScrapeTimeout.Seconds()), host: host, requestURI: requestURI, - getAuthHeader: sw.AuthConfig.GetAuthHeader, - getProxyAuthHeader: getProxyAuthHeader, + setHeaders: func(req *http.Request) { sw.AuthConfig.SetHeaders(req, true) }, + setProxyHeaders: setProxyHeaders, + setFasthttpHeaders: func(req *fasthttp.Request) { sw.AuthConfig.SetFasthttpHeaders(req, true) }, + setFasthttpProxyHeaders: setFasthttpProxyHeaders, denyRedirects: sw.DenyRedirects, disableCompression: sw.DisableCompression, disableKeepAlive: sw.DisableKeepAlive, @@ -173,12 +181,8 @@ func (c *client) GetStreamReader() (*streamReader, error) { // Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx. 
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162 req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", c.scrapeTimeoutSecondsStr) - if ah := c.getAuthHeader(); ah != "" { - req.Header.Set("Authorization", ah) - } - if ah := c.getProxyAuthHeader(); ah != "" { - req.Header.Set("Proxy-Authorization", ah) - } + c.setHeaders(req) + c.setProxyHeaders(req) resp, err := c.sc.Do(req) if err != nil { cancel() @@ -224,12 +228,8 @@ func (c *client) ReadData(dst []byte) ([]byte, error) { // Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162 req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", c.scrapeTimeoutSecondsStr) - if ah := c.getAuthHeader(); ah != "" { - req.Header.Set("Authorization", ah) - } - if ah := c.getProxyAuthHeader(); ah != "" { - req.Header.Set("Proxy-Authorization", ah) - } + c.setFasthttpHeaders(req) + c.setFasthttpProxyHeaders(req) if !*disableCompression && !c.disableCompression { req.Header.Set("Accept-Encoding", "gzip") } diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 6659e5376..bd6ba5277 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -141,6 +141,9 @@ scrape_configs: - x authorization: type: foobar + headers: + - 'TenantID: fooBar' + - 'X: y:z' relabel_configs: - source_labels: [abc] static_configs: @@ -149,6 +152,8 @@ scrape_configs: relabel_debug: true scrape_align_interval: 1h30m0s proxy_bearer_token_file: file.txt + proxy_headers: + - 'My-Auth-Header: top-secret' `) } @@ -332,7 +337,7 @@ scrape_configs: jobNameOriginal: "blackbox", }} if !reflect.DeepEqual(sws, swsExpected) { - t.Fatalf("unexpected scrapeWork;\ngot\n%+v\nwant\n%+v", sws, swsExpected) + t.Fatalf("unexpected scrapeWork;\ngot\n%#v\nwant\n%#v", sws, swsExpected) } } @@ -1650,12 +1655,25 @@ scrape_configs: jobNameOriginal: 
"aaa", }, }) + + ac, err := promauth.NewConfig(".", nil, nil, "", "", nil, nil, []string{"My-Auth: foo-Bar"}) + if err != nil { + t.Fatalf("unexpected error when creating promauth.Config: %s", err) + } + proxyAC, err := promauth.NewConfig(".", nil, nil, "", "", nil, nil, []string{"Foo:bar"}) + if err != nil { + t.Fatalf("unexpected error when creating promauth.Config for proxy: %s", err) + } f(` scrape_configs: - job_name: 'snmp' sample_limit: 100 disable_keepalive: true disable_compression: true + headers: + - "My-Auth: foo-Bar" + proxy_headers: + - "Foo: bar" scrape_align_interval: 1s scrape_offset: 0.5s static_configs: @@ -1727,8 +1745,8 @@ scrape_configs: Value: "snmp", }, }, - AuthConfig: &promauth.Config{}, - ProxyAuthConfig: &promauth.Config{}, + AuthConfig: ac, + ProxyAuthConfig: proxyAC, SampleLimit: 100, DisableKeepAlive: true, DisableCompression: true, diff --git a/lib/promscrape/discovery/kubernetes/api.go b/lib/promscrape/discovery/kubernetes/api.go index 8950e623d..45cb7a06f 100644 --- a/lib/promscrape/discovery/kubernetes/api.go +++ b/lib/promscrape/discovery/kubernetes/api.go @@ -16,7 +16,8 @@ func newAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFu default: return nil, fmt.Errorf("unexpected `role`: %q; must be one of `node`, `pod`, `service`, `endpoints`, `endpointslice` or `ingress`", role) } - ac, err := sdc.HTTPClientConfig.NewConfig(baseDir) + cc := &sdc.HTTPClientConfig + ac, err := cc.NewConfig(baseDir) if err != nil { return nil, fmt.Errorf("cannot parse auth config: %w", err) } @@ -30,7 +31,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFu if err != nil { return nil, fmt.Errorf("cannot build kube config from the specified `kubeconfig_file` config option: %w", err) } - acNew, err := promauth.NewConfig(".", nil, kc.basicAuth, kc.token, kc.tokenFile, nil, kc.tlsConfig) + acNew, err := promauth.NewConfig(".", nil, kc.basicAuth, kc.token, kc.tokenFile, cc.OAuth2, kc.tlsConfig, 
cc.Headers) if err != nil { return nil, fmt.Errorf("cannot initialize auth config from `kubeconfig_file: %q`: %w", sdc.KubeConfigFile, err) } @@ -57,7 +58,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string, swcFunc ScrapeWorkConstructorFu tlsConfig := promauth.TLSConfig{ CAFile: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", } - acNew, err := promauth.NewConfig(".", nil, nil, "", "/var/run/secrets/kubernetes.io/serviceaccount/token", nil, &tlsConfig) + acNew, err := promauth.NewConfig(".", nil, nil, "", "/var/run/secrets/kubernetes.io/serviceaccount/token", cc.OAuth2, &tlsConfig, cc.Headers) if err != nil { return nil, fmt.Errorf("cannot initialize service account auth: %w; probably, `kubernetes_sd_config->api_server` is missing in Prometheus configs?", err) } diff --git a/lib/promscrape/discovery/kubernetes/api_watcher.go b/lib/promscrape/discovery/kubernetes/api_watcher.go index 6f76d5d0e..63a266afb 100644 --- a/lib/promscrape/discovery/kubernetes/api_watcher.go +++ b/lib/promscrape/discovery/kubernetes/api_watcher.go @@ -207,8 +207,8 @@ type groupWatcher struct { selectors []Selector attachNodeMetadata bool - getAuthHeader func() string - client *http.Client + setHeaders func(req *http.Request) + client *http.Client mu sync.Mutex m map[string]*urlWatcher @@ -235,9 +235,9 @@ func newGroupWatcher(apiServer string, ac *promauth.Config, namespaces []string, selectors: selectors, attachNodeMetadata: attachNodeMetadata, - getAuthHeader: ac.GetAuthHeader, - client: client, - m: make(map[string]*urlWatcher), + setHeaders: func(req *http.Request) { ac.SetHeaders(req, true) }, + client: client, + m: make(map[string]*urlWatcher), } } @@ -407,9 +407,7 @@ func (gw *groupWatcher) doRequest(requestURL string) (*http.Response, error) { if err != nil { logger.Fatalf("cannot create a request for %q: %s", requestURL, err) } - if ah := gw.getAuthHeader(); ah != "" { - req.Header.Set("Authorization", ah) - } + gw.setHeaders(req) resp, err := gw.client.Do(req) if err != 
nil { return nil, err diff --git a/lib/promscrape/discovery/openstack/api.go b/lib/promscrape/discovery/openstack/api.go index 40aa06ab9..010973c67 100644 --- a/lib/promscrape/discovery/openstack/api.go +++ b/lib/promscrape/discovery/openstack/api.go @@ -81,7 +81,7 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { port: sdc.Port, } if sdc.TLSConfig != nil { - ac, err := promauth.NewConfig(baseDir, nil, nil, "", "", nil, sdc.TLSConfig) + ac, err := promauth.NewConfig(baseDir, nil, nil, "", "", nil, sdc.TLSConfig, nil) if err != nil { return nil, err } diff --git a/lib/promscrape/discoveryutils/client.go b/lib/promscrape/discoveryutils/client.go index 6050b60bb..d306f73a2 100644 --- a/lib/promscrape/discoveryutils/client.go +++ b/lib/promscrape/discoveryutils/client.go @@ -42,10 +42,10 @@ type Client struct { apiServer string - hostPort string - getAuthHeader func() string - getProxyAuthHeader func() string - sendFullURL bool + hostPort string + setFasthttpHeaders func(req *fasthttp.Request) + setFasthttpProxyHeaders func(req *fasthttp.Request) + sendFullURL bool } // NewClient returns new Client for the given args. @@ -70,7 +70,7 @@ func NewClient(apiServer string, ac *promauth.Config, proxyURL *proxy.URL, proxy tlsCfg = ac.NewTLSConfig() } sendFullURL := !isTLS && proxyURL.IsHTTPOrHTTPS() - getProxyAuthHeader := func() string { return "" } + setFasthttpProxyHeaders := func(req *fasthttp.Request) {} if sendFullURL { // Send full urls in requests to a proxy host for non-TLS apiServer // like net/http package from Go does. 
@@ -82,8 +82,8 @@ func NewClient(apiServer string, ac *promauth.Config, proxyURL *proxy.URL, proxy tlsCfg = proxyAC.NewTLSConfig() } proxyURLOrig := proxyURL - getProxyAuthHeader = func() string { - return proxyURLOrig.GetAuthHeader(proxyAC) + setFasthttpProxyHeaders = func(req *fasthttp.Request) { + proxyURLOrig.SetFasthttpHeaders(proxyAC, req) } proxyURL = &proxy.URL{} } @@ -123,18 +123,18 @@ func NewClient(apiServer string, ac *promauth.Config, proxyURL *proxy.URL, proxy MaxConns: 64 * 1024, Dial: dialFunc, } - getAuthHeader := func() string { return "" } + setFasthttpHeaders := func(req *fasthttp.Request) {} if ac != nil { - getAuthHeader = ac.GetAuthHeader + setFasthttpHeaders = func(req *fasthttp.Request) { ac.SetFasthttpHeaders(req, true) } } return &Client{ - hc: hc, - blockingClient: blockingClient, - apiServer: apiServer, - hostPort: hostPort, - getAuthHeader: getAuthHeader, - getProxyAuthHeader: getProxyAuthHeader, - sendFullURL: sendFullURL, + hc: hc, + blockingClient: blockingClient, + apiServer: apiServer, + hostPort: hostPort, + setFasthttpHeaders: setFasthttpHeaders, + setFasthttpProxyHeaders: setFasthttpProxyHeaders, + sendFullURL: sendFullURL, }, nil } @@ -202,12 +202,8 @@ func (c *Client) getAPIResponseWithParamsAndClient(client *fasthttp.HostClient, } req.Header.SetHost(c.hostPort) req.Header.Set("Accept-Encoding", "gzip") - if ah := c.getAuthHeader(); ah != "" { - req.Header.Set("Authorization", ah) - } - if ah := c.getProxyAuthHeader(); ah != "" { - req.Header.Set("Proxy-Authorization", ah) - } + c.setFasthttpHeaders(&req) + c.setFasthttpProxyHeaders(&req) if modifyRequest != nil { modifyRequest(&req) } diff --git a/lib/proxy/proxy.go b/lib/proxy/proxy.go index 46c29bfbd..3756f0220 100644 --- a/lib/proxy/proxy.go +++ b/lib/proxy/proxy.go @@ -6,6 +6,7 @@ import ( "encoding/base64" "fmt" "net" + "net/http" "net/url" "strings" "time" @@ -60,8 +61,26 @@ func (u *URL) String() string { return pu.String() } -// GetAuthHeader returns 
Proxy-Authorization auth header for the given u and ac. -func (u *URL) GetAuthHeader(ac *promauth.Config) string { +// SetHeaders sets headers to req according to u and ac configs. +func (u *URL) SetHeaders(ac *promauth.Config, req *http.Request) { + ah := u.getAuthHeader(ac) + if ah != "" { + req.Header.Set("Proxy-Authorization", ah) + } + ac.SetHeaders(req, false) +} + +// SetFasthttpHeaders sets headers to req according to u and ac configs. +func (u *URL) SetFasthttpHeaders(ac *promauth.Config, req *fasthttp.Request) { + ah := u.getAuthHeader(ac) + if ah != "" { + req.Header.Set("Proxy-Authorization", ah) + } + ac.SetFasthttpHeaders(req, false) +} + +// getAuthHeader returns Proxy-Authorization auth header for the given u and ac. +func (u *URL) getAuthHeader(ac *promauth.Config) string { authHeader := "" if ac != nil { authHeader = ac.GetAuthHeader() @@ -130,9 +149,10 @@ func (u *URL) NewDialFunc(ac *promauth.Config) (fasthttp.DialFunc, error) { if isTLS { proxyConn = tls.Client(proxyConn, tlsCfg) } - authHeader := u.GetAuthHeader(ac) + authHeader := u.getAuthHeader(ac) if authHeader != "" { authHeader = "Proxy-Authorization: " + authHeader + "\r\n" + authHeader += ac.HeadersNoAuthString() } conn, err := sendConnectRequest(proxyConn, proxyAddr, addr, authHeader) if err != nil { From 668d67a3d363bd24106e31daa4f94a45f6590ddc Mon Sep 17 00:00:00 2001 From: Denys Holius <5650611+denisgolius@users.noreply.github.com> Date: Wed, 22 Jun 2022 21:56:57 +0300 Subject: [PATCH 4/5] Adds a list of supported architectures (#2769) * add list of supported architectures * Update docs/BestPractices.md Co-authored-by: Aliaksandr Valialkin --- docs/BestPractices.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/BestPractices.md b/docs/BestPractices.md index f4031bcd5..c30377e90 100644 --- a/docs/BestPractices.md +++ b/docs/BestPractices.md @@ -27,11 +27,21 @@ VictoriaMetrics is production-ready for the following operating systems: * Linux (Alpine, Ubuntu, 
Debian, RedHat, etc.) * FreeBSD * OpenBSD +* Solaris/SmartOS Some VictoriaMetrics components ([vmagent](https://docs.victoriametrics.com/vmagent.html), [vmalert](https://docs.victoriametrics.com/vmalert.html) and [vmauth](https://docs.victoriametrics.com/vmauth.html)) can run on Windows. VictoriaMetrics can run also on MacOS for testing and development purposes. +## Supported Architectures + +* **Linux**: i386, amd64, arm, arm64, ppc64le +* **FreeBSD**: i386, amd64, arm +* **OpenBSD**: i386, amd64, arm +* **Solaris/SmartOS**: i386, amd64 +* **MacOS**: amd64, arm64 (for testing and development purposes) +* **Windows**: amd64 (supported by [vmagent](https://docs.victoriametrics.com/vmagent.html), [vmalert](https://docs.victoriametrics.com/vmalert.html) and [vmauth](https://docs.victoriametrics.com/vmauth.html)). + ## Upgrade procedure It is safe to upgrade VictoriaMetrics to new versions unless the [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) say otherwise. It is safe to skip multiple versions during the upgrade unless release notes say otherwise. It is recommended to perform regular upgrades to the latest version, since it may contain important bug fixes, performance optimizations or new features. From 52eadb729ec4d502cd21a2ea56b0cfcce3d00774 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 23 Jun 2022 10:55:14 +0300 Subject: [PATCH 5/5] lib/promscrape: always send stale markers with the real scrape timestamp This guarantees that queries won't return data just after the series has disappeared. 
--- docs/CHANGELOG.md | 2 ++ lib/promscrape/scrapework.go | 13 ++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b6c612c59..8227ac2c9 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -28,6 +28,8 @@ scrape_configs: - targets: ["host123:8080"] ``` +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes. + ## [v1.78.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.78.0) Released at 20-06-2022 diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 92dda54d6..f622edfbf 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -332,7 +332,10 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{}) // Do not send staleness markers on graceful shutdown as Prometheus does. // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2013#issuecomment-1006994079 default: - // Send staleness markers when the given target disappears. + // Send staleness markers to all the metrics scraped last time from the target + // when the given target disappears as Prometheus does. + // Use the current real timestamp for staleness markers, so queries + // stop returning data just after the time the target disappears. sw.sendStaleSeries(lastScrape, "", t, true) } if sw.seriesLimiter != nil { @@ -491,7 +494,9 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error } // body must be released only after wc is released, since wc refers to body. 
if !areIdenticalSeries { - sw.sendStaleSeries(lastScrape, bodyString, scrapeTimestamp, false) + // Send stale markers for disappeared metrics with the real scrape timestamp + // in order to guarantee that query doesn't return data after this time for the disappeared metrics. + sw.sendStaleSeries(lastScrape, bodyString, realTimestamp, false) sw.storeLastScrape(body.B) } sw.finalizeLastScrape() @@ -599,7 +604,9 @@ func (sw *scrapeWork) scrapeStream(scrapeTimestamp, realTimestamp int64) error { wc.reset() writeRequestCtxPool.Put(wc) if !areIdenticalSeries { - sw.sendStaleSeries(lastScrape, bodyString, scrapeTimestamp, false) + // Send stale markers for disappeared metrics with the real scrape timestamp + // in order to guarantee that query doesn't return data after this time for the disappeared metrics. + sw.sendStaleSeries(lastScrape, bodyString, realTimestamp, false) sw.storeLastScrape(sbr.body) } sw.finalizeLastScrape()