From 727bc02a5c0662240854a3d9a06b7031bef8e6e1 Mon Sep 17 00:00:00 2001 From: Andrei Baidarov Date: Mon, 25 Nov 2024 18:02:09 +0100 Subject: [PATCH] vmagent: set up a timeout for tcp connection establishment during k8s discovery Previously, the default dial timeout was used for the Kubernetes API server connection. This commit switches to the custom dialer used by all VictoriaMetrics components, which has a lower connection timeout (30s by default). Related issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7127 --------- Co-authored-by: f41gh7 --- docs/changelog/CHANGELOG.md | 1 + lib/promscrape/discovery/kubernetes/api_watcher.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/changelog/CHANGELOG.md b/docs/changelog/CHANGELOG.md index 926f680a7..830de5a39 100644 --- a/docs/changelog/CHANGELOG.md +++ b/docs/changelog/CHANGELOG.md @@ -25,6 +25,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): add `ignore_first_sample_interval` param to [aggregation config](https://docs.victoriametrics.com/stream-aggregation/#stream-aggregation-config). It allows users to control the time interval when aggregation skips sending aggregated samples to avoid unexpected spikes in values. By default, this interval is set to x2 of `staleness_interval`. The new setting is applicable only to `total`, `total_prometheus`, `increase`, `increase_prometheus` and `histogram_bucket` outputs. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7116) for details. Thanks to @iyuroch for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7313). 
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent): Properly return `200 OK` HTTP status code when importing data via [Pushgateway protocol](https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format) using [multitenant URL format](https://docs.victoriametrics.com/cluster-victoriametrics/#url-format). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7571). +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent): Properly set the `TCP` connection timeout for `Kubernetes API server` connections used for metrics scraping with `kubernetes_sd_configs`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7127). * BUGFIX: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): properly return result for binary operation `^` aka pow at query requests for `NaN` values. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7359) for details. * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix rendering of isolated data points on the graph that are not connected to other points. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert): improve the correctness of alert [state restoration](https://docs.victoriametrics.com/vmalert/#alerts-state-on-restarts). Previously, it could result in false-positive alerts if alert was resolved shortly before vmalert restart. 
diff --git a/lib/promscrape/discovery/kubernetes/api_watcher.go b/lib/promscrape/discovery/kubernetes/api_watcher.go index 58f16b735..e93a4d9d1 100644 --- a/lib/promscrape/discovery/kubernetes/api_watcher.go +++ b/lib/promscrape/discovery/kubernetes/api_watcher.go @@ -23,6 +23,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool" @@ -267,6 +268,7 @@ func getHTTPClient(ac *promauth.Config, proxyURL *url.URL) *http.Client { getTransport := func(cfg *tls.Config) http.RoundTripper { return &http.Transport{ Proxy: proxy, + DialContext: netutil.Dialer.DialContext, TLSHandshakeTimeout: 10 * time.Second, IdleConnTimeout: *apiServerTimeout, MaxIdleConnsPerHost: 100,