From ae85b20c5b56651492322de8f1c768bf182bf348 Mon Sep 17 00:00:00 2001 From: Nikolay Date: Wed, 30 Aug 2023 16:03:41 +0200 Subject: [PATCH] =?UTF-8?q?lib/promscrape/k8s=5Fsd:=20set=20resourceVersio?= =?UTF-8?q?n=20to=200=20by=20default=20for=20watch=20=E2=80=A6=20(#4901)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * lib/promscrape/k8s_sd: set resourceVersion to 0 by default for watch requests This should reduce load on Kubernetes ETCD servers, since requests without resourceVersion perform a forced cache sync between the Kubernetes API server and ETCD. More info at https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-watch and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4855 * wip --------- Co-authored-by: Aliaksandr Valialkin --- docs/CHANGELOG.md | 1 + .../discovery/kubernetes/api_watcher.go | 20 ++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2a7f841546..f38f7b0626 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -29,6 +29,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to set `member num` label for all the metrics scraped by a particular `vmagent` instance in [a cluster of vmagents](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets) via `-promscrape.cluster.memberLabel` command-line flag. See [these docs](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4247). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not log `unexpected EOF` when reading incoming metrics, since this error is expected and is handled during metrics' parsing. This reduces the amounts of noisy logs. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4817). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): retry failed write request on the closed connection immediately, without waiting for backoff. This should improve data delivery speed and reduce amount of error logs emitted by vmagent when using idle connections. See related [issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139). +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce load on Kubernetes control plane during initial service discovery. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4855) for details. * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the maximum recovery time at `vmselect` and `vminsert` when some of `vmstorage` nodes become unavailable because of networking issues from 60 seconds to 3 seconds by default. The recovery time can be tuned at `vmselect` and `vminsert` nodes with `-vmstorageUserTimeout` command-line flag if needed. Thanks to @wjordan for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4423). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): make the warning message more noticeable for text fields. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4848). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add button for auto-formatting PromQL/MetricsQL queries. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4681). Thanks to @aramattamara for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4694). 
diff --git a/lib/promscrape/discovery/kubernetes/api_watcher.go b/lib/promscrape/discovery/kubernetes/api_watcher.go index 7732847e50..78bb7ddd75 100644 --- a/lib/promscrape/discovery/kubernetes/api_watcher.go +++ b/lib/promscrape/discovery/kubernetes/api_watcher.go @@ -580,7 +580,13 @@ func (uw *urlWatcher) reloadObjects() string { } startTime := time.Now() - requestURL := uw.apiURL + apiURL := uw.apiURL + + // Set resourceVersion to 0 in order to reduce load on Kubernetes control plane. + // See https://kubernetes.io/docs/reference/using-api/api-concepts/#semantics-for-get-and-list + // and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4855 . + delimiter := getQueryArgsDelimiter(apiURL) + requestURL := apiURL + delimiter + "resourceVersion=0&resourceVersionMatch=NotOlderThan" resp, err := uw.gw.doRequest(requestURL) if err != nil { logger.Errorf("cannot perform request to %q: %s", requestURL, err) @@ -657,10 +663,7 @@ func (uw *urlWatcher) watchForUpdates() { } } apiURL := uw.apiURL - delimiter := "?" - if strings.Contains(apiURL, "?") { - delimiter = "&" - } + delimiter := getQueryArgsDelimiter(apiURL) timeoutSeconds := time.Duration(0.9 * float64(uw.gw.client.Timeout)).Seconds() apiURL += delimiter + "watch=1&allowWatchBookmarks=true&timeoutSeconds=" + strconv.Itoa(int(timeoutSeconds)) for { @@ -943,3 +946,10 @@ func getObjectParsersForRole(role string) (parseObjectFunc, parseObjectListFunc) return nil, nil } } + +func getQueryArgsDelimiter(apiURL string) string { + if strings.Contains(apiURL, "?") { + return "&" + } + return "?" +}