lib/promscrape/discovery/kubernetes: reload k8s resources on every error

This is needed for obtaining a fresh resourceVersion before the watch request is retried.
This commit is contained in:
Aliaksandr Valialkin 2021-02-27 01:45:34 +02:00
parent 7f1302688f
commit 6d0e7fb8b0

View file

@ -22,7 +22,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
) )
var apiServerTimeout = flag.Duration("promscrape.kubernetes.apiServerTimeout", 2*time.Minute, "Timeout for requests to Kuberntes API server") var apiServerTimeout = flag.Duration("promscrape.kubernetes.apiServerTimeout", 10*time.Minute, "How frequently to reload the full state from Kuberntes API server")
// apiConfig contains config for API server // apiConfig contains config for API server
type apiConfig struct { type apiConfig struct {
@ -345,7 +345,7 @@ func (uw *urlWatcher) watchForUpdates(resourceVersion string) {
if err != nil { if err != nil {
logger.Errorf("error when performing a request to %q: %s", requestURL, err) logger.Errorf("error when performing a request to %q: %s", requestURL, err)
backoffSleep() backoffSleep()
// There is no sense in reloading resources on non-http errors. resourceVersion = uw.reloadObjects()
continue continue
} }
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
@ -353,27 +353,23 @@ func (uw *urlWatcher) watchForUpdates(resourceVersion string) {
_ = resp.Body.Close() _ = resp.Body.Close()
logger.Errorf("unexpected status code for request to %q: %d; want %d; response: %q", requestURL, resp.StatusCode, http.StatusOK, body) logger.Errorf("unexpected status code for request to %q: %d; want %d; response: %q", requestURL, resp.StatusCode, http.StatusOK, body)
if resp.StatusCode == 410 { if resp.StatusCode == 410 {
// Update stale resourceVersion. See https://kubernetes.io/docs/reference/using-api/api-concepts/#410-gone-responses // There is no need for sleep on 410 error. See https://kubernetes.io/docs/reference/using-api/api-concepts/#410-gone-responses
resourceVersion = uw.reloadObjects()
backoffDelay = time.Second backoffDelay = time.Second
} else { } else {
backoffSleep() backoffSleep()
// There is no sense in reloading resources on non-410 status codes.
} }
resourceVersion = uw.reloadObjects()
continue continue
} }
backoffDelay = time.Second backoffDelay = time.Second
err = uw.readObjectUpdateStream(resp.Body) err = uw.readObjectUpdateStream(resp.Body)
_ = resp.Body.Close() _ = resp.Body.Close()
if err != nil { if err != nil {
if errors.Is(err, io.EOF) { if !errors.Is(err, io.EOF) {
// The stream has been closed (probably due to timeout)
backoffSleep()
continue
}
logger.Errorf("error when reading WatchEvent stream from %q: %s", requestURL, err) logger.Errorf("error when reading WatchEvent stream from %q: %s", requestURL, err)
}
backoffSleep() backoffSleep()
// There is no sense in reloading resources on non-http errors. resourceVersion = uw.reloadObjects()
continue continue
} }
} }