From 2d1366353cc320712328088670bb95f4169c9d02 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Mon, 18 Jul 2022 17:15:02 +0300
Subject: [PATCH] lib/promscrape: reload all the scrape configs when the `global` section is changed inside `-promscrape.config`

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2884
---
 docs/CHANGELOG.md        |  1 +
 lib/promscrape/config.go | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 4d687d33d..df35dcf98 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 
 ## tip
 
+* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): restart all the scrape jobs during [config reload](https://docs.victoriametrics.com/vmagent.html#configuration-update) after `global` section is changed inside `-promscrape.config`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2884).
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with AWS ECS credentials. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2875). Thanks to @transacid for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2876).
 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): return series from `q1` if `q2` doesn't return matching time series in the query `q1 ifnot q2`. Previously series from `q1` weren't returned in this case.
 
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go
index 9d9e64e70..b6b54d276 100644
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@@ -130,6 +130,10 @@ func (cfg *Config) mustRestart(prevCfg *Config) {
 		prevScrapeCfgByName[scPrev.JobName] = scPrev
 	}
 
+	// Restart all the scrape jobs on Global config change.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2884
+	needGlobalRestart := !areEqualGlobalConfigs(&cfg.Global, &prevCfg.Global)
+
 	// Loop over the the new jobs, start new ones and restart updated ones.
 	var started, stopped, restarted int
 	currentJobNames := make(map[string]struct{}, len(cfg.ScrapeConfigs))
@@ -142,7 +146,7 @@ func (cfg *Config) mustRestart(prevCfg *Config) {
 			started++
 			continue
 		}
-		if areEqualScrapeConfigs(scPrev, sc) {
+		if !needGlobalRestart && areEqualScrapeConfigs(scPrev, sc) {
 			// The scrape config didn't change, so no need to restart it.
 			// Use the reference to the previous job, so it could be stopped properly later.
 			cfg.ScrapeConfigs[i] = scPrev
@@ -165,6 +169,12 @@ func (cfg *Config) mustRestart(prevCfg *Config) {
 	logger.Infof("restarted service discovery routines in %.3f seconds, stopped=%d, started=%d, restarted=%d", time.Since(startTime).Seconds(), stopped, started, restarted)
 }
 
+func areEqualGlobalConfigs(a, b *GlobalConfig) bool {
+	sa := a.marshalJSON()
+	sb := b.marshalJSON()
+	return string(sa) == string(sb)
+}
+
 func areEqualScrapeConfigs(a, b *ScrapeConfig) bool {
 	sa := a.marshalJSON()
 	sb := b.marshalJSON()
@@ -183,6 +193,14 @@ func (sc *ScrapeConfig) marshalJSON() []byte {
 	return data
 }
 
+func (gc *GlobalConfig) marshalJSON() []byte {
+	data, err := json.Marshal(gc)
+	if err != nil {
+		logger.Panicf("BUG: cannot marshal GlobalConfig: %s", err)
+	}
+	return data
+}
+
 func (cfg *Config) mustStop() {
 	startTime := time.Now()
 	logger.Infof("stopping service discovery routines...")