mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
lib/promscrape: stop scrapers for the removed targets before starting scrapers for the added targets
This should prevent from possible time series overlap when old target is substituted by new target (for example, during Kubernetes deployments). Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1526 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1530 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/748 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1509
This commit is contained in:
parent
fe8c462044
commit
db34c40aec
2 changed files with 36 additions and 13 deletions
|
@ -8,6 +8,8 @@ sort: 15
|
|||
|
||||
* FEATURE: vmalert: add `-remoteWrite.disablePathAppend` command-line flag, which can be used when custom `-remoteWrite.url` must be specified. For example, `./vmalert -disablePathAppend -remoteWrite.url='http://foo.bar/a/b/c?d=e'` would write data to `http://foo.bar/a/b/c?d=e` instead of `http://foo.bar/a/b/c?d=e/api/v1/write`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1536).
|
||||
|
||||
* BUGFIX: vmagent: stop scrapers for deleted targets before starting scrapers for added targets. This should prevent from possible time series overlap when old targets are substituted by new targets (for example, during new deployment in Kubernetes). The overlap could lead to incorrect query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1509).
|
||||
|
||||
|
||||
## [v1.64.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.64.0)
|
||||
|
||||
|
|
|
@ -301,6 +301,7 @@ func (sg *scraperGroup) update(sws []*ScrapeWork) {
|
|||
additionsCount := 0
|
||||
deletionsCount := 0
|
||||
swsMap := make(map[string][]prompbmarshal.Label, len(sws))
|
||||
var swsToStart []*ScrapeWork
|
||||
for _, sw := range sws {
|
||||
key := sw.key()
|
||||
originalLabels := swsMap[key]
|
||||
|
@ -320,33 +321,47 @@ func (sg *scraperGroup) update(sws []*ScrapeWork) {
|
|||
// The scraper for the given key already exists.
|
||||
continue
|
||||
}
|
||||
swsToStart = append(swsToStart, sw)
|
||||
}
|
||||
|
||||
// Start a scraper for the missing key.
|
||||
// Stop deleted scrapers before starting new scrapers in order to prevent
|
||||
// series overlap when old scrape target is substituted by new scrape target.
|
||||
var stoppedChs []<-chan struct{}
|
||||
for key, sc := range sg.m {
|
||||
if _, ok := swsMap[key]; !ok {
|
||||
close(sc.stopCh)
|
||||
stoppedChs = append(stoppedChs, sc.stoppedCh)
|
||||
delete(sg.m, key)
|
||||
deletionsCount++
|
||||
}
|
||||
}
|
||||
// Wait until all the deleted scrapers are stopped before starting new scrapers.
|
||||
for _, ch := range stoppedChs {
|
||||
<-ch
|
||||
}
|
||||
|
||||
// Start new scrapers only after the deleted scrapers are stopped.
|
||||
for _, sw := range swsToStart {
|
||||
sc := newScraper(sw, sg.name, sg.pushData)
|
||||
sg.activeScrapers.Inc()
|
||||
sg.scrapersStarted.Inc()
|
||||
sg.wg.Add(1)
|
||||
tsmGlobal.Register(sw)
|
||||
go func(sw *ScrapeWork) {
|
||||
defer sg.wg.Done()
|
||||
defer func() {
|
||||
sg.wg.Done()
|
||||
close(sc.stoppedCh)
|
||||
}()
|
||||
sc.sw.run(sc.stopCh)
|
||||
tsmGlobal.Unregister(sw)
|
||||
sg.activeScrapers.Dec()
|
||||
sg.scrapersStopped.Inc()
|
||||
}(sw)
|
||||
key := sw.key()
|
||||
sg.m[key] = sc
|
||||
additionsCount++
|
||||
}
|
||||
|
||||
// Stop deleted scrapers, which are missing in sws.
|
||||
for key, sc := range sg.m {
|
||||
if _, ok := swsMap[key]; !ok {
|
||||
close(sc.stopCh)
|
||||
delete(sg.m, key)
|
||||
deletionsCount++
|
||||
}
|
||||
}
|
||||
|
||||
if additionsCount > 0 || deletionsCount > 0 {
|
||||
sg.changesCount.Add(additionsCount + deletionsCount)
|
||||
logger.Infof("%s: added targets: %d, removed targets: %d; total targets: %d", sg.name, additionsCount, deletionsCount, len(sg.m))
|
||||
|
@ -354,13 +369,19 @@ func (sg *scraperGroup) update(sws []*ScrapeWork) {
|
|||
}
|
||||
|
||||
type scraper struct {
|
||||
sw scrapeWork
|
||||
sw scrapeWork
|
||||
|
||||
// stopCh is unblocked when the given scraper must be stopped.
|
||||
stopCh chan struct{}
|
||||
|
||||
// stoppedCh is unblocked when the given scraper is stopped.
|
||||
stoppedCh chan struct{}
|
||||
}
|
||||
|
||||
func newScraper(sw *ScrapeWork, group string, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
||||
sc := &scraper{
|
||||
stopCh: make(chan struct{}),
|
||||
stopCh: make(chan struct{}),
|
||||
stoppedCh: make(chan struct{}),
|
||||
}
|
||||
c := newClient(sw)
|
||||
sc.sw.Config = sw
|
||||
|
|
Loading…
Reference in a new issue