From cc0427897cad2c64e824764e9ed24104fe279ec9 Mon Sep 17 00:00:00 2001
From: Dmytro Kozlov
Date: Mon, 3 Apr 2023 06:26:13 +0300
Subject: [PATCH] lib/promscrape: fix the problem with scrape work duplicates
 when file_sd_config can't be read (#4027)

* lib/promscrape: fix the problem with scrape work duplicates when file_sd_config can't be read

* lib/promscrape: clarified comment

* lib/promscrape: made better approach to handle a problem with growing []*ScrapeWork on each error when loading config

* lib/promscrape: added CHANGELOG.md

* Update docs/CHANGELOG.md

---------

Co-authored-by: Aliaksandr Valialkin
---
 docs/CHANGELOG.md        |  1 +
 lib/promscrape/config.go | 23 +++++++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 873493529..99ff90045 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -48,6 +48,7 @@ created by v1.90.0 or newer versions. The solution is to upgrade to v1.90.0 or n
 * BUGFIX: allow using dashes and dots in environment variables names referred in config files via `%{ENV-VAR.SYNTAX}`. See [these docs](https://docs.victoriametrics.com/#environment-variables) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3999).
 * BUGFIX: return back query performance scalability on hosts with big number of CPU cores. The scalability has been reduced in [v1.86.0](https://docs.victoriametrics.com/CHANGELOG.html#v1860). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966).
 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly convert [VictoriaMetrics historgram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) to Prometheus histogram buckets when VictoriaMetrics histogram contain zero buckets. Previously these buckets were ignored, and this could lead to missing Prometheus histogram buckets after the conversion. Thanks to @zklapow for [the fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4021).
+* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix CPU and memory usage spikes when files pointed by [file_sd_config](https://docs.victoriametrics.com/sd_configs.html#file_sd_configs) cannot be re-read. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3989).
 * BUGFIX: prevent unexpected merges on start-up when `-storage.minFreeDiskSpaceBytes` is set. See [the issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4023).
 * BUGFIX: properly support comma-separated filters inside [retention filters](https://docs.victoriametrics.com/#retention-filters). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3915).
 * BUGFIX: verify response code when fetching configuration files via HTTP. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4034).
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go
index 3914c5f71..d281f612b 100644
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@@ -718,14 +718,19 @@ func (cfg *Config) getFileSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
 		if len(filepath) == 0 {
 			logger.Panicf("BUG: missing `__vm_filepath` label")
 		} else {
-			swsMapPrev[filepath] = append(swsMapPrev[filepath], sw)
+			// A user can define multiple file_sd_configs with the same path, and they
+			// produce identical ScrapeWorks. Key the map by job name and filepath,
+			// joined with ":", so each job keeps its own ScrapeWorks.
+			key := fmt.Sprintf("%s:%s", sw.Job(), filepath)
+			swsMapPrev[key] = append(swsMapPrev[key], sw)
 		}
 	}
 	dst := make([]*ScrapeWork, 0, len(prev))
 	for _, sc := range cfg.ScrapeConfigs {
+		configPaths := make(map[string]struct{}, len(sc.FileSDConfigs))
 		for j := range sc.FileSDConfigs {
 			sdc := &sc.FileSDConfigs[j]
-			dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc)
+			dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc, configPaths)
 		}
 	}
 	return dst
@@ -1122,7 +1127,7 @@ func appendScrapeWorkForTargetLabels(dst []*ScrapeWork, swc *scrapeWorkConfig, t
 	return dst
 }
 
-func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[string][]*ScrapeWork, baseDir string, swc *scrapeWorkConfig) []*ScrapeWork {
+func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[string][]*ScrapeWork, baseDir string, swc *scrapeWorkConfig, configPaths map[string]struct{}) []*ScrapeWork {
 	metaLabels := promutils.GetLabels()
 	defer promutils.PutLabels(metaLabels)
 	for _, file := range sdc.Files {
@@ -1138,10 +1143,20 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[stri
 		}
 	}
 	for _, path := range paths {
+		// Build the same key as for the previous ScrapeWorks (swsMapPrev) and warn
+		// the user about identical file_sd_config entries. Duplicates are skipped,
+		// since they would append duplicated ScrapeWorks to dst.
+		key := fmt.Sprintf("%s:%s", swc.jobName, path)
+		if _, ok := configPaths[key]; ok {
+			logger.Warnf("file_sd_config contains multiple references to %q; ignoring the duplicate entry. Please check -promscrape.config and remove duplicated configurations", path)
+			continue
+		}
+		configPaths[key] = struct{}{}
+
 		stcs, err := loadStaticConfigs(path)
 		if err != nil {
 			// Do not return this error, since other paths may contain valid scrape configs.
-			if sws := swsMapPrev[path]; sws != nil {
+			if sws := swsMapPrev[key]; sws != nil {
 				// Re-use the previous valid scrape work for this path.
 				logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
 				dst = append(dst, sws...)
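
Note (editorial, not part of the patch): below is a minimal, self-contained sketch of the deduplication technique the patch applies. The names scrapeWork and dedupKey, and the example target paths, are hypothetical stand-ins for the real promscrape types; the actual code builds the same "job:path" key with fmt.Sprintf and tracks seen keys in a map[string]struct{}, as shown in the diff above.

package main

import "fmt"

// scrapeWork is a stand-in for promscrape's *ScrapeWork; it carries only the
// fields needed to illustrate the job:path keying.
type scrapeWork struct {
	job  string
	path string
}

// dedupKey mirrors the key built in the patch: job name and file path joined
// with ":", so the same path referenced by different jobs stays distinct.
func dedupKey(job, path string) string {
	return fmt.Sprintf("%s:%s", job, path)
}

func main() {
	works := []scrapeWork{
		{job: "node", path: "/etc/sd/targets.json"},
		{job: "node", path: "/etc/sd/targets.json"}, // duplicated file_sd_config entry: skipped
		{job: "app", path: "/etc/sd/targets.json"},  // same path, different job: kept
	}
	seen := make(map[string]struct{}, len(works))
	var dst []scrapeWork
	for _, sw := range works {
		key := dedupKey(sw.job, sw.path)
		if _, ok := seen[key]; ok {
			// The patch logs a warning here instead of silently dropping the entry.
			fmt.Printf("skipping duplicated reference to %q in job %q\n", sw.path, sw.job)
			continue
		}
		seen[key] = struct{}{}
		dst = append(dst, sw)
	}
	fmt.Printf("kept %d of %d scrape works\n", len(dst), len(works))
}

Keying by job name as well as path is what lets two different jobs legitimately watch the same file, while a duplicated file_sd_config entry inside a single job is dropped with a warning instead of growing dst with duplicated ScrapeWorks on every reload failure.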