lib/promscrape: add ability to load scrape configs from multiple files

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1559
This commit is contained in:
Aliaksandr Valialkin 2021-08-26 08:51:14 +03:00
parent dd96792a43
commit 7fdb4db73d
8 changed files with 130 additions and 24 deletions

View file

@ -6,6 +6,7 @@ sort: 15
## tip
* FEATURE: vmagent: add ability to read scrape configs from multiple files specified in `scrape_config_files` section. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1559).
* FEATURE: vmagent: reduce memory usage and CPU usage when Prometheus staleness tracking is enabled for metrics exported from the deleted or disappeared scrape targets.
* FEATURE: take into account failed queries in `vm_request_duration_seconds` summary at `/metrics`. Previously only successful queries were taken into account. This could result in skewed summary. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1537).
* FEATURE: vmalert: add `-disableAlertgroupLabel` command-line flag for disabling the label with alert group name. This may be needed for proper deduplication in Alertmanager. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1532).

View file

@ -56,13 +56,22 @@ var (
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/ // Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
type Config struct { type Config struct {
Global GlobalConfig `yaml:"global"` Global GlobalConfig `yaml:"global,omitempty"`
ScrapeConfigs []ScrapeConfig `yaml:"scrape_configs"` ScrapeConfigs []ScrapeConfig `yaml:"scrape_configs"`
ScrapeConfigFiles []string `yaml:"scrape_config_files"`
// This is set to the directory from where the config has been loaded. // This is set to the directory from where the config has been loaded.
baseDir string baseDir string
} }
// marshal returns cfg serialized to YAML.
//
// A serialization failure is reported via logger.Panicf as a BUG.
func (cfg *Config) marshal() []byte {
	b, err := yaml.Marshal(cfg)
	if err == nil {
		return b
	}
	logger.Panicf("BUG: cannot marshal Config: %s", err)
	// Presumably unreachable, since logger.Panicf is expected to panic.
	return b
}
func (cfg *Config) mustStart() { func (cfg *Config) mustStart() {
startTime := time.Now() startTime := time.Now()
logger.Infof("starting service discovery routines...") logger.Infof("starting service discovery routines...")
@ -229,16 +238,45 @@ func loadStaticConfigs(path string) ([]StaticConfig, error) {
} }
// loadConfig loads Prometheus config from the given path. // loadConfig loads Prometheus config from the given path.
func loadConfig(path string) (cfg *Config, data []byte, err error) { func loadConfig(path string) (*Config, error) {
data, err = ioutil.ReadFile(path) data, err := ioutil.ReadFile(path)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("cannot read Prometheus config from %q: %w", path, err) return nil, fmt.Errorf("cannot read Prometheus config from %q: %w", path, err)
} }
var cfgObj Config var c Config
if err := cfgObj.parse(data, path); err != nil { if err := c.parseData(data, path); err != nil {
return nil, nil, fmt.Errorf("cannot parse Prometheus config from %q: %w", path, err) return nil, fmt.Errorf("cannot parse Prometheus config from %q: %w", path, err)
} }
return &cfgObj, data, nil return &c, nil
}
func loadScrapeConfigFiles(baseDir string, scrapeConfigFiles []string) ([]ScrapeConfig, error) {
var scrapeConfigs []ScrapeConfig
for _, filePath := range scrapeConfigFiles {
filePath := getFilepath(baseDir, filePath)
paths := []string{filePath}
if strings.Contains(filePath, "*") {
ps, err := filepath.Glob(filePath)
if err != nil {
return nil, fmt.Errorf("invalid pattern %q in `scrape_config_files`: %w", filePath, err)
}
sort.Strings(ps)
paths = ps
}
for _, path := range paths {
data, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("cannot load %q from `scrape_config_files`: %w", filePath, err)
}
data = envtemplate.Replace(data)
var scs []ScrapeConfig
if err = yaml.UnmarshalStrict(data, &scs); err != nil {
return nil, fmt.Errorf("cannot parse %q from `scrape_config_files`: %w", filePath, err)
}
scrapeConfigs = append(scrapeConfigs, scs...)
}
}
return scrapeConfigs, nil
} }
// IsDryRun returns true if -promscrape.config.dryRun command-line flag is set // IsDryRun returns true if -promscrape.config.dryRun command-line flag is set
@ -246,7 +284,7 @@ func IsDryRun() bool {
return *dryRun return *dryRun
} }
func (cfg *Config) parse(data []byte, path string) error { func (cfg *Config) parseData(data []byte, path string) error {
if err := unmarshalMaybeStrict(data, cfg); err != nil { if err := unmarshalMaybeStrict(data, cfg); err != nil {
return fmt.Errorf("cannot unmarshal data: %w", err) return fmt.Errorf("cannot unmarshal data: %w", err)
} }
@ -255,6 +293,26 @@ func (cfg *Config) parse(data []byte, path string) error {
return fmt.Errorf("cannot obtain abs path for %q: %w", path, err) return fmt.Errorf("cannot obtain abs path for %q: %w", path, err)
} }
cfg.baseDir = filepath.Dir(absPath) cfg.baseDir = filepath.Dir(absPath)
// Load cfg.ScrapeConfigFiles into cfg.ScrapeConfigs
scs, err := loadScrapeConfigFiles(cfg.baseDir, cfg.ScrapeConfigFiles)
if err != nil {
return fmt.Errorf("cannot load `scrape_config_files` from %q: %w", path, err)
}
cfg.ScrapeConfigFiles = nil
cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, scs...)
// Check that all the scrape configs have unique JobName
m := make(map[string]struct{}, len(cfg.ScrapeConfigs))
for i := range cfg.ScrapeConfigs {
jobName := cfg.ScrapeConfigs[i].JobName
if _, ok := m[jobName]; ok {
return fmt.Errorf("duplicate `job_name` in `scrape_configs` loaded from %q: %q", path, jobName)
}
m[jobName] = struct{}{}
}
// Initialize cfg.ScrapeConfigs
for i := range cfg.ScrapeConfigs { for i := range cfg.ScrapeConfigs {
sc := &cfg.ScrapeConfigs[i] sc := &cfg.ScrapeConfigs[i]
swc, err := getScrapeWorkConfig(sc, cfg.baseDir, &cfg.Global) swc, err := getScrapeWorkConfig(sc, cfg.baseDir, &cfg.Global)

View file

@ -67,7 +67,15 @@ func TestLoadStaticConfigs(t *testing.T) {
} }
func TestLoadConfig(t *testing.T) { func TestLoadConfig(t *testing.T) {
cfg, _, err := loadConfig("testdata/prometheus.yml") cfg, err := loadConfig("testdata/prometheus.yml")
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if cfg == nil {
t.Fatalf("expecting non-nil config")
}
cfg, err = loadConfig("testdata/prometheus-with-scrape-config-files.yml")
if err != nil { if err != nil {
t.Fatalf("unexpected error: %s", err) t.Fatalf("unexpected error: %s", err)
} }
@ -76,7 +84,7 @@ func TestLoadConfig(t *testing.T) {
} }
// Try loading non-existing file // Try loading non-existing file
cfg, _, err = loadConfig("testdata/non-existing-file") cfg, err = loadConfig("testdata/non-existing-file")
if err == nil { if err == nil {
t.Fatalf("expecting non-nil error") t.Fatalf("expecting non-nil error")
} }
@ -85,7 +93,7 @@ func TestLoadConfig(t *testing.T) {
} }
// Try loading invalid file // Try loading invalid file
cfg, _, err = loadConfig("testdata/file_sd_1.yml") cfg, err = loadConfig("testdata/file_sd_1.yml")
if err == nil { if err == nil {
t.Fatalf("expecting non-nil error") t.Fatalf("expecting non-nil error")
} }
@ -114,7 +122,7 @@ scrape_configs:
replacement: black:9115 # The blackbox exporter's real hostname:port.% replacement: black:9115 # The blackbox exporter's real hostname:port.%
` `
var cfg Config var cfg Config
if err := cfg.parse([]byte(data), "sss"); err != nil { if err := cfg.parseData([]byte(data), "sss"); err != nil {
t.Fatalf("cannot parase data: %s", err) t.Fatalf("cannot parase data: %s", err)
} }
sws := cfg.getStaticScrapeWork() sws := cfg.getStaticScrapeWork()
@ -170,7 +178,7 @@ scrape_configs:
- files: [testdata/file_sd.json] - files: [testdata/file_sd.json]
` `
var cfg Config var cfg Config
if err := cfg.parse([]byte(data), "sss"); err != nil { if err := cfg.parseData([]byte(data), "sss"); err != nil {
t.Fatalf("cannot parase data: %s", err) t.Fatalf("cannot parase data: %s", err)
} }
sws := cfg.getFileSDScrapeWork(nil) sws := cfg.getFileSDScrapeWork(nil)
@ -186,7 +194,7 @@ scrape_configs:
- files: [testdata/file_sd_1.yml] - files: [testdata/file_sd_1.yml]
` `
var cfgNew Config var cfgNew Config
if err := cfgNew.parse([]byte(dataNew), "sss"); err != nil { if err := cfgNew.parseData([]byte(dataNew), "sss"); err != nil {
t.Fatalf("cannot parse data: %s", err) t.Fatalf("cannot parse data: %s", err)
} }
swsNew := cfgNew.getFileSDScrapeWork(sws) swsNew := cfgNew.getFileSDScrapeWork(sws)
@ -201,7 +209,7 @@ scrape_configs:
file_sd_configs: file_sd_configs:
- files: [testdata/prometheus.yml] - files: [testdata/prometheus.yml]
` `
if err := cfg.parse([]byte(data), "sss"); err != nil { if err := cfg.parseData([]byte(data), "sss"); err != nil {
t.Fatalf("cannot parse data: %s", err) t.Fatalf("cannot parse data: %s", err)
} }
sws = cfg.getFileSDScrapeWork(swsNew) sws = cfg.getFileSDScrapeWork(swsNew)
@ -216,7 +224,7 @@ scrape_configs:
file_sd_configs: file_sd_configs:
- files: [testdata/empty_target_file_sd.yml] - files: [testdata/empty_target_file_sd.yml]
` `
if err := cfg.parse([]byte(data), "sss"); err != nil { if err := cfg.parseData([]byte(data), "sss"); err != nil {
t.Fatalf("cannot parse data: %s", err) t.Fatalf("cannot parse data: %s", err)
} }
sws = cfg.getFileSDScrapeWork(swsNew) sws = cfg.getFileSDScrapeWork(swsNew)
@ -227,7 +235,7 @@ scrape_configs:
func getFileSDScrapeWork(data []byte, path string) ([]*ScrapeWork, error) { func getFileSDScrapeWork(data []byte, path string) ([]*ScrapeWork, error) {
var cfg Config var cfg Config
if err := cfg.parse(data, path); err != nil { if err := cfg.parseData(data, path); err != nil {
return nil, fmt.Errorf("cannot parse data: %w", err) return nil, fmt.Errorf("cannot parse data: %w", err)
} }
return cfg.getFileSDScrapeWork(nil), nil return cfg.getFileSDScrapeWork(nil), nil
@ -235,7 +243,7 @@ func getFileSDScrapeWork(data []byte, path string) ([]*ScrapeWork, error) {
func getStaticScrapeWork(data []byte, path string) ([]*ScrapeWork, error) { func getStaticScrapeWork(data []byte, path string) ([]*ScrapeWork, error) {
var cfg Config var cfg Config
if err := cfg.parse(data, path); err != nil { if err := cfg.parseData(data, path); err != nil {
return nil, fmt.Errorf("cannot parse data: %w", err) return nil, fmt.Errorf("cannot parse data: %w", err)
} }
return cfg.getStaticScrapeWork(), nil return cfg.getStaticScrapeWork(), nil
@ -263,6 +271,17 @@ scrape_configs:
- targets: ["foo"] - targets: ["foo"]
`) `)
// Duplicate job_name
f(`
scrape_configs:
- job_name: foo
static_configs:
targets: ["foo"]
- job_name: foo
static_configs:
targets: ["bar"]
`)
// Invalid scheme // Invalid scheme
f(` f(`
scrape_configs: scrape_configs:
@ -487,6 +506,14 @@ scrape_configs:
static_configs: static_configs:
- targets: ["s"] - targets: ["s"]
`) `)
// Invalid scrape_config_files contents
f(`
scrape_config_files:
- job_name: aa
static_configs:
- targets: ["s"]
`)
} }
func resetNonEssentialFields(sws []*ScrapeWork) { func resetNonEssentialFields(sws []*ScrapeWork) {

View file

@ -42,7 +42,7 @@ func CheckConfig() error {
if *promscrapeConfigFile == "" { if *promscrapeConfigFile == "" {
return fmt.Errorf("missing -promscrape.config option") return fmt.Errorf("missing -promscrape.config option")
} }
_, _, err := loadConfig(*promscrapeConfigFile) _, err := loadConfig(*promscrapeConfigFile)
return err return err
} }
@ -84,10 +84,11 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
sighupCh := procutil.NewSighupChan() sighupCh := procutil.NewSighupChan()
logger.Infof("reading Prometheus configs from %q", configFile) logger.Infof("reading Prometheus configs from %q", configFile)
cfg, data, err := loadConfig(configFile) cfg, err := loadConfig(configFile)
if err != nil { if err != nil {
logger.Fatalf("cannot read %q: %s", configFile, err) logger.Fatalf("cannot read %q: %s", configFile, err)
} }
data := cfg.marshal()
cfg.mustStart() cfg.mustStart()
scs := newScrapeConfigs(pushData) scs := newScrapeConfigs(pushData)
@ -117,11 +118,12 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
select { select {
case <-sighupCh: case <-sighupCh:
logger.Infof("SIGHUP received; reloading Prometheus configs from %q", configFile) logger.Infof("SIGHUP received; reloading Prometheus configs from %q", configFile)
cfgNew, dataNew, err := loadConfig(configFile) cfgNew, err := loadConfig(configFile)
if err != nil { if err != nil {
logger.Errorf("cannot read %q on SIGHUP: %s; continuing with the previous config", configFile, err) logger.Errorf("cannot read %q on SIGHUP: %s; continuing with the previous config", configFile, err)
goto waitForChans goto waitForChans
} }
dataNew := cfgNew.marshal()
if bytes.Equal(data, dataNew) { if bytes.Equal(data, dataNew) {
logger.Infof("nothing changed in %q", configFile) logger.Infof("nothing changed in %q", configFile)
goto waitForChans goto waitForChans
@ -131,11 +133,12 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
cfg = cfgNew cfg = cfgNew
data = dataNew data = dataNew
case <-tickerCh: case <-tickerCh:
cfgNew, dataNew, err := loadConfig(configFile) cfgNew, err := loadConfig(configFile)
if err != nil { if err != nil {
logger.Errorf("cannot read %q: %s; continuing with the previous config", configFile, err) logger.Errorf("cannot read %q: %s; continuing with the previous config", configFile, err)
goto waitForChans goto waitForChans
} }
dataNew := cfgNew.marshal()
if bytes.Equal(data, dataNew) { if bytes.Equal(data, dataNew) {
// Nothing changed since the previous loadConfig // Nothing changed since the previous loadConfig
goto waitForChans goto waitForChans

View file

@ -0,0 +1,8 @@
scrape_configs:
- job_name: foo
kubernetes_sd_configs:
- role: pod
scrape_config_files:
- scrape_configs.yml
- scrape_config_files/*.yml

View file

@ -0,0 +1,3 @@
- job_name: job1
static_configs:
- targets: [foo, bar]

View file

@ -0,0 +1,3 @@
- job_name: job2
static_configs:
- targets: [foo, bar]

View file

@ -0,0 +1,3 @@
- job_name: bar
static_configs:
- targets: [foo, bar]