mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-01 14:47:38 +00:00
f160693c99
* lib/promscrape: fix the problem with scrape work duplicates when file_sd_config can't be read * lib/promscrape: clarified comment * lib/promscrape: made better approach to handle a problem with growing []*ScrapeWork on each error when loading config * lib/promscrape: added CHANGELOG.md * Update docs/CHANGELOG.md --------- Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
1431 lines
49 KiB
Go
1431 lines
49 KiB
Go
package promscrape
|
|
|
|
import (
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"net/url"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/azure"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/digitalocean"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/docker"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dockerswarm"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/ec2"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/eureka"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/gce"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/http"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/kubernetes"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/openstack"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy"
|
|
"github.com/VictoriaMetrics/metrics"
|
|
"github.com/cespare/xxhash/v2"
|
|
"gopkg.in/yaml.v2"
|
|
)
|
|
|
|
var (
|
|
strictParse = flag.Bool("promscrape.config.strictParse", true, "Whether to deny unsupported fields in -promscrape.config . Set to false in order to silently skip unsupported fields")
|
|
dryRun = flag.Bool("promscrape.config.dryRun", false, "Checks -promscrape.config file for errors and unsupported fields and then exits. "+
|
|
"Returns non-zero exit code on parsing errors and emits these errors to stderr. "+
|
|
"See also -promscrape.config.strictParse command-line flag. "+
|
|
"Pass -loggerLevel=ERROR if you don't need to see info messages in the output.")
|
|
dropOriginalLabels = flag.Bool("promscrape.dropOriginalLabels", false, "Whether to drop original labels for scrape targets at /targets and /api/v1/targets pages. "+
|
|
"This may be needed for reducing memory usage when original labels for big number of scrape targets occupy big amounts of memory. "+
|
|
"Note that this reduces debuggability for improper per-target relabeling configs")
|
|
clusterMembersCount = flag.Int("promscrape.cluster.membersCount", 0, "The number of members in a cluster of scrapers. "+
|
|
"Each member must have an unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . "+
|
|
"Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets")
|
|
clusterMemberNum = flag.String("promscrape.cluster.memberNum", "0", "The number of number in the cluster of scrapers. "+
|
|
"It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. "+
|
|
"Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name")
|
|
clusterReplicationFactor = flag.Int("promscrape.cluster.replicationFactor", 1, "The number of members in the cluster, which scrape the same targets. "+
|
|
"If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/#deduplication")
|
|
clusterName = flag.String("promscrape.cluster.name", "", "Optional name of the cluster. If multiple vmagent clusters scrape the same targets, "+
|
|
"then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. "+
|
|
"See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2679")
|
|
)
|
|
|
|
var clusterMemberID int
|
|
|
|
func mustInitClusterMemberID() {
|
|
s := *clusterMemberNum
|
|
// special case for kubernetes deployment, where pod-name formatted at some-pod-name-1
|
|
// obtain memberNum from last segment
|
|
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2359
|
|
if idx := strings.LastIndexByte(s, '-'); idx >= 0 {
|
|
s = s[idx+1:]
|
|
}
|
|
n, err := strconv.Atoi(s)
|
|
if err != nil {
|
|
logger.Fatalf("cannot parse -promscrape.cluster.memberNum=%q: %s", *clusterMemberNum, err)
|
|
}
|
|
clusterMemberID = n
|
|
}
|
|
|
|
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
|
type Config struct {
|
|
Global GlobalConfig `yaml:"global,omitempty"`
|
|
ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
|
|
ScrapeConfigFiles []string `yaml:"scrape_config_files,omitempty"`
|
|
|
|
// This is set to the directory from where the config has been loaded.
|
|
baseDir string
|
|
}
|
|
|
|
func (cfg *Config) unmarshal(data []byte, isStrict bool) error {
|
|
data = envtemplate.Replace(data)
|
|
var err error
|
|
if isStrict {
|
|
if err = yaml.UnmarshalStrict(data, cfg); err != nil {
|
|
err = fmt.Errorf("%w; pass -promscrape.config.strictParse=false command-line flag for ignoring unknown fields in yaml config", err)
|
|
}
|
|
} else {
|
|
err = yaml.Unmarshal(data, cfg)
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (cfg *Config) marshal() []byte {
|
|
data, err := yaml.Marshal(cfg)
|
|
if err != nil {
|
|
logger.Panicf("BUG: cannot marshal Config: %s", err)
|
|
}
|
|
return data
|
|
}
|
|
|
|
func (cfg *Config) mustStart() {
|
|
startTime := time.Now()
|
|
logger.Infof("starting service discovery routines...")
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
sc.mustStart(cfg.baseDir)
|
|
}
|
|
jobNames := cfg.getJobNames()
|
|
tsmGlobal.registerJobNames(jobNames)
|
|
logger.Infof("started service discovery routines in %.3f seconds", time.Since(startTime).Seconds())
|
|
}
|
|
|
|
func (cfg *Config) mustRestart(prevCfg *Config) {
|
|
startTime := time.Now()
|
|
logger.Infof("restarting service discovery routines...")
|
|
|
|
prevScrapeCfgByName := make(map[string]*ScrapeConfig, len(prevCfg.ScrapeConfigs))
|
|
for _, scPrev := range prevCfg.ScrapeConfigs {
|
|
prevScrapeCfgByName[scPrev.JobName] = scPrev
|
|
}
|
|
|
|
// Restart all the scrape jobs on Global config change.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2884
|
|
needGlobalRestart := !areEqualGlobalConfigs(&cfg.Global, &prevCfg.Global)
|
|
|
|
// Loop over the the new jobs, start new ones and restart updated ones.
|
|
var started, stopped, restarted int
|
|
currentJobNames := make(map[string]struct{}, len(cfg.ScrapeConfigs))
|
|
for i, sc := range cfg.ScrapeConfigs {
|
|
currentJobNames[sc.JobName] = struct{}{}
|
|
scPrev := prevScrapeCfgByName[sc.JobName]
|
|
if scPrev == nil {
|
|
// New scrape config has been appeared. Start it.
|
|
sc.mustStart(cfg.baseDir)
|
|
started++
|
|
continue
|
|
}
|
|
if !needGlobalRestart && areEqualScrapeConfigs(scPrev, sc) {
|
|
// The scrape config didn't change, so no need to restart it.
|
|
// Use the reference to the previous job, so it could be stopped properly later.
|
|
cfg.ScrapeConfigs[i] = scPrev
|
|
} else {
|
|
// The scrape config has been changed. Stop the previous scrape config and start new one.
|
|
scPrev.mustStop()
|
|
sc.mustStart(cfg.baseDir)
|
|
restarted++
|
|
}
|
|
}
|
|
// Stop preious jobs which weren't found in the current configuration.
|
|
for _, scPrev := range prevCfg.ScrapeConfigs {
|
|
if _, ok := currentJobNames[scPrev.JobName]; !ok {
|
|
scPrev.mustStop()
|
|
stopped++
|
|
}
|
|
}
|
|
jobNames := cfg.getJobNames()
|
|
tsmGlobal.registerJobNames(jobNames)
|
|
logger.Infof("restarted service discovery routines in %.3f seconds, stopped=%d, started=%d, restarted=%d", time.Since(startTime).Seconds(), stopped, started, restarted)
|
|
}
|
|
|
|
func areEqualGlobalConfigs(a, b *GlobalConfig) bool {
|
|
sa := a.marshalJSON()
|
|
sb := b.marshalJSON()
|
|
return string(sa) == string(sb)
|
|
}
|
|
|
|
func areEqualScrapeConfigs(a, b *ScrapeConfig) bool {
|
|
sa := a.marshalJSON()
|
|
sb := b.marshalJSON()
|
|
return string(sa) == string(sb)
|
|
}
|
|
|
|
func (sc *ScrapeConfig) unmarshalJSON(data []byte) error {
|
|
return json.Unmarshal(data, sc)
|
|
}
|
|
|
|
func (sc *ScrapeConfig) marshalJSON() []byte {
|
|
data, err := json.Marshal(sc)
|
|
if err != nil {
|
|
logger.Panicf("BUG: cannot marshal ScrapeConfig: %s", err)
|
|
}
|
|
return data
|
|
}
|
|
|
|
func (gc *GlobalConfig) marshalJSON() []byte {
|
|
data, err := json.Marshal(gc)
|
|
if err != nil {
|
|
logger.Panicf("BUG: cannot marshal GlobalConfig: %s", err)
|
|
}
|
|
return data
|
|
}
|
|
|
|
func (cfg *Config) mustStop() {
|
|
startTime := time.Now()
|
|
logger.Infof("stopping service discovery routines...")
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
sc.mustStop()
|
|
}
|
|
logger.Infof("stopped service discovery routines in %.3f seconds", time.Since(startTime).Seconds())
|
|
}
|
|
|
|
// getJobNames returns all the scrape job names from the cfg.
|
|
func (cfg *Config) getJobNames() []string {
|
|
a := make([]string, 0, len(cfg.ScrapeConfigs))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
a = append(a, sc.JobName)
|
|
}
|
|
return a
|
|
}
|
|
|
|
// GlobalConfig represents essential parts for `global` section of Prometheus config.
|
|
//
|
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
|
type GlobalConfig struct {
|
|
ScrapeInterval *promutils.Duration `yaml:"scrape_interval,omitempty"`
|
|
ScrapeTimeout *promutils.Duration `yaml:"scrape_timeout,omitempty"`
|
|
ExternalLabels map[string]string `yaml:"external_labels,omitempty"`
|
|
}
|
|
|
|
// ScrapeConfig represents essential parts for `scrape_config` section of Prometheus config.
|
|
//
|
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
|
|
type ScrapeConfig struct {
|
|
JobName string `yaml:"job_name"`
|
|
ScrapeInterval *promutils.Duration `yaml:"scrape_interval,omitempty"`
|
|
ScrapeTimeout *promutils.Duration `yaml:"scrape_timeout,omitempty"`
|
|
MetricsPath string `yaml:"metrics_path,omitempty"`
|
|
HonorLabels bool `yaml:"honor_labels,omitempty"`
|
|
HonorTimestamps *bool `yaml:"honor_timestamps,omitempty"`
|
|
FollowRedirects *bool `yaml:"follow_redirects,omitempty"`
|
|
Scheme string `yaml:"scheme,omitempty"`
|
|
Params map[string][]string `yaml:"params,omitempty"`
|
|
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
|
ProxyURL *proxy.URL `yaml:"proxy_url,omitempty"`
|
|
RelabelConfigs []promrelabel.RelabelConfig `yaml:"relabel_configs,omitempty"`
|
|
MetricRelabelConfigs []promrelabel.RelabelConfig `yaml:"metric_relabel_configs,omitempty"`
|
|
SampleLimit int `yaml:"sample_limit,omitempty"`
|
|
|
|
AzureSDConfigs []azure.SDConfig `yaml:"azure_sd_configs,omitempty"`
|
|
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
|
DigitaloceanSDConfigs []digitalocean.SDConfig `yaml:"digitalocean_sd_configs,omitempty"`
|
|
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
|
DockerSDConfigs []docker.SDConfig `yaml:"docker_sd_configs,omitempty"`
|
|
DockerSwarmSDConfigs []dockerswarm.SDConfig `yaml:"dockerswarm_sd_configs,omitempty"`
|
|
EC2SDConfigs []ec2.SDConfig `yaml:"ec2_sd_configs,omitempty"`
|
|
EurekaSDConfigs []eureka.SDConfig `yaml:"eureka_sd_configs,omitempty"`
|
|
FileSDConfigs []FileSDConfig `yaml:"file_sd_configs,omitempty"`
|
|
GCESDConfigs []gce.SDConfig `yaml:"gce_sd_configs,omitempty"`
|
|
HTTPSDConfigs []http.SDConfig `yaml:"http_sd_configs,omitempty"`
|
|
KubernetesSDConfigs []kubernetes.SDConfig `yaml:"kubernetes_sd_configs,omitempty"`
|
|
OpenStackSDConfigs []openstack.SDConfig `yaml:"openstack_sd_configs,omitempty"`
|
|
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
|
|
|
// These options are supported only by lib/promscrape.
|
|
RelabelDebug bool `yaml:"relabel_debug,omitempty"`
|
|
MetricRelabelDebug bool `yaml:"metric_relabel_debug,omitempty"`
|
|
DisableCompression bool `yaml:"disable_compression,omitempty"`
|
|
DisableKeepAlive bool `yaml:"disable_keepalive,omitempty"`
|
|
StreamParse bool `yaml:"stream_parse,omitempty"`
|
|
ScrapeAlignInterval *promutils.Duration `yaml:"scrape_align_interval,omitempty"`
|
|
ScrapeOffset *promutils.Duration `yaml:"scrape_offset,omitempty"`
|
|
SeriesLimit int `yaml:"series_limit,omitempty"`
|
|
ProxyClientConfig promauth.ProxyClientConfig `yaml:",inline"`
|
|
|
|
// This is set in loadConfig
|
|
swc *scrapeWorkConfig
|
|
}
|
|
|
|
func (sc *ScrapeConfig) mustStart(baseDir string) {
|
|
swosFunc := func(metaLabels map[string]string) interface{} {
|
|
target := metaLabels["__address__"]
|
|
sw, err := sc.swc.getScrapeWork(target, nil, metaLabels)
|
|
if err != nil {
|
|
logger.Errorf("cannot create kubernetes_sd_config target %q for job_name %q: %s", target, sc.swc.jobName, err)
|
|
return nil
|
|
}
|
|
return sw
|
|
}
|
|
for i := range sc.KubernetesSDConfigs {
|
|
sc.KubernetesSDConfigs[i].MustStart(baseDir, swosFunc)
|
|
}
|
|
}
|
|
|
|
func (sc *ScrapeConfig) mustStop() {
|
|
for i := range sc.AzureSDConfigs {
|
|
sc.AzureSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.ConsulSDConfigs {
|
|
sc.ConsulSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.DigitaloceanSDConfigs {
|
|
sc.DigitaloceanSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.DNSSDConfigs {
|
|
sc.DNSSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.DockerSDConfigs {
|
|
sc.DockerSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.DockerSwarmSDConfigs {
|
|
sc.DockerSwarmSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.EC2SDConfigs {
|
|
sc.EC2SDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.EurekaSDConfigs {
|
|
sc.EurekaSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.GCESDConfigs {
|
|
sc.GCESDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.HTTPSDConfigs {
|
|
sc.HTTPSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.KubernetesSDConfigs {
|
|
sc.KubernetesSDConfigs[i].MustStop()
|
|
}
|
|
for i := range sc.OpenStackSDConfigs {
|
|
sc.OpenStackSDConfigs[i].MustStop()
|
|
}
|
|
}
|
|
|
|
// FileSDConfig represents file-based service discovery config.
|
|
//
|
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#file_sd_config
|
|
type FileSDConfig struct {
|
|
Files []string `yaml:"files"`
|
|
// `refresh_interval` is ignored. See `-promscrape.fileSDCheckInterval`
|
|
}
|
|
|
|
// StaticConfig represents essential parts for `static_config` section of Prometheus config.
|
|
//
|
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config
|
|
type StaticConfig struct {
|
|
Targets []string `yaml:"targets"`
|
|
Labels map[string]string `yaml:"labels,omitempty"`
|
|
}
|
|
|
|
func loadStaticConfigs(path string) ([]StaticConfig, error) {
|
|
data, err := fs.ReadFileOrHTTP(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot read `static_configs` from %q: %w", path, err)
|
|
}
|
|
data = envtemplate.Replace(data)
|
|
var stcs []StaticConfig
|
|
if err := yaml.UnmarshalStrict(data, &stcs); err != nil {
|
|
return nil, fmt.Errorf("cannot unmarshal `static_configs` from %q: %w", path, err)
|
|
}
|
|
return stcs, nil
|
|
}
|
|
|
|
// loadConfig loads Prometheus config from the given path.
|
|
func loadConfig(path string) (*Config, []byte, error) {
|
|
data, err := fs.ReadFileOrHTTP(path)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("cannot read Prometheus config from %q: %w", path, err)
|
|
}
|
|
var c Config
|
|
dataNew, err := c.parseData(data, path)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("cannot parse Prometheus config from %q: %w", path, err)
|
|
}
|
|
return &c, dataNew, nil
|
|
}
|
|
|
|
func loadScrapeConfigFiles(baseDir string, scrapeConfigFiles []string) ([]*ScrapeConfig, []byte, error) {
|
|
var scrapeConfigs []*ScrapeConfig
|
|
var scsData []byte
|
|
for _, filePath := range scrapeConfigFiles {
|
|
filePath := fs.GetFilepath(baseDir, filePath)
|
|
paths := []string{filePath}
|
|
if strings.Contains(filePath, "*") {
|
|
ps, err := filepath.Glob(filePath)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("invalid pattern %q: %w", filePath, err)
|
|
}
|
|
sort.Strings(ps)
|
|
paths = ps
|
|
}
|
|
for _, path := range paths {
|
|
data, err := fs.ReadFileOrHTTP(path)
|
|
if err != nil {
|
|
return nil, nil, fmt.Errorf("cannot load %q: %w", path, err)
|
|
}
|
|
data = envtemplate.Replace(data)
|
|
var scs []*ScrapeConfig
|
|
if err = yaml.UnmarshalStrict(data, &scs); err != nil {
|
|
return nil, nil, fmt.Errorf("cannot parse %q: %w", path, err)
|
|
}
|
|
scrapeConfigs = append(scrapeConfigs, scs...)
|
|
scsData = append(scsData, '\n')
|
|
scsData = append(scsData, data...)
|
|
}
|
|
}
|
|
return scrapeConfigs, scsData, nil
|
|
}
|
|
|
|
// IsDryRun returns true if -promscrape.config.dryRun command-line flag is set
|
|
func IsDryRun() bool {
|
|
return *dryRun
|
|
}
|
|
|
|
func (cfg *Config) parseData(data []byte, path string) ([]byte, error) {
|
|
if err := cfg.unmarshal(data, *strictParse); err != nil {
|
|
return nil, fmt.Errorf("cannot unmarshal data: %w", err)
|
|
}
|
|
absPath, err := filepath.Abs(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot obtain abs path for %q: %w", path, err)
|
|
}
|
|
cfg.baseDir = filepath.Dir(absPath)
|
|
|
|
// Load cfg.ScrapeConfigFiles into c.ScrapeConfigs
|
|
scs, scsData, err := loadScrapeConfigFiles(cfg.baseDir, cfg.ScrapeConfigFiles)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot load `scrape_config_files` from %q: %w", path, err)
|
|
}
|
|
cfg.ScrapeConfigFiles = nil
|
|
cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, scs...)
|
|
dataNew := append(data, scsData...)
|
|
|
|
// Check that all the scrape configs have unique JobName
|
|
m := make(map[string]struct{}, len(cfg.ScrapeConfigs))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
jobName := sc.JobName
|
|
if _, ok := m[jobName]; ok {
|
|
return nil, fmt.Errorf("duplicate `job_name` in `scrape_configs` loaded from %q: %q", path, jobName)
|
|
}
|
|
m[jobName] = struct{}{}
|
|
}
|
|
|
|
// Initialize cfg.ScrapeConfigs
|
|
for i, sc := range cfg.ScrapeConfigs {
|
|
// Make a copy of sc in order to remove references to `data` memory.
|
|
// This should prevent from memory leaks on config reload.
|
|
sc = sc.clone()
|
|
cfg.ScrapeConfigs[i] = sc
|
|
|
|
swc, err := getScrapeWorkConfig(sc, cfg.baseDir, &cfg.Global)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse `scrape_config`: %w", err)
|
|
}
|
|
sc.swc = swc
|
|
}
|
|
return dataNew, nil
|
|
}
|
|
|
|
func (sc *ScrapeConfig) clone() *ScrapeConfig {
|
|
data := sc.marshalJSON()
|
|
var scCopy ScrapeConfig
|
|
if err := scCopy.unmarshalJSON(data); err != nil {
|
|
logger.Panicf("BUG: cannot unmarshal scrape config: %s", err)
|
|
}
|
|
return &scCopy
|
|
}
|
|
|
|
func getSWSByJob(sws []*ScrapeWork) map[string][]*ScrapeWork {
|
|
m := make(map[string][]*ScrapeWork)
|
|
for _, sw := range sws {
|
|
m[sw.jobNameOriginal] = append(m[sw.jobNameOriginal], sw)
|
|
}
|
|
return m
|
|
}
|
|
|
|
// getAzureSDScrapeWork returns `azure_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getAzureSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.AzureSDConfigs {
|
|
sdc := &sc.AzureSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "azure_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering azure targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getConsulSDScrapeWork returns `consul_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getConsulSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.ConsulSDConfigs {
|
|
sdc := &sc.ConsulSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "consul_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering consul targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getDigitalOceanDScrapeWork returns `digitalocean_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getDigitalOceanDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.DigitaloceanSDConfigs {
|
|
sdc := &sc.DigitaloceanSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "digitalocean_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering digitalocean targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getDNSSDScrapeWork returns `dns_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getDNSSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.DNSSDConfigs {
|
|
sdc := &sc.DNSSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "dns_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering dns targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getDockerSDScrapeWork returns `docker_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getDockerSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.DockerSDConfigs {
|
|
sdc := &sc.DockerSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "docker_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering docker targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getDockerSwarmSDScrapeWork returns `dockerswarm_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getDockerSwarmSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.DockerSwarmSDConfigs {
|
|
sdc := &sc.DockerSwarmSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "dockerswarm_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering dockerswarm targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getEC2SDScrapeWork returns `ec2_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getEC2SDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.EC2SDConfigs {
|
|
sdc := &sc.EC2SDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "ec2_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering ec2 targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getEurekaSDScrapeWork returns `eureka_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getEurekaSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.EurekaSDConfigs {
|
|
sdc := &sc.EurekaSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "eureka_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering eureka targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getFileSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
// Create a map for the previous scrape work.
|
|
swsMapPrev := make(map[string][]*ScrapeWork)
|
|
for _, sw := range prev {
|
|
filepath := promrelabel.GetLabelValueByName(sw.Labels, "__vm_filepath")
|
|
if len(filepath) == 0 {
|
|
logger.Panicf("BUG: missing `__vm_filepath` label")
|
|
} else {
|
|
// user can define many file_sd_config with the same path and it will produce the same ScrapeWorks
|
|
// in this case we just make key for map as job name and filepath with ":" delimiter,
|
|
// it will create each job with its ScrapeWorks
|
|
key := fmt.Sprintf("%s:%s", sw.Job(), filepath)
|
|
swsMapPrev[key] = append(swsMapPrev[key], sw)
|
|
}
|
|
}
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
configPaths := make(map[string]struct{}, len(sc.FileSDConfigs))
|
|
for j := range sc.FileSDConfigs {
|
|
sdc := &sc.FileSDConfigs[j]
|
|
dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc, configPaths)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getGCESDScrapeWork returns `gce_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getGCESDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.GCESDConfigs {
|
|
sdc := &sc.GCESDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "gce_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering gce targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getHTTPDScrapeWork returns `http_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getHTTPDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.HTTPSDConfigs {
|
|
sdc := &sc.HTTPSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "http_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering http targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getKubernetesSDScrapeWork returns `kubernetes_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getKubernetesSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.KubernetesSDConfigs {
|
|
sdc := &sc.KubernetesSDConfigs[j]
|
|
swos, err := sdc.GetScrapeWorkObjects()
|
|
if err != nil {
|
|
logger.Errorf("skipping kubernetes_sd_config targets for job_name %q because of error: %s", sc.swc.jobName, err)
|
|
ok = false
|
|
break
|
|
}
|
|
for _, swo := range swos {
|
|
sw := swo.(*ScrapeWork)
|
|
dst = append(dst, sw)
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering kubernetes_sd_config targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getOpenStackSDScrapeWork returns `openstack_sd_configs` ScrapeWork from cfg.
|
|
func (cfg *Config) getOpenStackSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
|
|
swsPrevByJob := getSWSByJob(prev)
|
|
dst := make([]*ScrapeWork, 0, len(prev))
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
dstLen := len(dst)
|
|
ok := true
|
|
for j := range sc.OpenStackSDConfigs {
|
|
sdc := &sc.OpenStackSDConfigs[j]
|
|
var okLocal bool
|
|
dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "openstack_sd_config")
|
|
if ok {
|
|
ok = okLocal
|
|
}
|
|
}
|
|
if ok {
|
|
continue
|
|
}
|
|
swsPrev := swsPrevByJob[sc.swc.jobName]
|
|
if len(swsPrev) > 0 {
|
|
logger.Errorf("there were errors when discovering openstack targets for job %q, so preserving the previous targets", sc.swc.jobName)
|
|
dst = append(dst[:dstLen], swsPrev...)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
// getStaticScrapeWork returns `static_configs` ScrapeWork from from cfg.
|
|
func (cfg *Config) getStaticScrapeWork() []*ScrapeWork {
|
|
var dst []*ScrapeWork
|
|
for _, sc := range cfg.ScrapeConfigs {
|
|
for j := range sc.StaticConfigs {
|
|
stc := &sc.StaticConfigs[j]
|
|
dst = stc.appendScrapeWork(dst, sc.swc, nil)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConfig) (*scrapeWorkConfig, error) {
|
|
jobName := sc.JobName
|
|
if jobName == "" {
|
|
return nil, fmt.Errorf("missing `job_name` field in `scrape_config`")
|
|
}
|
|
scrapeInterval := sc.ScrapeInterval.Duration()
|
|
if scrapeInterval <= 0 {
|
|
scrapeInterval = globalCfg.ScrapeInterval.Duration()
|
|
if scrapeInterval <= 0 {
|
|
scrapeInterval = defaultScrapeInterval
|
|
}
|
|
}
|
|
scrapeTimeout := sc.ScrapeTimeout.Duration()
|
|
if scrapeTimeout <= 0 {
|
|
scrapeTimeout = globalCfg.ScrapeTimeout.Duration()
|
|
if scrapeTimeout <= 0 {
|
|
scrapeTimeout = defaultScrapeTimeout
|
|
}
|
|
}
|
|
if scrapeTimeout > scrapeInterval {
|
|
// Limit the `scrape_timeout` with `scrape_interval` like Prometheus does.
|
|
// This guarantees that the scraper can miss only a single scrape if the target sometimes responds slowly.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907
|
|
scrapeTimeout = scrapeInterval
|
|
}
|
|
honorLabels := sc.HonorLabels
|
|
honorTimestamps := true
|
|
if sc.HonorTimestamps != nil {
|
|
honorTimestamps = *sc.HonorTimestamps
|
|
}
|
|
denyRedirects := false
|
|
if sc.FollowRedirects != nil {
|
|
denyRedirects = !*sc.FollowRedirects
|
|
}
|
|
metricsPath := sc.MetricsPath
|
|
if metricsPath == "" {
|
|
metricsPath = "/metrics"
|
|
}
|
|
scheme := sc.Scheme
|
|
if scheme == "" {
|
|
scheme = "http"
|
|
}
|
|
if scheme != "http" && scheme != "https" {
|
|
return nil, fmt.Errorf("unexpected `scheme` for `job_name` %q: %q; supported values: http or https", jobName, scheme)
|
|
}
|
|
params := sc.Params
|
|
ac, err := sc.HTTPClientConfig.NewConfig(baseDir)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse auth config for `job_name` %q: %w", jobName, err)
|
|
}
|
|
proxyAC, err := sc.ProxyClientConfig.NewConfig(baseDir)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse proxy auth config for `job_name` %q: %w", jobName, err)
|
|
}
|
|
relabelConfigs, err := promrelabel.ParseRelabelConfigs(sc.RelabelConfigs, sc.RelabelDebug)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse `relabel_configs` for `job_name` %q: %w", jobName, err)
|
|
}
|
|
metricRelabelConfigs, err := promrelabel.ParseRelabelConfigs(sc.MetricRelabelConfigs, sc.MetricRelabelDebug)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse `metric_relabel_configs` for `job_name` %q: %w", jobName, err)
|
|
}
|
|
if (*streamParse || sc.StreamParse) && sc.SampleLimit > 0 {
|
|
return nil, fmt.Errorf("cannot use stream parsing mode when `sample_limit` is set for `job_name` %q", jobName)
|
|
}
|
|
if (*streamParse || sc.StreamParse) && sc.SeriesLimit > 0 {
|
|
return nil, fmt.Errorf("cannot use stream parsing mode when `series_limit` is set for `job_name` %q", jobName)
|
|
}
|
|
swc := &scrapeWorkConfig{
|
|
scrapeInterval: scrapeInterval,
|
|
scrapeIntervalString: scrapeInterval.String(),
|
|
scrapeTimeout: scrapeTimeout,
|
|
scrapeTimeoutString: scrapeTimeout.String(),
|
|
jobName: jobName,
|
|
metricsPath: metricsPath,
|
|
scheme: scheme,
|
|
params: params,
|
|
proxyURL: sc.ProxyURL,
|
|
proxyAuthConfig: proxyAC,
|
|
authConfig: ac,
|
|
honorLabels: honorLabels,
|
|
honorTimestamps: honorTimestamps,
|
|
denyRedirects: denyRedirects,
|
|
externalLabels: globalCfg.ExternalLabels,
|
|
relabelConfigs: relabelConfigs,
|
|
metricRelabelConfigs: metricRelabelConfigs,
|
|
sampleLimit: sc.SampleLimit,
|
|
disableCompression: sc.DisableCompression,
|
|
disableKeepAlive: sc.DisableKeepAlive,
|
|
streamParse: sc.StreamParse,
|
|
scrapeAlignInterval: sc.ScrapeAlignInterval.Duration(),
|
|
scrapeOffset: sc.ScrapeOffset.Duration(),
|
|
seriesLimit: sc.SeriesLimit,
|
|
}
|
|
return swc, nil
|
|
}
|
|
|
|
type scrapeWorkConfig struct {
|
|
scrapeInterval time.Duration
|
|
scrapeIntervalString string
|
|
scrapeTimeout time.Duration
|
|
scrapeTimeoutString string
|
|
jobName string
|
|
metricsPath string
|
|
scheme string
|
|
params map[string][]string
|
|
proxyURL *proxy.URL
|
|
proxyAuthConfig *promauth.Config
|
|
authConfig *promauth.Config
|
|
honorLabels bool
|
|
honorTimestamps bool
|
|
denyRedirects bool
|
|
externalLabels map[string]string
|
|
relabelConfigs *promrelabel.ParsedConfigs
|
|
metricRelabelConfigs *promrelabel.ParsedConfigs
|
|
sampleLimit int
|
|
disableCompression bool
|
|
disableKeepAlive bool
|
|
streamParse bool
|
|
scrapeAlignInterval time.Duration
|
|
scrapeOffset time.Duration
|
|
seriesLimit int
|
|
}
|
|
|
|
type targetLabelsGetter interface {
|
|
GetLabels(baseDir string) ([]map[string]string, error)
|
|
}
|
|
|
|
func appendSDScrapeWork(dst []*ScrapeWork, sdc targetLabelsGetter, baseDir string, swc *scrapeWorkConfig, discoveryType string) ([]*ScrapeWork, bool) {
|
|
targetLabels, err := sdc.GetLabels(baseDir)
|
|
if err != nil {
|
|
logger.Errorf("skipping %s targets for job_name %q because of error: %s", discoveryType, swc.jobName, err)
|
|
return dst, false
|
|
}
|
|
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, discoveryType), true
|
|
}
|
|
|
|
func appendScrapeWorkForTargetLabels(dst []*ScrapeWork, swc *scrapeWorkConfig, targetLabels []map[string]string, discoveryType string) []*ScrapeWork {
|
|
startTime := time.Now()
|
|
// Process targetLabels in parallel in order to reduce processing time for big number of targetLabels.
|
|
type result struct {
|
|
sw *ScrapeWork
|
|
err error
|
|
}
|
|
goroutines := cgroup.AvailableCPUs()
|
|
resultCh := make(chan result, len(targetLabels))
|
|
workCh := make(chan map[string]string, goroutines)
|
|
for i := 0; i < goroutines; i++ {
|
|
go func() {
|
|
for metaLabels := range workCh {
|
|
target := metaLabels["__address__"]
|
|
sw, err := swc.getScrapeWork(target, nil, metaLabels)
|
|
if err != nil {
|
|
err = fmt.Errorf("skipping %s target %q for job_name %q because of error: %w", discoveryType, target, swc.jobName, err)
|
|
}
|
|
resultCh <- result{
|
|
sw: sw,
|
|
err: err,
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
for _, metaLabels := range targetLabels {
|
|
workCh <- metaLabels
|
|
}
|
|
close(workCh)
|
|
for range targetLabels {
|
|
r := <-resultCh
|
|
if r.err != nil {
|
|
logger.Errorf("%s", r.err)
|
|
continue
|
|
}
|
|
if r.sw != nil {
|
|
dst = append(dst, r.sw)
|
|
}
|
|
}
|
|
metrics.GetOrCreateHistogram(fmt.Sprintf("vm_promscrape_target_relabel_duration_seconds{type=%q}", discoveryType)).UpdateDuration(startTime)
|
|
return dst
|
|
}
|
|
|
|
func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[string][]*ScrapeWork, baseDir string, swc *scrapeWorkConfig, configPaths map[string]struct{}) []*ScrapeWork {
|
|
for _, file := range sdc.Files {
|
|
pathPattern := fs.GetFilepath(baseDir, file)
|
|
paths := []string{pathPattern}
|
|
if strings.Contains(pathPattern, "*") {
|
|
var err error
|
|
paths, err = filepath.Glob(pathPattern)
|
|
if err != nil {
|
|
// Do not return this error, since other files may contain valid scrape configs.
|
|
logger.Errorf("invalid pattern %q in `files` section: %s; skipping it", file, err)
|
|
continue
|
|
}
|
|
}
|
|
for _, path := range paths {
|
|
// make a key as for previous ScrapeWorks (swsMapPrev map[string][]*ScrapeWork) and show to user
|
|
// warning about identical file_sd_config.
|
|
// We skip it because it will make dst with duplicated ScrapeWork.
|
|
key := fmt.Sprintf("%s:%s", swc.jobName, path)
|
|
if _, ok := configPaths[key]; ok {
|
|
logger.Warnf("file_sd_config contains multiple references to %q, ignoring duplicated entry. please check -promscrape.config and remove duplicated configurations", path)
|
|
continue
|
|
}
|
|
configPaths[key] = struct{}{}
|
|
|
|
stcs, err := loadStaticConfigs(path)
|
|
if err != nil {
|
|
// Do not return this error, since other paths may contain valid scrape configs.
|
|
if sws := swsMapPrev[key]; sws != nil {
|
|
// Re-use the previous valid scrape work for this path.
|
|
logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
|
|
dst = append(dst, sws...)
|
|
} else {
|
|
logger.Errorf("skipping loading `static_configs` from %q because of error: %s", path, err)
|
|
}
|
|
continue
|
|
}
|
|
pathShort := path
|
|
if strings.HasPrefix(pathShort, baseDir) {
|
|
pathShort = path[len(baseDir):]
|
|
if len(pathShort) > 0 && pathShort[0] == filepath.Separator {
|
|
pathShort = pathShort[1:]
|
|
}
|
|
}
|
|
metaLabels := map[string]string{
|
|
"__meta_filepath": pathShort,
|
|
"__vm_filepath": path, // This label is needed for internal promscrape logic
|
|
}
|
|
for i := range stcs {
|
|
dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)
|
|
}
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
func (stc *StaticConfig) appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, metaLabels map[string]string) []*ScrapeWork {
|
|
for _, target := range stc.Targets {
|
|
if target == "" {
|
|
// Do not return this error, since other targets may be valid
|
|
logger.Errorf("`static_configs` target for `job_name` %q cannot be empty; skipping it", swc.jobName)
|
|
continue
|
|
}
|
|
sw, err := swc.getScrapeWork(target, stc.Labels, metaLabels)
|
|
if err != nil {
|
|
// Do not return this error, since other targets may be valid
|
|
logger.Errorf("error when parsing `static_configs` target %q for `job_name` %q: %s; skipping it", target, swc.jobName, err)
|
|
continue
|
|
}
|
|
if sw != nil {
|
|
dst = append(dst, sw)
|
|
}
|
|
}
|
|
return dst
|
|
}
|
|
|
|
func appendScrapeWorkKey(dst []byte, labels []prompbmarshal.Label) []byte {
|
|
for _, label := range labels {
|
|
// Do not use strconv.AppendQuote, since it is slow according to CPU profile.
|
|
dst = append(dst, label.Name...)
|
|
dst = append(dst, '=')
|
|
dst = append(dst, label.Value...)
|
|
dst = append(dst, ',')
|
|
}
|
|
return dst
|
|
}
|
|
|
|
func needSkipScrapeWork(key string, membersCount, replicasCount, memberNum int) bool {
|
|
if membersCount <= 1 {
|
|
return false
|
|
}
|
|
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(key))
|
|
idx := int(h % uint64(membersCount))
|
|
if replicasCount < 1 {
|
|
replicasCount = 1
|
|
}
|
|
for i := 0; i < replicasCount; i++ {
|
|
if idx == memberNum {
|
|
return false
|
|
}
|
|
idx++
|
|
if idx >= membersCount {
|
|
idx = 0
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
type labelsContext struct {
|
|
labels []prompbmarshal.Label
|
|
}
|
|
|
|
func getLabelsContext() *labelsContext {
|
|
v := labelsContextPool.Get()
|
|
if v == nil {
|
|
return &labelsContext{}
|
|
}
|
|
return v.(*labelsContext)
|
|
}
|
|
|
|
func putLabelsContext(lctx *labelsContext) {
|
|
labels := lctx.labels
|
|
for i := range labels {
|
|
labels[i].Name = ""
|
|
labels[i].Value = ""
|
|
}
|
|
lctx.labels = lctx.labels[:0]
|
|
labelsContextPool.Put(lctx)
|
|
}
|
|
|
|
var labelsContextPool sync.Pool
|
|
|
|
var scrapeWorkKeyBufPool bytesutil.ByteBufferPool
|
|
|
|
func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabels map[string]string) (*ScrapeWork, error) {
|
|
lctx := getLabelsContext()
|
|
lctx.labels = mergeLabels(lctx.labels[:0], swc, target, extraLabels, metaLabels)
|
|
var originalLabels []prompbmarshal.Label
|
|
if !*dropOriginalLabels {
|
|
originalLabels = append([]prompbmarshal.Label{}, lctx.labels...)
|
|
}
|
|
lctx.labels = swc.relabelConfigs.Apply(lctx.labels, 0, false)
|
|
lctx.labels = promrelabel.RemoveMetaLabels(lctx.labels[:0], lctx.labels)
|
|
// Remove references to already deleted labels, so GC could clean strings for label name and label value past len(labels).
|
|
// This should reduce memory usage when relabeling creates big number of temporary labels with long names and/or values.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825 for details.
|
|
labels := append([]prompbmarshal.Label{}, lctx.labels...)
|
|
putLabelsContext(lctx)
|
|
|
|
// Verify whether the scrape work must be skipped because of `-promscrape.cluster.*` configs.
|
|
// Perform the verification on labels after the relabeling in order to guarantee that targets with the same set of labels
|
|
// go to the same vmagent shard.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1687#issuecomment-940629495
|
|
if *clusterMembersCount > 1 {
|
|
bb := scrapeWorkKeyBufPool.Get()
|
|
bb.B = appendScrapeWorkKey(bb.B[:0], labels)
|
|
needSkip := needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor, clusterMemberID)
|
|
scrapeWorkKeyBufPool.Put(bb)
|
|
if needSkip {
|
|
return nil, nil
|
|
}
|
|
}
|
|
if !*dropOriginalLabels {
|
|
promrelabel.SortLabels(originalLabels)
|
|
// Reduce memory usage by interning all the strings in originalLabels.
|
|
internLabelStrings(originalLabels)
|
|
}
|
|
if len(labels) == 0 {
|
|
// Drop target without labels.
|
|
droppedTargetsMap.Register(originalLabels)
|
|
return nil, nil
|
|
}
|
|
// See https://www.robustperception.io/life-of-a-label
|
|
schemeRelabeled := promrelabel.GetLabelValueByName(labels, "__scheme__")
|
|
if len(schemeRelabeled) == 0 {
|
|
schemeRelabeled = "http"
|
|
}
|
|
addressRelabeled := promrelabel.GetLabelValueByName(labels, "__address__")
|
|
if len(addressRelabeled) == 0 {
|
|
// Drop target without scrape address.
|
|
droppedTargetsMap.Register(originalLabels)
|
|
return nil, nil
|
|
}
|
|
if strings.Contains(addressRelabeled, "/") {
|
|
// Drop target with '/'
|
|
droppedTargetsMap.Register(originalLabels)
|
|
return nil, nil
|
|
}
|
|
addressRelabeled = addMissingPort(addressRelabeled, schemeRelabeled == "https")
|
|
metricsPathRelabeled := promrelabel.GetLabelValueByName(labels, "__metrics_path__")
|
|
if metricsPathRelabeled == "" {
|
|
metricsPathRelabeled = "/metrics"
|
|
}
|
|
if !strings.HasPrefix(metricsPathRelabeled, "/") {
|
|
metricsPathRelabeled = "/" + metricsPathRelabeled
|
|
}
|
|
paramsRelabeled := getParamsFromLabels(labels, swc.params)
|
|
optionalQuestion := "?"
|
|
if len(paramsRelabeled) == 0 || strings.Contains(metricsPathRelabeled, "?") {
|
|
optionalQuestion = ""
|
|
}
|
|
paramsStr := url.Values(paramsRelabeled).Encode()
|
|
scrapeURL := fmt.Sprintf("%s://%s%s%s%s", schemeRelabeled, addressRelabeled, metricsPathRelabeled, optionalQuestion, paramsStr)
|
|
if _, err := url.Parse(scrapeURL); err != nil {
|
|
return nil, fmt.Errorf("invalid url %q for scheme=%q (%q), target=%q (%q), metrics_path=%q (%q) for `job_name` %q: %w",
|
|
scrapeURL, swc.scheme, schemeRelabeled, target, addressRelabeled, swc.metricsPath, metricsPathRelabeled, swc.jobName, err)
|
|
}
|
|
// Set missing "instance" label according to https://www.robustperception.io/life-of-a-label
|
|
if promrelabel.GetLabelByName(labels, "instance") == nil {
|
|
labels = append(labels, prompbmarshal.Label{
|
|
Name: "instance",
|
|
Value: addressRelabeled,
|
|
})
|
|
promrelabel.SortLabels(labels)
|
|
}
|
|
// Read __scrape_interval__ and __scrape_timeout__ from labels.
|
|
scrapeInterval := swc.scrapeInterval
|
|
if s := promrelabel.GetLabelValueByName(labels, "__scrape_interval__"); len(s) > 0 {
|
|
d, err := promutils.ParseDuration(s)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse __scrape_interval__=%q: %w", s, err)
|
|
}
|
|
scrapeInterval = d
|
|
}
|
|
scrapeTimeout := swc.scrapeTimeout
|
|
if s := promrelabel.GetLabelValueByName(labels, "__scrape_timeout__"); len(s) > 0 {
|
|
d, err := promutils.ParseDuration(s)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse __scrape_timeout__=%q: %w", s, err)
|
|
}
|
|
scrapeTimeout = d
|
|
}
|
|
// Read series_limit option from __series_limit__ label.
|
|
// See https://docs.victoriametrics.com/vmagent.html#cardinality-limiter
|
|
seriesLimit := swc.seriesLimit
|
|
if s := promrelabel.GetLabelValueByName(labels, "__series_limit__"); len(s) > 0 {
|
|
n, err := strconv.Atoi(s)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse __series_limit__=%q: %w", s, err)
|
|
}
|
|
seriesLimit = n
|
|
}
|
|
// Read stream_parse option from __stream_parse__ label.
|
|
// See https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode
|
|
streamParse := swc.streamParse
|
|
if s := promrelabel.GetLabelValueByName(labels, "__stream_parse__"); len(s) > 0 {
|
|
b, err := strconv.ParseBool(s)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot parse __stream_parse__=%q: %w", s, err)
|
|
}
|
|
streamParse = b
|
|
}
|
|
// Reduce memory usage by interning all the strings in labels.
|
|
internLabelStrings(labels)
|
|
sw := &ScrapeWork{
|
|
ScrapeURL: scrapeURL,
|
|
ScrapeInterval: scrapeInterval,
|
|
ScrapeTimeout: scrapeTimeout,
|
|
HonorLabels: swc.honorLabels,
|
|
HonorTimestamps: swc.honorTimestamps,
|
|
DenyRedirects: swc.denyRedirects,
|
|
OriginalLabels: originalLabels,
|
|
Labels: labels,
|
|
ProxyURL: swc.proxyURL,
|
|
ProxyAuthConfig: swc.proxyAuthConfig,
|
|
AuthConfig: swc.authConfig,
|
|
MetricRelabelConfigs: swc.metricRelabelConfigs,
|
|
SampleLimit: swc.sampleLimit,
|
|
DisableCompression: swc.disableCompression,
|
|
DisableKeepAlive: swc.disableKeepAlive,
|
|
StreamParse: streamParse,
|
|
ScrapeAlignInterval: swc.scrapeAlignInterval,
|
|
ScrapeOffset: swc.scrapeOffset,
|
|
SeriesLimit: seriesLimit,
|
|
|
|
jobNameOriginal: swc.jobName,
|
|
}
|
|
return sw, nil
|
|
}
|
|
|
|
func internLabelStrings(labels []prompbmarshal.Label) {
|
|
for i := range labels {
|
|
label := &labels[i]
|
|
label.Name = internString(label.Name)
|
|
label.Value = internString(label.Value)
|
|
}
|
|
}
|
|
|
|
func internString(s string) string {
|
|
m := internStringsMap.Load().(*sync.Map)
|
|
if v, ok := m.Load(s); ok {
|
|
sp := v.(*string)
|
|
return *sp
|
|
}
|
|
// Make a new copy for s in order to remove references from possible bigger string s refers to.
|
|
sCopy := string(append([]byte{}, s...))
|
|
m.Store(sCopy, &sCopy)
|
|
n := atomic.AddUint64(&internStringsMapLen, 1)
|
|
if n > 100e3 {
|
|
atomic.StoreUint64(&internStringsMapLen, 0)
|
|
internStringsMap.Store(&sync.Map{})
|
|
}
|
|
return sCopy
|
|
}
|
|
|
|
var (
|
|
internStringsMap atomic.Value
|
|
internStringsMapLen uint64
|
|
)
|
|
|
|
func init() {
|
|
internStringsMap.Store(&sync.Map{})
|
|
}
|
|
|
|
func getParamsFromLabels(labels []prompbmarshal.Label, paramsOrig map[string][]string) map[string][]string {
|
|
// See https://www.robustperception.io/life-of-a-label
|
|
m := make(map[string][]string)
|
|
for i := range labels {
|
|
label := &labels[i]
|
|
if !strings.HasPrefix(label.Name, "__param_") {
|
|
continue
|
|
}
|
|
name := label.Name[len("__param_"):]
|
|
values := []string{label.Value}
|
|
if p := paramsOrig[name]; len(p) > 1 {
|
|
values = append(values, p[1:]...)
|
|
}
|
|
m[name] = values
|
|
}
|
|
return m
|
|
}
|
|
|
|
func mergeLabels(dst []prompbmarshal.Label, swc *scrapeWorkConfig, target string, extraLabels, metaLabels map[string]string) []prompbmarshal.Label {
|
|
if len(dst) > 0 {
|
|
logger.Panicf("BUG: len(dst) must be 0; got %d", len(dst))
|
|
}
|
|
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
|
|
for k, v := range swc.externalLabels {
|
|
dst = appendLabel(dst, k, v)
|
|
}
|
|
dst = appendLabel(dst, "job", swc.jobName)
|
|
dst = appendLabel(dst, "__address__", target)
|
|
dst = appendLabel(dst, "__scheme__", swc.scheme)
|
|
dst = appendLabel(dst, "__metrics_path__", swc.metricsPath)
|
|
dst = appendLabel(dst, "__scrape_interval__", swc.scrapeIntervalString)
|
|
dst = appendLabel(dst, "__scrape_timeout__", swc.scrapeTimeoutString)
|
|
for k, args := range swc.params {
|
|
if len(args) == 0 {
|
|
continue
|
|
}
|
|
k = "__param_" + k
|
|
v := args[0]
|
|
dst = appendLabel(dst, k, v)
|
|
}
|
|
for k, v := range extraLabels {
|
|
dst = appendLabel(dst, k, v)
|
|
}
|
|
for k, v := range metaLabels {
|
|
dst = appendLabel(dst, k, v)
|
|
}
|
|
if len(dst) < 2 {
|
|
return dst
|
|
}
|
|
// Remove duplicate labels if any.
|
|
// Stable sorting is needed in order to preserve the order for labels with identical names.
|
|
// This is needed in order to remove labels with duplicate names other than the last one.
|
|
promrelabel.SortLabelsStable(dst)
|
|
prevName := dst[0].Name
|
|
hasDuplicateLabels := false
|
|
for _, label := range dst[1:] {
|
|
if label.Name == prevName {
|
|
hasDuplicateLabels = true
|
|
break
|
|
}
|
|
prevName = label.Name
|
|
}
|
|
if !hasDuplicateLabels {
|
|
return dst
|
|
}
|
|
prevName = dst[0].Name
|
|
tmp := dst[:1]
|
|
for _, label := range dst[1:] {
|
|
if label.Name == prevName {
|
|
tmp[len(tmp)-1] = label
|
|
} else {
|
|
tmp = append(tmp, label)
|
|
prevName = label.Name
|
|
}
|
|
}
|
|
tail := dst[len(tmp):]
|
|
for i := range tail {
|
|
label := &tail[i]
|
|
label.Name = ""
|
|
label.Value = ""
|
|
}
|
|
return tmp
|
|
}
|
|
|
|
func appendLabel(dst []prompbmarshal.Label, name, value string) []prompbmarshal.Label {
|
|
return append(dst, prompbmarshal.Label{
|
|
Name: name,
|
|
Value: value,
|
|
})
|
|
}
|
|
|
|
const (
|
|
defaultScrapeInterval = time.Minute
|
|
defaultScrapeTimeout = 10 * time.Second
|
|
)
|