lib/promscrape: add Prometheus-compatible DNS-based service discovery aka dns_sd_configs

This commit is contained in:
Aliaksandr Valialkin 2020-05-06 00:01:49 +03:00
parent 364789c24c
commit 3f52a97f9b
7 changed files with 174 additions and 14 deletions

View file

@ -263,6 +263,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
In the future other `*_sd_config` types will be supported.

View file

@ -138,17 +138,15 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
`vmagent` doesn't support `role_arn` config param yet.
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
`vmagent` provides the following additional functionality `gce_sd_config`:
`vmagent` provides the following additional functionality for `gce_sd_config`:
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
* if `zone` arg equals `"*"`, then `vmagent` discovers all the zones for the given project;
* `zone` may contain an arbitrary number of zones, e.g. `zone: [us-east1-a, us-east1-b]`.
* `consul_sd_configs` - for scraping targets registered in Consul.
See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details.
The following service discovery mechanisms will be added to `vmagent` soon:
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.

View file

@ -263,6 +263,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la
* [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config)
* [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config)
* [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config)
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
In the future other `*_sd_config` types will be supported.

View file

@ -138,17 +138,15 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh
`vmagent` doesn't support `role_arn` config param yet.
* `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE).
See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details.
`vmagent` provides the following additional functionality `gce_sd_config`:
`vmagent` provides the following additional functionality for `gce_sd_config`:
* if `project` arg is missing, then `vmagent` uses the project for the instance where it runs;
* if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs;
* if `zone` arg equals `"*"`, then `vmagent` discovers all the zones for the given project;
* `zone` may contain an arbitrary number of zones, e.g. `zone: [us-east1-a, us-east1-b]`.
* `consul_sd_configs` - for scraping targets registered in Consul.
See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details.
The following service discovery mechanisms will be added to `vmagent` soon:
* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config)
* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA).
See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details.
File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`.

View file

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/ec2"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/gce"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/kubernetes"
@ -64,6 +65,7 @@ type ScrapeConfig struct {
FileSDConfigs []FileSDConfig `yaml:"file_sd_configs"`
KubernetesSDConfigs []kubernetes.SDConfig `yaml:"kubernetes_sd_configs"`
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs"`
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs"`
EC2SDConfigs []ec2.SDConfig `yaml:"ec2_sd_configs"`
GCESDConfigs []gce.SDConfig `yaml:"gce_sd_configs"`
RelabelConfigs []promrelabel.RelabelConfig `yaml:"relabel_configs"`
@ -171,6 +173,19 @@ func (cfg *Config) getConsulSDScrapeWork() []ScrapeWork {
return dst
}
// getDNSSDScrapeWork builds the ScrapeWork list for all the `dns_sd_configs`
// entries found in cfg.
//
// Every DNS SD config of every scrape config is processed in declaration
// order; the result is nil when nothing is discovered.
func (cfg *Config) getDNSSDScrapeWork() []ScrapeWork {
	var sws []ScrapeWork
	for i := 0; i < len(cfg.ScrapeConfigs); i++ {
		scrapeCfg := &cfg.ScrapeConfigs[i]
		for j := 0; j < len(scrapeCfg.DNSSDConfigs); j++ {
			sws = appendDNSScrapeWork(sws, &scrapeCfg.DNSSDConfigs[j], scrapeCfg.swc)
		}
	}
	return sws
}
// getEC2SDScrapeWork returns `ec2_sd_configs` ScrapeWork from cfg.
func (cfg *Config) getEC2SDScrapeWork() []ScrapeWork {
var dst []ScrapeWork
@ -318,7 +333,7 @@ type scrapeWorkConfig struct {
func appendKubernetesScrapeWork(dst []ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
targetLabels, err := kubernetes.GetLabels(sdc, baseDir)
if err != nil {
logger.Errorf("error when discovering kubernetes nodes for `job_name` %q: %s; skipping it", swc.jobName, err)
logger.Errorf("error when discovering kubernetes targets for `job_name` %q: %s; skipping it", swc.jobName, err)
return dst
}
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "kubernetes_sd_config")
@ -327,16 +342,25 @@ func appendKubernetesScrapeWork(dst []ScrapeWork, sdc *kubernetes.SDConfig, base
// appendConsulScrapeWork appends to dst the ScrapeWork built from the target
// labels that consul.GetLabels returns for sdc, and returns the resulting slice.
//
// If discovery fails, the error is logged together with the job name and dst
// is returned unchanged.
func appendConsulScrapeWork(dst []ScrapeWork, sdc *consul.SDConfig, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
targetLabels, err := consul.GetLabels(sdc, baseDir)
if err != nil {
logger.Errorf("error when discovering consul nodes for `job_name` %q: %s; skipping it", swc.jobName, err)
logger.Errorf("error when discovering consul targets for `job_name` %q: %s; skipping it", swc.jobName, err)
return dst
}
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "consul_sd_config")
}
// appendDNSScrapeWork appends to dst the ScrapeWork built from the target
// labels that dns.GetLabels returns for sdc, and returns the resulting slice.
//
// If discovery fails, the error is logged together with the job name and dst
// is returned unchanged.
func appendDNSScrapeWork(dst []ScrapeWork, sdc *dns.SDConfig, swc *scrapeWorkConfig) []ScrapeWork {
	labelSets, discoverErr := dns.GetLabels(sdc)
	if discoverErr != nil {
		logger.Errorf("error when discovering dns targets for `job_name` %q: %s; skipping it", swc.jobName, discoverErr)
		return dst
	}
	sws := appendScrapeWorkForTargetLabels(dst, swc, labelSets, "dns_sd_config")
	return sws
}
func appendEC2ScrapeWork(dst []ScrapeWork, sdc *ec2.SDConfig, swc *scrapeWorkConfig) []ScrapeWork {
targetLabels, err := ec2.GetLabels(sdc)
if err != nil {
logger.Errorf("error when discovering ec2 nodes for `job_name` %q: %s; skipping it", swc.jobName, err)
logger.Errorf("error when discovering ec2 targets for `job_name` %q: %s; skipping it", swc.jobName, err)
return dst
}
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "ec2_sd_config")
@ -345,7 +369,7 @@ func appendEC2ScrapeWork(dst []ScrapeWork, sdc *ec2.SDConfig, swc *scrapeWorkCon
func appendGCEScrapeWork(dst []ScrapeWork, sdc *gce.SDConfig, swc *scrapeWorkConfig) []ScrapeWork {
targetLabels, err := gce.GetLabels(sdc)
if err != nil {
logger.Errorf("error when discovering gce nodes for `job_name` %q: %s; skippint it", swc.jobName, err)
logger.Errorf("error when discovering gce targets for `job_name` %q: %s; skippint it", swc.jobName, err)
return dst
}
return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "gce_sd_config")

View file

@ -0,0 +1,134 @@
package dns
import (
"context"
"fmt"
"net"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
)
// SDConfig represents service discovery config for DNS.
//
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
type SDConfig struct {
// Names lists the DNS names to resolve. GetLabels returns an error if it is empty.
Names []string `yaml:"names"`
// Type is the DNS record type to query: SRV, A or AAAA (matched case-insensitively).
// An empty value is treated as SRV.
Type string `yaml:"type"`
// Port is the port joined to every discovered address for A and AAAA lookups,
// where it is required. SRV lookups take the port from the SRV record instead.
Port *int `yaml:"port"`
// RefreshInterval time.Duration `yaml:"refresh_interval"`
// refresh_interval is obtained from `-promscrape.dnsSDCheckInterval` command-line option.
}
// GetLabels discovers targets via DNS as described by sdc and returns one
// label set per discovered address.
//
// The record type is matched case-insensitively and defaults to SRV when it
// is not set. All the lookups share a single 10 second deadline.
//
// An error is returned when sdc.Names is empty, when the record type is not
// one of SRV, A or AAAA, or when the A/AAAA lookup helper rejects the config.
func GetLabels(sdc *SDConfig) ([]map[string]string, error) {
	if len(sdc.Names) == 0 {
		return nil, fmt.Errorf("`names` cannot be empty in `dns_sd_config`")
	}

	// Normalize the record type; upper-casing an empty string keeps it empty.
	recordType := strings.ToUpper(sdc.Type)
	if recordType == "" {
		recordType = "SRV"
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if recordType == "SRV" {
		return getSRVAddrLabels(ctx, sdc), nil
	}
	if recordType == "A" || recordType == "AAAA" {
		return getAAddrLabels(ctx, sdc, recordType)
	}
	return nil, fmt.Errorf("unexpected `type` in `dns_sd_config`: %q; supported values: SRV, A, AAAA", recordType)
}
// getSRVAddrLabels resolves SRV records for every name in sdc.Names and
// returns one label set per SRV record found.
//
// Lookups run concurrently, one goroutine per name, and are bounded by ctx.
// A failed lookup is logged and skipped, so the remaining names still
// contribute targets. Label sets are grouped by name in sdc.Names order,
// independently of which lookup completes first.
func getSRVAddrLabels(ctx context.Context, sdc *SDConfig) []map[string]string {
	type result struct {
		idx  int
		name string
		as   []*net.SRV
		err  error
	}
	// The channel is buffered for all the names, so no goroutine blocks on send.
	ch := make(chan result, len(sdc.Names))
	for i, name := range sdc.Names {
		go func(idx int, name string) {
			// Empty service and proto make LookupSRV query name as is.
			_, as, err := resolver.LookupSRV(ctx, "", "", name)
			ch <- result{
				idx:  idx,
				name: name,
				as:   as,
				err:  err,
			}
		}(i, name)
	}

	// Put every result into the slot of its name, so the output order
	// doesn't depend on goroutine scheduling or on DNS latency.
	results := make([]result, len(sdc.Names))
	for range sdc.Names {
		r := <-ch
		results[r.idx] = r
	}

	var ms []map[string]string
	for i := range results {
		r := &results[i]
		if r.err != nil {
			logger.Errorf("error in SRV lookup for %q; skipping it; error: %s", r.name, r.err)
			continue
		}
		for _, a := range r.as {
			// Drop the trailing dots of the fully qualified target name.
			target := strings.TrimRight(a.Target, ".")
			ms = appendAddrLabels(ms, r.name, target, int(a.Port))
		}
	}
	return ms
}
// getAAddrLabels resolves IP addresses for every name in sdc.Names and
// returns one label set per address of the family selected by lookupType
// ("A" keeps IPv4 addresses, "AAAA" keeps the remaining ones).
//
// sdc.Port is required, since address records carry no port; an error is
// returned if it is missing. Lookups run concurrently, one goroutine per
// name, and are bounded by ctx. A failed lookup is logged and skipped.
// Label sets are grouped by name in sdc.Names order, independently of which
// lookup completes first.
func getAAddrLabels(ctx context.Context, sdc *SDConfig, lookupType string) ([]map[string]string, error) {
	if sdc.Port == nil {
		return nil, fmt.Errorf("missing `port` in `dns_sd_config`")
	}
	port := *sdc.Port
	type result struct {
		idx  int
		name string
		ips  []net.IPAddr
		err  error
	}
	// The channel is buffered for all the names, so no goroutine blocks on send.
	ch := make(chan result, len(sdc.Names))
	for i, name := range sdc.Names {
		go func(idx int, name string) {
			ips, err := resolver.LookupIPAddr(ctx, name)
			ch <- result{
				idx:  idx,
				name: name,
				ips:  ips,
				err:  err,
			}
		}(i, name)
	}

	// Put every result into the slot of its name, so the output order
	// doesn't depend on goroutine scheduling or on DNS latency.
	results := make([]result, len(sdc.Names))
	for range sdc.Names {
		r := <-ch
		results[r.idx] = r
	}

	var ms []map[string]string
	for i := range results {
		r := &results[i]
		if r.err != nil {
			logger.Errorf("error in %s lookup for %q: %s", lookupType, r.name, r.err)
			continue
		}
		for _, ip := range r.ips {
			// LookupIPAddr returns both IPv4 and IPv6 addresses,
			// so keep only the family requested by lookupType.
			isIPv4 := ip.IP.To4() != nil
			if lookupType == "AAAA" && isIPv4 || lookupType == "A" && !isIPv4 {
				continue
			}
			ms = appendAddrLabels(ms, r.name, ip.IP.String(), port)
		}
	}
	return ms, nil
}
// appendAddrLabels appends to ms a label set for a single discovered target
// and returns the extended slice.
//
// The label set holds `__address__`, built from target and port, and
// `__meta_dns_name`, set to the DNS name the target was discovered from.
func appendAddrLabels(ms []map[string]string, name, target string, port int) []map[string]string {
	labels := make(map[string]string, 2)
	labels["__address__"] = discoveryutils.JoinHostPort(target, port)
	labels["__meta_dns_name"] = name
	return append(ms, labels)
}
// resolver is the DNS resolver used by the SRV and A/AAAA lookups above.
//
// PreferGo asks for Go's built-in DNS resolver where it is available.
// StrictErrors makes temporary errors (timeouts, socket errors, SERVFAIL)
// in a multi-part query abort the whole lookup instead of returning
// a partial result.
var resolver = &net.Resolver{
PreferGo: true,
StrictErrors: true,
}

View file

@ -24,6 +24,9 @@ var (
consulSDCheckInterval = flag.Duration("promscrape.consulSDCheckInterval", 30*time.Second, "Interval for checking for changes in consul. "+
"This works only if `consul_sd_configs` is configured in '-promscrape.config' file. "+
"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details")
dnsSDCheckInterval = flag.Duration("promscrape.dnsSDCheckInterval", 30*time.Second, "Interval for checking for changes in dns. "+
"This works only if `dns_sd_configs` is configured in '-promscrape.config' file. "+
"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config for details")
ec2SDCheckInterval = flag.Duration("promscrape.ec2SDCheckInterval", time.Minute, "Interval for checking for changes in ec2. "+
"This works only if `ec2_sd_configs` is configured in '-promscrape.config' file. "+
"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config for details")
@ -74,6 +77,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
scs.add("file_sd_configs", *fileSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getFileSDScrapeWork(swsPrev) })
scs.add("kubernetes_sd_configs", *kubernetesSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getKubernetesSDScrapeWork() })
scs.add("consul_sd_configs", *consulSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getConsulSDScrapeWork() })
scs.add("dns_sd_configs", *dnsSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getDNSSDScrapeWork() })
scs.add("ec2_sd_configs", *ec2SDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getEC2SDScrapeWork() })
scs.add("gce_sd_configs", *gceSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getGCESDScrapeWork() })