2020-12-03 17:47:40 +00:00
|
|
|
package consul
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2020-12-03 17:50:50 +00:00
|
|
|
"flag"
|
2020-12-03 17:47:40 +00:00
|
|
|
"fmt"
|
|
|
|
"net/url"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
|
2020-12-03 17:50:50 +00:00
|
|
|
"github.com/VictoriaMetrics/metrics"
|
2020-12-03 17:47:40 +00:00
|
|
|
)
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// SDCheckInterval is the interval for checking for changes in Consul.
//
// It is set via the -promscrape.consulSDCheckInterval command-line flag
// and defaults to 30 seconds.
var SDCheckInterval = flag.Duration(
	"promscrape.consulSDCheckInterval",
	30*time.Second,
	"Interval for checking for changes in Consul. "+
		"This works only if `consul_sd_configs` is configured in '-promscrape.config' file. "+
		"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details",
)
|
|
|
|
|
|
|
|
// consulWatcher is a watcher for consul api, updates services map in background with long-polling.
|
|
|
|
type consulWatcher struct {
|
|
|
|
client *discoveryutils.Client
|
|
|
|
|
|
|
|
serviceNamesQueryArgs string
|
|
|
|
serviceNodesQueryArgs string
|
|
|
|
watchServices []string
|
|
|
|
watchTags []string
|
|
|
|
|
|
|
|
// servicesLock protects services and servicesLastAccessTime
|
|
|
|
servicesLock sync.Mutex
|
|
|
|
services map[string]*serviceWatcher
|
|
|
|
servicesLastAccessTime time.Time
|
|
|
|
|
|
|
|
wg sync.WaitGroup
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
type serviceWatcher struct {
|
|
|
|
serviceName string
|
|
|
|
serviceNodes []ServiceNode
|
2020-12-03 17:47:40 +00:00
|
|
|
stopCh chan struct{}
|
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// newConsulWatcher creates new watcher and start background service discovery for Consul.
|
|
|
|
func newConsulWatcher(client *discoveryutils.Client, sdc *SDConfig, datacenter string) *consulWatcher {
|
2020-12-08 19:49:11 +00:00
|
|
|
baseQueryArgs := "?dc=" + url.QueryEscape(datacenter)
|
2020-12-03 17:47:40 +00:00
|
|
|
if sdc.AllowStale {
|
|
|
|
baseQueryArgs += "&stale"
|
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
for k, v := range sdc.NodeMeta {
|
|
|
|
baseQueryArgs += "&node-meta=" + url.QueryEscape(k+":"+v)
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
serviceNodesQueryArgs := baseQueryArgs
|
|
|
|
for _, tag := range sdc.Tags {
|
|
|
|
serviceNodesQueryArgs += "&tag=" + url.QueryEscape(tag)
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
cw := &consulWatcher{
|
|
|
|
client: client,
|
|
|
|
serviceNamesQueryArgs: baseQueryArgs,
|
|
|
|
serviceNodesQueryArgs: serviceNodesQueryArgs,
|
|
|
|
watchServices: sdc.Services,
|
|
|
|
watchTags: sdc.Tags,
|
|
|
|
services: make(map[string]*serviceWatcher),
|
|
|
|
servicesLastAccessTime: time.Now(),
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
go cw.watchForServicesUpdates()
|
|
|
|
return cw
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// watchForServicesUpdates watches for new services and updates it in cw.
|
|
|
|
func (cw *consulWatcher) watchForServicesUpdates() {
|
|
|
|
checkInterval := getCheckInterval()
|
|
|
|
ticker := time.NewTicker(checkInterval / 2)
|
|
|
|
defer ticker.Stop()
|
|
|
|
index := int64(0)
|
|
|
|
clientAddr := cw.client.Addr()
|
|
|
|
f := func() {
|
|
|
|
serviceNames, newIndex, err := cw.getBlockingServiceNames(index)
|
|
|
|
if err != nil {
|
|
|
|
logger.Errorf("cannot obtain Consul serviceNames from %q: %s", clientAddr, err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if index == newIndex {
|
|
|
|
// Nothing changed.
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
cw.servicesLock.Lock()
|
|
|
|
// Start watchers for new services.
|
|
|
|
for _, serviceName := range serviceNames {
|
|
|
|
if _, ok := cw.services[serviceName]; ok {
|
|
|
|
// The watcher for serviceName already exists.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
sw := &serviceWatcher{
|
|
|
|
serviceName: serviceName,
|
|
|
|
stopCh: make(chan struct{}),
|
|
|
|
}
|
|
|
|
cw.services[serviceName] = sw
|
|
|
|
cw.wg.Add(1)
|
|
|
|
serviceWatchersCreated.Inc()
|
|
|
|
go func() {
|
|
|
|
serviceWatchersCount.Inc()
|
|
|
|
sw.watchForServiceNodesUpdates(cw)
|
|
|
|
serviceWatchersCount.Dec()
|
|
|
|
cw.wg.Done()
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
// Stop watchers for removed services.
|
|
|
|
newServiceNamesMap := make(map[string]struct{}, len(serviceNames))
|
|
|
|
for _, serviceName := range serviceNames {
|
|
|
|
newServiceNamesMap[serviceName] = struct{}{}
|
|
|
|
}
|
|
|
|
for serviceName, sw := range cw.services {
|
|
|
|
if _, ok := newServiceNamesMap[serviceName]; ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
close(sw.stopCh)
|
|
|
|
delete(cw.services, serviceName)
|
|
|
|
serviceWatchersStopped.Inc()
|
|
|
|
|
|
|
|
// Do not wait for the watcher goroutine to exit, since this may take for up to maxWaitTime
|
|
|
|
// if it is blocked in Consul API request.
|
|
|
|
}
|
|
|
|
cw.servicesLock.Unlock()
|
|
|
|
|
|
|
|
index = newIndex
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
logger.Infof("started Consul service watcher for %q", clientAddr)
|
|
|
|
f()
|
|
|
|
for range ticker.C {
|
|
|
|
cw.servicesLock.Lock()
|
|
|
|
lastAccessTime := cw.servicesLastAccessTime
|
|
|
|
cw.servicesLock.Unlock()
|
|
|
|
if time.Since(lastAccessTime) > 3*checkInterval {
|
|
|
|
// The given cw is no longer used. Stop all service watchers and exit.
|
|
|
|
logger.Infof("starting to stop Consul service watchers for %q", clientAddr)
|
2020-12-03 18:14:17 +00:00
|
|
|
startTime := time.Now()
|
2020-12-03 17:50:50 +00:00
|
|
|
cw.servicesLock.Lock()
|
|
|
|
for _, sw := range cw.services {
|
|
|
|
close(sw.stopCh)
|
|
|
|
}
|
|
|
|
cw.servicesLock.Unlock()
|
|
|
|
cw.wg.Wait()
|
2020-12-03 18:14:17 +00:00
|
|
|
logger.Infof("stopped Consul service watcher for %q in %.3f seconds", clientAddr, time.Since(startTime).Seconds())
|
2020-12-03 17:50:50 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
f()
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
serviceWatchersCreated = metrics.NewCounter("vm_promscrape_discovery_consul_service_watchers_created_total")
|
|
|
|
serviceWatchersStopped = metrics.NewCounter("vm_promscrape_discovery_consul_service_watchers_stopped_total")
|
|
|
|
serviceWatchersCount = metrics.NewCounter("vm_promscrape_discovery_consul_service_watchers")
|
|
|
|
)
|
|
|
|
|
|
|
|
// getBlockingServiceNames obtains serviceNames via blocking request to Consul.
|
|
|
|
//
|
|
|
|
// It returns an empty serviceNames list if response contains the same index.
|
|
|
|
func (cw *consulWatcher) getBlockingServiceNames(index int64) ([]string, int64, error) {
|
|
|
|
path := "/v1/catalog/services" + cw.serviceNamesQueryArgs
|
2020-12-03 17:47:40 +00:00
|
|
|
data, newIndex, err := getBlockingAPIResponse(cw.client, path, index)
|
|
|
|
if err != nil {
|
|
|
|
return nil, index, err
|
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
if index == newIndex {
|
|
|
|
// Nothing changed - return an empty serviceNames list.
|
|
|
|
return nil, index, nil
|
|
|
|
}
|
2020-12-03 17:47:40 +00:00
|
|
|
var m map[string][]string
|
|
|
|
if err := json.Unmarshal(data, &m); err != nil {
|
2020-12-03 17:50:50 +00:00
|
|
|
return nil, index, fmt.Errorf("cannot parse response from %q: %w; data=%q", path, err, data)
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
serviceNames := make([]string, 0, len(m))
|
|
|
|
for serviceName, tags := range m {
|
|
|
|
if !shouldCollectServiceByName(cw.watchServices, serviceName) {
|
2020-12-03 17:47:40 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
if !shouldCollectServiceByTags(cw.watchTags, tags) {
|
2020-12-03 17:47:40 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
serviceNames = append(serviceNames, serviceName)
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
return serviceNames, newIndex, nil
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// watchForServiceNodesUpdates watches for Consul serviceNode changes for the given serviceName.
|
|
|
|
func (sw *serviceWatcher) watchForServiceNodesUpdates(cw *consulWatcher) {
|
|
|
|
checkInterval := getCheckInterval()
|
|
|
|
ticker := time.NewTicker(checkInterval / 2)
|
2020-12-03 17:47:40 +00:00
|
|
|
defer ticker.Stop()
|
2020-12-03 17:50:50 +00:00
|
|
|
clientAddr := cw.client.Addr()
|
|
|
|
index := int64(0)
|
|
|
|
path := "/v1/health/service/" + sw.serviceName + cw.serviceNodesQueryArgs
|
|
|
|
f := func() {
|
|
|
|
data, newIndex, err := getBlockingAPIResponse(cw.client, path, index)
|
|
|
|
if err != nil {
|
|
|
|
logger.Errorf("cannot obtain Consul serviceNodes for serviceName=%q from %q: %s", sw.serviceName, clientAddr, err)
|
2020-12-03 17:47:40 +00:00
|
|
|
return
|
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
if index == newIndex {
|
|
|
|
// Nothing changed.
|
|
|
|
return
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
sns, err := parseServiceNodes(data)
|
|
|
|
if err != nil {
|
|
|
|
logger.Errorf("cannot parse Consul serviceNodes response for serviceName=%q from %q: %s", sw.serviceName, clientAddr, err)
|
|
|
|
return
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
|
2020-12-03 17:47:40 +00:00
|
|
|
cw.servicesLock.Lock()
|
2020-12-03 17:50:50 +00:00
|
|
|
sw.serviceNodes = sns
|
2020-12-03 17:47:40 +00:00
|
|
|
cw.servicesLock.Unlock()
|
2020-12-03 17:50:50 +00:00
|
|
|
|
|
|
|
index = newIndex
|
|
|
|
}
|
|
|
|
|
|
|
|
f()
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ticker.C:
|
|
|
|
f()
|
|
|
|
case <-sw.stopCh:
|
|
|
|
return
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
}
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// getServiceNodesSnapshot returns a snapshot of discovered ServiceNodes.
|
|
|
|
func (cw *consulWatcher) getServiceNodesSnapshot() []ServiceNode {
|
2020-12-03 17:47:40 +00:00
|
|
|
var sns []ServiceNode
|
|
|
|
cw.servicesLock.Lock()
|
2020-12-03 17:50:50 +00:00
|
|
|
for _, sw := range cw.services {
|
|
|
|
sns = append(sns, sw.serviceNodes...)
|
2020-12-03 17:47:40 +00:00
|
|
|
}
|
2020-12-03 17:50:50 +00:00
|
|
|
cw.servicesLastAccessTime = time.Now()
|
2020-12-03 17:47:40 +00:00
|
|
|
cw.servicesLock.Unlock()
|
|
|
|
return sns
|
|
|
|
}
|
|
|
|
|
2020-12-03 17:50:50 +00:00
|
|
|
// shouldCollectServiceByName returns true if serviceName passes the filterServices filter.
//
// An empty filter matches any service name. Otherwise serviceName must be equal
// to one of the filterServices entries.
func shouldCollectServiceByName(filterServices []string, serviceName string) bool {
	if len(filterServices) == 0 {
		// No filter is set - every service is accepted.
		return true
	}
	for i := range filterServices {
		if serviceName == filterServices[i] {
			return true
		}
	}
	return false
}
|
|
|
|
|
|
|
|
// shouldCollectServiceByTags returns true if tags contain every tag from filterTags.
//
// An empty filterTags list matches any tags.
func shouldCollectServiceByTags(filterTags, tags []string) bool {
nextFilterTag:
	for _, want := range filterTags {
		for _, have := range tags {
			if have == want {
				continue nextFilterTag
			}
		}
		// The required tag is missing.
		return false
	}
	return true
}
|
2020-12-03 17:50:50 +00:00
|
|
|
|
|
|
|
func getCheckInterval() time.Duration {
|
|
|
|
d := *SDCheckInterval
|
|
|
|
if d <= time.Second {
|
|
|
|
return time.Second
|
|
|
|
}
|
|
|
|
return d
|
|
|
|
}
|