mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
app/vmagent: support for DNS SRV urls at -remoteWrite.url, scrape target urls and service discovery urls
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053
This commit is contained in:
parent
b426d10847
commit
dc326f70b4
7 changed files with 44 additions and 62 deletions
|
@ -3,34 +3,15 @@ package remotewrite
|
|||
import (
|
||||
"context"
|
||||
"net"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func getStdDialer() *net.Dialer {
|
||||
stdDialerOnce.Do(func() {
|
||||
stdDialer = &net.Dialer{
|
||||
Timeout: 30 * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
DualStack: netutil.TCP6Enabled(),
|
||||
}
|
||||
})
|
||||
return stdDialer
|
||||
}
|
||||
|
||||
var (
|
||||
stdDialer *net.Dialer
|
||||
stdDialerOnce sync.Once
|
||||
)
|
||||
|
||||
func statDial(ctx context.Context, _, addr string) (conn net.Conn, err error) {
|
||||
network := netutil.GetTCPNetwork()
|
||||
d := getStdDialer()
|
||||
conn, err = d.DialContext(ctx, network, addr)
|
||||
conn, err = netutil.DialMaybeSRV(ctx, network, addr)
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
|
|
|
@ -30,9 +30,11 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
|||
|
||||
## tip
|
||||
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): in the Select component, user-entered values are now preserved on blur if they match options in the list.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): support regex matching when routing incoming requests based on HTTP [query args](https://en.wikipedia.org/wiki/Query_string) via `src_query_args` option at `url_map`. See [these docs](https://docs.victoriametrics.com/vmauth/#generic-http-proxy-for-different-backends) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6070).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) addresses in `-remoteWrite.url` command-line option and in scrape target urls. For example, `-remoteWrite.url=http://srv+victoria-metrics/api/v1/write` automatically resolves the `victoria-metrics` DNS SRV to a list of hostnames with TCP ports and then sends the collected metrics to these TCP addresses. See [these docs](https://docs.victoriametrics.com/vmagent/#srv-urls) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6053).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): support automatic discovering and load balancing for TCP addresses behind DNS SRV addresses. These addresses can be put inside `url_prefix` urls in the form `http://srv+addr/path`, where the `addr` is the [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) address, which is automatically resolved to hostnames with TCP ports. See [these docs](https://docs.victoriametrics.com/vmauth/#srv-urls) for details.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): support specifying client TLS certificates and TLS ServerName for requests to HTTPS backends. See [these docs](https://docs.victoriametrics.com/vmauth/#backend-tls-setup).
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): support regex matching when routing incoming requests based on HTTP [query args](https://en.wikipedia.org/wiki/Query_string) via `src_query_args` option at `url_map`. See [these docs](https://docs.victoriametrics.com/vmauth/#generic-http-proxy-for-different-backends) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6070).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): in the Select component, user-entered values are now preserved on blur if they match options in the list.
|
||||
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): supported any status codes from the range 200-299 from alertmanager. Previously, only the 200 status code was considered a successful action. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6110).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): don't treat concurrency limit hit as an error of the backend. Previously, hitting the concurrency limit would increment both `vmauth_concurrent_requests_limit_reached_total` and `vmauth_user_request_backend_errors_total` counters. Now, only concurrency limit counter is incremented. Updates [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5565).
|
||||
|
|
|
@ -68,6 +68,7 @@ and sending the data to the Prometheus-compatible remote storage:
|
|||
to run `vmagent` with `-promscrape.config.strictParse=false` command-line flag.
|
||||
In this case `vmagent` ignores unsupported sections. See [the list of unsupported sections](#unsupported-prometheus-config-sections).
|
||||
* `-remoteWrite.url` with Prometheus-compatible remote storage endpoint such as VictoriaMetrics, where to send the data to.
|
||||
The `-remoteWrite.url` may refer to [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) address. See [these docs](#srv-urls) for details.
|
||||
|
||||
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vmagent)
|
||||
to [single-node VictoriaMetrics](https://docs.victoriametrics.com/) located at `victoria-metrics-host:8428`:
|
||||
|
@ -129,7 +130,7 @@ additionally to pull-based Prometheus-compatible targets' scraping:
|
|||
|
||||
* Sending HTTP request to `http://vmagent:8429/-/reload` endpoint. This endpoint can be protected with `-reloadAuthKey` command-line flag.
|
||||
|
||||
There is also `-promscrape.configCheckInterval` command-line option, which can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
There is also `-promscrape.configCheckInterval` command-line flag, which can be used for automatic reloading configs from updated `-promscrape.config` file.
|
||||
|
||||
## Use cases
|
||||
|
||||
|
@ -272,6 +273,24 @@ for the collected samples. Examples:
|
|||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -remoteWrite.streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
## SRV urls
|
||||
|
||||
If `vmagent` encounters urls with `srv+` prefix in hostname (such as `http://srv+some-addr/some/path`), then it resolves `some-addr` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record)
|
||||
record into a TCP address with hostname and TCP port, and then uses the resulting url when it needs to connect to it.
|
||||
|
||||
SRV urls are supported in the following places:
|
||||
|
||||
- In `-remoteWrite.url` command-line flags. For example, if `victoria-metrics` [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) record contains
|
||||
`victoria-metrics-host:8428` TCP address, then `-remoteWrite.url=http://srv+victoria-metrics/api/v1/write` is automatically resolved into
|
||||
`-remoteWrite.url=http://victoria-metrics-host:8428/api/v1/write`. If the DNS SRV record is resolved into multiple TCP addresses, then `vmagent`
|
||||
uses randomly chosen address per each connection it establishes to the remote storage.
|
||||
|
||||
- In scrape target addresses aka `__address__` label - see [these docs](https://docs.victoriametrics.com/relabeling/#how-to-modify-scrape-urls-in-targets) for details.
|
||||
|
||||
- In urls used for [service discovery](https://docs.victoriametrics.com/sd_configs/).
|
||||
|
||||
SRV urls are useful when HTTP services run on different TCP ports or when they can change TCP ports over time (for instance, after the restart).
|
||||
|
||||
## VictoriaMetrics remote write protocol
|
||||
|
||||
`vmagent` supports sending data to the configured `-remoteWrite.url` either via Prometheus remote write protocol
|
||||
|
@ -419,7 +438,7 @@ There is no need in specifying top-level `scrape_configs` section in these files
|
|||
The list of supported service discovery types is available [here](#how-to-collect-metrics-in-prometheus-format).
|
||||
|
||||
Additionally, `vmagent` doesn't support `refresh_interval` option at service discovery sections.
|
||||
This option is substituted with `-promscrape.*CheckInterval` command-line options, which are specific per each service discovery type.
|
||||
This option is substituted with `-promscrape.*CheckInterval` command-line flags, which are specific per each service discovery type.
|
||||
See [the full list of command-line flags for vmagent](#advanced-usage).
|
||||
|
||||
## Adding labels to metrics
|
||||
|
@ -506,7 +525,7 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
|||
sum_over_time(scrape_series_added[1h]) > 1000
|
||||
```
|
||||
|
||||
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line option
|
||||
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line flag
|
||||
or when it scrapes target with `no_stale_markers: true` option, e.g. when [staleness markers](#prometheus-staleness-markers) are disabled.
|
||||
|
||||
* `scrape_series_limit` - the limit on the number of unique time series the given target can expose according to [these docs](#cardinality-limiter).
|
||||
|
@ -1117,14 +1136,14 @@ If you have suggestions for improvements or have found a bug - please open an is
|
|||
as `vmagent` establishes at least a single TCP connection per target.
|
||||
|
||||
* If `vmagent` uses too big amounts of memory, then the following options can help:
|
||||
* Reducing the amounts of RAM vmagent can use for in-memory buffering with `-memory.allowedPercent` or `-memory.allowedBytes` command-line option.
|
||||
* Reducing the amounts of RAM vmagent can use for in-memory buffering with `-memory.allowedPercent` or `-memory.allowedBytes` command-line flag.
|
||||
Another option is to reduce memory limits in Docker and/or Kubernetes if `vmagent` runs under these systems.
|
||||
* Reducing the number of CPU cores vmagent can use by passing `GOMAXPROCS=N` environment variable to `vmagent`,
|
||||
where `N` is the desired limit on CPU cores. Another option is to reduce CPU limits in Docker or Kubernetes if `vmagent` runs under these systems.
|
||||
* Disabling staleness tracking with `-promscrape.noStaleMarkers` option. See [these docs](#prometheus-staleness-markers).
|
||||
* Enabling stream parsing mode if `vmagent` scrapes targets with millions of metrics per target. See [these docs](#stream-parsing-mode).
|
||||
* Reducing the number of tcp connections to remote storage systems with `-remoteWrite.queues` command-line option.
|
||||
* Passing `-promscrape.dropOriginalLabels` command-line option to `vmagent` if it [discovers](https://docs.victoriametrics.com/sd_configs.html)
|
||||
* Reducing the number of tcp connections to remote storage systems with `-remoteWrite.queues` command-line flag.
|
||||
* Passing `-promscrape.dropOriginalLabels` command-line flag to `vmagent` if it [discovers](https://docs.victoriametrics.com/sd_configs.html)
|
||||
big number of targets and many of these targets are [dropped](https://docs.victoriametrics.com/relabeling.html#how-to-drop-discovered-targets)
|
||||
before scraping. In this case `vmagent` drops `"discoveredLabels"` and `"droppedTargets"`
|
||||
lists at `http://vmagent-host:8429/service-discovery` page. This reduces memory usage when scraping big number of targets at the cost
|
||||
|
@ -1142,7 +1161,7 @@ If you have suggestions for improvements or have found a bug - please open an is
|
|||
may result in increased memory usage if a big number of scrape targets are dropped during relabeling.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` [metric](#monitoring)
|
||||
grows constantly. It is also recommended increasing `-remoteWrite.maxBlockSize` and `-remoteWrite.maxRowsPerBlock` command-line options in this case.
|
||||
grows constantly. It is also recommended increasing `-remoteWrite.maxBlockSize` and `-remoteWrite.maxRowsPerBlock` command-line flags in this case.
|
||||
This can improve data ingestion performance to the configured remote storage systems at the cost of higher memory usage.
|
||||
|
||||
* If you see gaps in the data pushed by `vmagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set,
|
||||
|
@ -1387,7 +1406,7 @@ See how to request a free trial license [here](https://victoriametrics.com/produ
|
|||
### Reading metrics from Kafka
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/enterprise/) of `vmagent` can read metrics in various formats from Kafka messages.
|
||||
These formats can be configured with `-kafka.consumer.topic.defaultFormat` or `-kafka.consumer.topic.format` command-line options. The following formats are supported:
|
||||
These formats can be configured with `-kafka.consumer.topic.defaultFormat` or `-kafka.consumer.topic.format` command-line flags. The following formats are supported:
|
||||
|
||||
* `promremotewrite` - [Prometheus remote_write](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write).
|
||||
Messages in this format can be sent by vmagent - see [these docs](#writing-metrics-to-kafka).
|
||||
|
|
|
@ -26,6 +26,10 @@ func IsTrivialNetworkError(err error) bool {
|
|||
func DialMaybeSRV(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
if strings.HasPrefix(addr, "srv+") {
|
||||
addr = strings.TrimPrefix(addr, "srv+")
|
||||
if n := strings.IndexByte(addr, ':'); n >= 0 {
|
||||
// Drop port, since it should be automatically resolved via DNS SRV lookup below.
|
||||
addr = addr[:n]
|
||||
}
|
||||
_, addrs, err := Resolver.LookupSRV(ctx, "", "", addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot resolve SRV addr %s: %w", addr, err)
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
@ -74,7 +75,7 @@ func getMXAddrLabels(ctx context.Context, sdc *SDConfig) []*promutils.Labels {
|
|||
ch := make(chan result, len(sdc.Names))
|
||||
for _, name := range sdc.Names {
|
||||
go func(name string) {
|
||||
mx, err := resolver.LookupMX(ctx, name)
|
||||
mx, err := netutil.Resolver.LookupMX(ctx, name)
|
||||
ch <- result{
|
||||
name: name,
|
||||
mx: mx,
|
||||
|
@ -109,7 +110,7 @@ func getSRVAddrLabels(ctx context.Context, sdc *SDConfig) []*promutils.Labels {
|
|||
ch := make(chan result, len(sdc.Names))
|
||||
for _, name := range sdc.Names {
|
||||
go func(name string) {
|
||||
_, as, err := resolver.LookupSRV(ctx, "", "", name)
|
||||
_, as, err := netutil.Resolver.LookupSRV(ctx, "", "", name)
|
||||
ch <- result{
|
||||
name: name,
|
||||
as: as,
|
||||
|
@ -148,7 +149,7 @@ func getAAddrLabels(ctx context.Context, sdc *SDConfig, lookupType string) ([]*p
|
|||
ch := make(chan result, len(sdc.Names))
|
||||
for _, name := range sdc.Names {
|
||||
go func(name string) {
|
||||
ips, err := resolver.LookupIPAddr(ctx, name)
|
||||
ips, err := netutil.Resolver.LookupIPAddr(ctx, name)
|
||||
ch <- result{
|
||||
name: name,
|
||||
ips: ips,
|
||||
|
@ -192,8 +193,3 @@ func appendAddrLabels(ms []*promutils.Labels, name, target string, port int) []*
|
|||
m.Add("__meta_dns_srv_record_port", strconv.Itoa(port))
|
||||
return append(ms, m)
|
||||
}
|
||||
|
||||
var resolver = &net.Resolver{
|
||||
PreferGo: true,
|
||||
StrictErrors: true,
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
|
@ -87,8 +88,6 @@ func (hc *HTTPClient) stop() {
|
|||
hc.client.CloseIdleConnections()
|
||||
}
|
||||
|
||||
var defaultDialer = &net.Dialer{}
|
||||
|
||||
// NewClient returns new Client for the given args.
|
||||
func NewClient(apiServer string, ac *promauth.Config, proxyURL *proxy.URL, proxyAC *promauth.Config, httpCfg *promauth.HTTPClientConfig) (*Client, error) {
|
||||
u, err := url.Parse(apiServer)
|
||||
|
@ -96,13 +95,13 @@ func NewClient(apiServer string, ac *promauth.Config, proxyURL *proxy.URL, proxy
|
|||
return nil, fmt.Errorf("cannot parse apiServer=%q: %w", apiServer, err)
|
||||
}
|
||||
|
||||
dialFunc := defaultDialer.DialContext
|
||||
dialFunc := netutil.DialMaybeSRV
|
||||
if u.Scheme == "unix" {
|
||||
// special case for unix socket connection
|
||||
dialAddr := u.Path
|
||||
apiServer = "http://unix"
|
||||
dialFunc = func(ctx context.Context, _, _ string) (net.Conn, error) {
|
||||
return defaultDialer.DialContext(ctx, "unix", dialAddr)
|
||||
return netutil.Dialer.DialContext(ctx, "unix", dialAddr)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,18 +6,15 @@ import (
|
|||
"net"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
func statStdDial(ctx context.Context, _, addr string) (net.Conn, error) {
|
||||
d := getStdDialer()
|
||||
network := netutil.GetTCPNetwork()
|
||||
conn, err := d.DialContext(ctx, network, addr)
|
||||
conn, err := netutil.DialMaybeSRV(ctx, network, addr)
|
||||
dialsTotal.Inc()
|
||||
if err != nil {
|
||||
dialErrors.Inc()
|
||||
|
@ -33,22 +30,6 @@ func statStdDial(ctx context.Context, _, addr string) (net.Conn, error) {
|
|||
return sc, nil
|
||||
}
|
||||
|
||||
func getStdDialer() *net.Dialer {
|
||||
stdDialerOnce.Do(func() {
|
||||
stdDialer = &net.Dialer{
|
||||
Timeout: 30 * time.Second,
|
||||
KeepAlive: 30 * time.Second,
|
||||
DualStack: netutil.TCP6Enabled(),
|
||||
}
|
||||
})
|
||||
return stdDialer
|
||||
}
|
||||
|
||||
var (
|
||||
stdDialer *net.Dialer
|
||||
stdDialerOnce sync.Once
|
||||
)
|
||||
|
||||
var (
|
||||
dialsTotal = metrics.NewCounter(`vm_promscrape_dials_total`)
|
||||
dialErrors = metrics.NewCounter(`vm_promscrape_dial_errors_total`)
|
||||
|
|
Loading…
Reference in a new issue