app/vmagent/remotewrite: add -remoteWrite.shardByURL.labels command-line flag

This command-line flag can be used for specifying a list of labels used for sharding
among -remoteWrite.url entries when -remoteWrite.shardByURL command-line flag is set.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4942
This commit is contained in:
Aliaksandr Valialkin 2023-11-01 23:05:11 +01:00
parent ffeec24811
commit 369d37749d
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
5 changed files with 54 additions and 4 deletions

View file

@ -173,6 +173,13 @@ by routing outgoing samples for the same time series of [counter](https://docs.v
and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) types from top-level `vmagent` instances
to the same second-level `vmagent` instance, so they are aggregated properly.
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURLLabels`
command-line flag. For example, `-remoteWrite.shardByURLLabels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`.
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
### Relabeling and filtering
@ -1601,6 +1608,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.shardByURL
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
-remoteWrite.shardByURL.labels array
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.showURL
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-remoteWrite.significantFigures array

View file

@ -23,6 +23,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/tenantmetrics"
"github.com/VictoriaMetrics/metrics"
@ -40,6 +41,9 @@ var (
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
"even distribution of series over the specified -remoteWrite.url systems")
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
"See also -remoteWrite.maxDiskUsagePerURL")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
@ -116,6 +120,8 @@ func InitSecretFlags() {
}
}
var shardByURLLabelsMap map[string]struct{}
// Init initializes remotewrite.
//
// It must be called after flag.Parse().
@ -152,6 +158,13 @@ func Init() {
if *queues <= 0 {
*queues = 1
}
if len(*shardByURLLabels) > 0 {
m := make(map[string]struct{}, len(*shardByURLLabels))
for _, label := range *shardByURLLabels {
m[label] = struct{}{}
}
shardByURLLabelsMap = m
}
initLabelsGlobal()
// Register SIGHUP handler for config reload before loadRelabelConfigs.
@ -418,11 +431,23 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
if *shardByURL {
// Shard the data among rwctxs
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
tmpLabels := promutils.GetLabels()
for _, ts := range tssBlock {
h := getLabelsHash(ts.Labels)
hashLabels := ts.Labels
if len(shardByURLLabelsMap) > 0 {
hashLabels = tmpLabels.Labels[:0]
for _, label := range ts.Labels {
if _, ok := shardByURLLabelsMap[label.Name]; ok {
hashLabels = append(hashLabels, label)
}
}
}
h := getLabelsHash(hashLabels)
idx := h % uint64(len(tssByURL))
tssByURL[idx] = append(tssByURL[idx], ts)
}
promutils.PutLabels(tmpLabels)
// Push sharded data to remote storages in parallel in order to reduce
// the time needed for sending the data to multiple remote storage systems.
var wg sync.WaitGroup

View file

@ -51,6 +51,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `-rule.evalDelay` flag and `eval_delay` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups). The new flag and param can be used to adjust the `time` parameter for rule evaluation requests to match [intentional query delay](https://docs.victoriametrics.com/keyConcepts.html#query-latency) from the datasource. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow specifying full url in notifier static_configs target address, like `http://alertmanager:9093/test/api/v2/alerts`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5184).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.labels` command-line flag, which can be used for specifying a list of labels for sharding outgoing samples among the configured `-remoteWrite.url` destinations if `-remoteWrite.shardByURL` command-line flag is set. See [these docs](https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4942) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not exit on startup when [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to non-existing or invalid files with auth configs, since these files may appear / updated later. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): allow loading TLS certificates from HTTP and HTTPS urls by specifying these urls at `cert_file` and `key_file` options inside `tls_config` and `proxy_tls_config` sections at [http client settings](https://docs.victoriametrics.com/sd_configs.html#http-api-client-options).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce CPU load when big number of targets are scraped over HTTPS with the same custom TLS certificate configured via `tls_config->cert_file` and `tls_config->key_file` at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).

View file

@ -58,13 +58,17 @@ and send data to multiple remote storage systems, vmagent has the following addi
* vmagent may accept, relabel and filter data obtained via multiple data ingestion protocols in addition to data scraped from Prometheus targets.
That means it supports both `pull` and `push` protocols for data ingestion.
See [these docs](https://docs.victoriametrics.com/vmagent.html#features) for details.
* vmagent may be used in different use cases:
* vmagent may be used in different [use cases](https://docs.victoriametrics.com/vmagent.html#use-cases):
* [IoT and edge monitoring](https://docs.victoriametrics.com/vmagent.html#iot-and-edge-monitoring)
* [Drop-in replacement for Prometheus](https://docs.victoriametrics.com/vmagent.html#drop-in-replacement-for-prometheus)
* [Replication and High Availability](https://docs.victoriametrics.com/vmagent.html#replication-and-high-availability)
* [Relabeling and Filtering](https://docs.victoriametrics.com/vmagent.html#relabeling-and-filtering)
* [Statsd alternative](https://docs.victoriametrics.com/vmagent.html#statsd-alternative)
* [Flexible metrics relay](https://docs.victoriametrics.com/vmagent.html#flexible-metrics-relay)
* [Replication and high availability](https://docs.victoriametrics.com/vmagent.html#replication-and-high-availability)
* [Sharding among remote storages](https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages)
* [Relabeling and filtering](https://docs.victoriametrics.com/vmagent.html#relabeling-and-filtering)
* [Splitting data streams among multiple systems](https://docs.victoriametrics.com/vmagent.html#splitting-data-streams-among-multiple-systems)
* [Prometheus remote_write proxy](https://docs.victoriametrics.com/vmagent.html#prometheus-remote_write-proxy)
* [remote_write for clustered version](https://docs.victoriametrics.com/vmagent.html#remote_write-for-clustered-version)
## What is the difference between vmagent and Prometheus agent?

View file

@ -184,6 +184,13 @@ by routing outgoing samples for the same time series of [counter](https://docs.v
and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) types from top-level `vmagent` instances
to the same second-level `vmagent` instance, so they are aggregated properly.
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURLLabels`
command-line flag. For example, `-remoteWrite.shardByURLLabels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`.
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
### Relabeling and filtering
@ -1612,6 +1619,9 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Supports array of values separated by comma or specified via multiple flags.
-remoteWrite.shardByURL
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
-remoteWrite.shardByURL.labels array
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems
Supports an array of values separated by comma or specified via multiple flags.
-remoteWrite.showURL
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
-remoteWrite.significantFigures array