app/vmagent: add -enableMultitenantHandlers command-line flag

This flag allows converting tenant id to (vm_account_id, vm_project_id) labels.
this flag deprecates `-remoteWrite.multitenantURL` command-line flag,
because `-enableMultitenantHandlers` is easier to use and combine with multitenant url
at vminsert - https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels

See https://docs.victoriametrics.com/vmagent.html#multitenancy

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1505
This commit is contained in:
Aliaksandr Valialkin 2023-12-05 01:20:44 +02:00
parent 902f1e5fdc
commit fdbbbf33ca
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
4 changed files with 76 additions and 44 deletions

View file

@ -3,6 +3,7 @@ package remotewrite
import (
"flag"
"fmt"
"strconv"
"strings"
"sync"
@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
// Nothing to change.
return tss
}
rctx.reset()
tssDst := tss[:0]
labels := rctx.labels[:0]
for i := range tss {
@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
if len(extraLabels) == 0 {
return
}
rctx.reset()
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
@ -139,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
rctx.labels = labels
}
func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
rctx.reset()
accountIDStr := strconv.FormatUint(uint64(accountID), 10)
projectIDStr := strconv.FormatUint(uint64(projectID), 10)
labels := rctx.labels[:0]
for i := range tss {
ts := &tss[i]
labelsLen := len(labels)
for _, label := range ts.Labels {
labelName := label.Name
if labelName == "vm_account_id" || labelName == "vm_project_id" {
continue
}
labels = append(labels, label)
}
labels = append(labels, prompbmarshal.Label{
Name: "vm_account_id",
Value: accountIDStr,
})
labels = append(labels, prompbmarshal.Label{
Name: "vm_project_id",
Value: projectIDStr,
})
ts.Labels = labels[labelsLen:]
}
rctx.labels = labels
}
type relabelCtx struct {
// pool for labels, which are used during the relabeling.
labels []prompbmarshal.Label
@ -160,7 +191,7 @@ func getRelabelCtx() *relabelCtx {
}
func putRelabelCtx(rctx *relabelCtx) {
rctx.labels = rctx.labels[:0]
rctx.reset()
relabelCtxPool.Put(rctx)
}

View file

@ -37,11 +37,15 @@ var (
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
"See also -remoteWrite.multitenantURL")
"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
@ -114,9 +118,9 @@ var (
}
)
// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
func MultitenancyEnabled() bool {
return len(*remoteWriteMultitenantURLs) > 0
return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
}
// Contains the current relabelConfigs.
@ -384,17 +388,23 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
}
func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
tss := wr.Timeseries
if at == nil && MultitenancyEnabled() {
// Write data to default tenant if at isn't set when multitenancy is enabled.
at = defaultAuthToken
}
var tenantRctx *relabelCtx
var rwctxs []*remoteWriteCtx
if at == nil {
rwctxs = rwctxsDefault
} else if len(*remoteWriteMultitenantURLs) == 0 {
// Convert at to (vm_account_id, vm_project_id) labels.
tenantRctx = getRelabelCtx()
defer putRelabelCtx(tenantRctx)
rwctxs = rwctxsDefault
} else {
if len(*remoteWriteMultitenantURLs) == 0 {
logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
}
rwctxsMapLock.Lock()
tenantID := tenantmetrics.TenantID{
AccountID: at.AccountID,
@ -408,7 +418,6 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
rwctxsMapLock.Unlock()
}
tss := wr.Timeseries
rowsCount := getRowsCount(tss)
if *disableOnDiskQueue {
@ -433,10 +442,7 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
pcsGlobal := rcs.global
if pcsGlobal.Len() > 0 {
rctx = getRelabelCtx()
defer func() {
rctx.reset()
putRelabelCtx(rctx)
}()
defer putRelabelCtx(rctx)
}
globalRowsPushedBeforeRelabel.Add(rowsCount)
maxSamplesPerBlock := *maxRowsPerBlock
@ -463,6 +469,9 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
} else {
tss = nil
}
if tenantRctx != nil {
tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
}
if rctx != nil {
rowsCountBeforeRelabel := getRowsCount(tssBlock)
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
@ -482,9 +491,6 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur
}
return false
}
if rctx != nil {
rctx.reset()
}
}
return true
}

View file

@ -31,6 +31,7 @@ The sandbox cluster installation is running under the constant load generated by
* SECURITY: upgrade base docker image (Alpine) from 3.18.4 to 3.18.5. See [alpine 3.18.5 release notes](https://www.alpinelinux.org/posts/Alpine-3.15.11-3.16.8-3.17.6-3.18.5-released.html).
* FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when the `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` at `vmui` could result in `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of overloaded `vmselect` or single-node VictoriaMetrics.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-enableMultitenantHandlers` command-line flag, which allows receiving data via [VictoriaMetrics cluster urls](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) at `vmagent` and converting [tenant ids](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) to (`vm_account_id`, `vm_project_id`) labels before sending the data to the configured `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/vmagent.html#multitenancy) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for Datadog `/api/v2/series` and `/api/beta/sketches` ingestion protocols to vmagent/vminsert components. See this [doc](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) for examples. Thanks to @AndrewChubatiuk for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5094).

View file

@ -250,41 +250,31 @@ by specifying `-remoteWrite.forcePromProto` command-line flag for the correspond
## Multitenancy
By default `vmagent` collects the data without tenant identifiers and routes it to the configured `-remoteWrite.url`.
By default `vmagent` collects the data without [tenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) identifiers
and routes it to the remote storage specified via `-remoteWrite.url` command-line flag. The `-remoteWrite.url` can point to `/insert/<tenant_id>/prometheus/api/v1/write` path
at `vminsert` according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format). In this case all the metrics are written to the given `<tenant_id>` tenant.
[VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) supports writing data to multiple tenants
specified via special labels - see [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels).
This allows specifying tenant ids via [relabeling](#relabeling) and writing multitenant data
to a single `-remoteWrite.url=http://<vminsert-addr>/insert/multitenant/prometheus/api/v1/write`.
The easiest way to write data to multiple distinct tenants is to specify the needed tenants via `vm_account_id` and `vm_project_id` labels
and then to push metrics with these labels to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels).
The `vm_account_id` and `vm_project_id` labels can be specified via [relabeling](#relabeling) before sending the metrics to `-remoteWrite.url`.
`vmagent` can accept data from the same multitenant endpoints as `vminsert` from [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html)
does according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) and route the accepted data
to the corresponding [tenants](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) in VictoriaMetrics cluster
pointed by the `-remoteWrite.multitenantURL` command-line flag. For example, if `-remoteWrite.multitenantURL` is set to `http://vminsert-service`,
then `vmagent` would accept multitenant data at `http://vmagent:8429/insert/<accountID>/...` endpoints in the same way
as [VictoriaMetrics cluster does](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) and route
it to `http://vminsert-service/insert/<accountID>/prometheus/api/v1/write`.
If multiple `-remoteWrite.multitenantURL` command-line options are set, then `vmagent` replicates the collected data across all the configured urls.
This allows using a single `vmagent` instance in front of multiple VictoriaMetrics clusters.
If `-remoteWrite.multitenantURL` command-line flag is set and `vmagent` is configured to scrape Prometheus-compatible targets
(e.g. if `-promscrape.config` command-line flag is set) then `vmagent` reads tenantID from `__tenant_id__` label
for the discovered targets and routes all the metrics from this target to the given `__tenant_id__`,
e.g. to the url `<-remoteWrite.multitenantURL>/insert/<__tenant_id__>/prometheus/api/v1/write`.
For example, the following relabeling rule instructs sending metrics to tenantID defined in the `prometheus.io/tenant` annotation of Kubernetes pod deployment:
For example, the following relabeling rule instructs sending metrics to `<account_id>:0` [tenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy)
defined in the `prometheus.io/account_id` annotation of Kubernetes pod deployment:
```yaml
scrape_configs:
- kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_tenant]
target_label: __tenant_id__
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_account_id]
target_label: vm_account_id
```
If the target has no associated `__tenant_id__` label, then its' metrics are routed to zero tenantID, e.g. to `<-remoteWrite.multitenantURL>/insert/0/prometheus/api/v1/write`.
`vmagent` can accept data via the same multitenant endpoints as `vminsert` at [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html)
does according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) if `-enableMultitenantHandlers` command-line flag is set.
In this case it automatically converts tenant identifiers to `vm_account_id` and `vm_project_id` labels before applying [relabeling](#relabeling) specified via `-remoteWrite.relabelConfig`
and `-remoteWrite.urlRelabelConfig` command-line flags. Metrics with `vm_account_id` and `vm_project_id` labels can be routed to the corresponding tenants
when specifying `-remoteWrite.url` to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels).
## How to collect metrics in Prometheus format
@ -1190,6 +1180,8 @@ with the following config:
In this case it may be useful to disable on-disk data persistence in order to prevent from unbounded growth of the on-disk queue.
See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence).
See also [how to write metrics to multiple distinct tenants](https://docs.victoriametrics.com/vmagent.html#multitenancy).
#### Consume metrics from multiple topics
`vmagent` can read messages from different topics in different formats. For example, the following command starts `vmagent`, which reads plaintext
@ -1322,6 +1314,8 @@ data_format = "influx"
In this case it may be useful to disable on-disk data persistence in order to prevent from unbounded growth of the on-disk queue.
See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence).
See also [how to write metrics to multiple distinct tenants](https://docs.victoriametrics.com/vmagent.html#multitenancy).
#### Command-line flags for Kafka consumer
These command-line flags are available only in [enterprise](https://docs.victoriametrics.com/enterprise.html) version of `vmagent`,