From 166b97b8d03e80830befd6512b872a3c1c28bc15 Mon Sep 17 00:00:00 2001 From: Eugene Ma Date: Fri, 29 Mar 2024 05:26:02 -0700 Subject: [PATCH] vmagent: support sharding by excluded labels (#5938) To horizontally scale streaming aggregation, you might want to deploy a separate hashing tier of vmagents that route to a separate aggregation tier. The hashing tier should shard by all labels except the instance-level labels, to ensure the input metrics are routed correctly to the aggregator instance responsible for those labels. For this to achieve we introduce `remoteWrite.shardByURL.inverseLabels` flag to inverse logic of `remoteWrite.shardByURL.labels` --------- Co-authored-by: Eugene Ma Co-authored-by: Roman Khavronenko --- app/vmagent/remotewrite/remotewrite.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/vmagent/remotewrite/remotewrite.go b/app/vmagent/remotewrite/remotewrite.go index ed8f56ff4..cdd7de0dd 100644 --- a/app/vmagent/remotewrite/remotewrite.go +++ b/app/vmagent/remotewrite/remotewrite.go @@ -51,7 +51,8 @@ var ( shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+ "among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+ "even distribution of series over the specified -remoteWrite.url systems") - tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+ + shardByURLInverseLabels = flag.Bool("remoteWrite.shardByURL.inverseLabels", false, "Inverse the behavior of remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in remoteWrite.shardByURL.labels.") + tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+ "See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue") keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+ "Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.") @@ -537,12 +538,14 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar // Shard the data among rwctxs tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs)) tmpLabels := promutils.GetLabels() + inverseLabels := *shardByURLInverseLabels for _, ts := range tssBlock { hashLabels := ts.Labels if len(shardByURLLabelsMap) > 0 { hashLabels = tmpLabels.Labels[:0] for _, label := range ts.Labels { - if _, ok := shardByURLLabelsMap[label.Name]; ok { + _, ok := shardByURLLabelsMap[label.Name] + if ok && !inverseLabels || !ok && inverseLabels { hashLabels = append(hashLabels, label) } }