From d0706c8c953abe86723eede6dcc6e6e1e78f8e7d Mon Sep 17 00:00:00 2001 From: Gard Rimestad Date: Mon, 2 May 2022 21:20:05 +0200 Subject: [PATCH] app/vmagent add metric for rate limit (#2521) This adds a metric for the rate limit. Currently the limit is only visible as a flag: `flag{name="remoteWrite.rateLimit", value="500000", is_set="true"} 1` We run many instances of vmagent, and creating alerts is harder than it needs to be when the value has to be extracted from the flag. With this change, it should be easier to monitor how close we are to the limit, for example: `((100/vmagent_remotewrite_rate_limit{account="account"})*sum (rate(vmagent_remotewrite_conn_bytes_written_total{account="account"}))) and ON (account) flag{name="remoteWrite.rateLimit"} == 1` --- app/vmagent/remotewrite/client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/vmagent/remotewrite/client.go b/app/vmagent/remotewrite/client.go index d265ac0ff..30cbacbed 100644 --- a/app/vmagent/remotewrite/client.go +++ b/app/vmagent/remotewrite/client.go @@ -78,6 +78,7 @@ type client struct { requestsOKCount *metrics.Counter errorsCount *metrics.Counter packetsDropped *metrics.Counter + rateLimit *metrics.Gauge retriesCount *metrics.Counter sendDuration *metrics.FloatCounter @@ -135,6 +136,9 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) { c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL)) c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL)) + c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 { + return float64(rateLimit.GetOptionalArgOrDefault(argIdx, 0)) + }) c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL)) c.requestsOKCount = 
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL)) c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL))