From ceadcb7f8e276914f923de5dc46407be680aca67 Mon Sep 17 00:00:00 2001 From: Gard Rimestad Date: Mon, 2 May 2022 21:20:05 +0200 Subject: [PATCH] app/vmagent: add metric for rate limit (#2521) This adds a metric for the rate limit. The limit is currently exposed as a flag: `flag{name="remoteWrite.rateLimit", value="500000", is_set="true"} 1` We run many instances of vmagent, and having to extract the value from the flag makes creating alerts harder than it needs to be. With this change, it should be easier to monitor how close to the limit we are. `((100/vmagent_remotewrite_rate_limit{account="account"})*sum (rate(vmagent_remotewrite_conn_bytes_written_total{account="account"}))) and ON (account) flag{name="remoteWrite.rateLimit"} == 1` --- app/vmagent/remotewrite/client.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/vmagent/remotewrite/client.go b/app/vmagent/remotewrite/client.go index d265ac0ff7..30cbacbedf 100644 --- a/app/vmagent/remotewrite/client.go +++ b/app/vmagent/remotewrite/client.go @@ -78,6 +78,7 @@ type client struct { requestsOKCount *metrics.Counter errorsCount *metrics.Counter packetsDropped *metrics.Counter + rateLimit *metrics.Gauge retriesCount *metrics.Counter sendDuration *metrics.FloatCounter @@ -135,6 +136,9 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) { c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL)) c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL)) + c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 { + return float64(rateLimit.GetOptionalArgOrDefault(argIdx, 0)) + }) c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vmagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL)) c.requestsOKCount = 
metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL)) c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL))