From 9e49a9e9241650b4760e8d05cc6829de0b84a672 Mon Sep 17 00:00:00 2001 From: Haleygo Date: Mon, 26 Jun 2023 13:34:51 +0800 Subject: [PATCH] vmalert: add `vmalert_remotewrite_send_duration_seconds_total` metric (#4517) add `vmalert_remotewrite_send_duration_seconds_total` metric --- app/vmalert/remotewrite/remotewrite.go | 6 ++++++ docs/CHANGELOG.md | 1 + 2 files changed, 7 insertions(+) diff --git a/app/vmalert/remotewrite/remotewrite.go b/app/vmalert/remotewrite/remotewrite.go index 160cfd590..8d4f935c1 100644 --- a/app/vmalert/remotewrite/remotewrite.go +++ b/app/vmalert/remotewrite/remotewrite.go @@ -183,9 +183,14 @@ func (c *Client) run(ctx context.Context) { var ( sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`) sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`) + sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`) droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`) droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`) bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`) + + _ = metrics.NewGauge(`vmalert_remotewrite_concurrency`, func() float64 { + return float64(*concurrency) + }) ) // flush is a blocking function that marshals WriteRequest and sends @@ -211,6 +216,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) { retryInterval = maxRetryInterval } timeStart := time.Now() + defer sendDuration.Add(time.Since(timeStart).Seconds()) L: for attempts := 0; ; attempts++ { err := c.send(ctx, b) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 570812e0d..44aa0b0cf 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,6 +33,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): add panel for tracking rate of syscalls while writing 
or reading from disk via `process_io_(read|write)_syscalls_total` metrics. * FEATURE: accept timestamps in milliseconds at `start`, `end` and `time` query args in [Prometheus querying API](https://docs.victoriametrics.com/#prometheus-querying-api-usage). See [these docs](https://docs.victoriametrics.com/#timestamp-formats) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4459). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): update retry policy for pushing data to `-remoteWrite.url`. By default, vmalert will make multiple retry attempts with exponential delay. The total time spent during retry attempts shouldn't exceed `-remoteWrite.retryMaxTime` (default is 30s). When retry time is exceeded vmalert drops the data dedicated for `-remoteWrite.url`. Before, vmalert dropped data after 5 retry attempts with 1s delay between attempts (not configurable). See `-remoteWrite.retryMinInterval` and `-remoteWrite.retryMaxTime` cmd-line flags. +* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): expose `vmalert_remotewrite_send_duration_seconds_total` counter, which can be used for determining high saturation of every connection to remote storage with an alerting query `sum(rate(vmalert_remotewrite_send_duration_seconds_total[5m])) by(job, instance) > 0.9 * max(vmalert_remotewrite_concurrency) by(job, instance)`. This query triggers when a connection is saturated by more than 90%. This usually means that `-remoteWrite.concurrency` command-line flag must be increased in order to increase the number of concurrent writes to the remote endpoint. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4516). 
* FEATURE: add ability to fine-tune Graphite API limits via the following command-line flags: `-search.maxGraphiteTagKeys` for limiting the number of tag keys returned from [Graphite API for tags](https://docs.victoriametrics.com/#graphite-tags-api-usage) `-search.maxGraphiteTagValues` for limiting the number of tag values returned from [Graphite API for tag values](https://docs.victoriametrics.com/#graphite-tags-api-usage)