From a47127c1a633aa80ffc3ab23764a7159c954a7e7 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 14 Jan 2024 22:52:47 +0200 Subject: [PATCH] app/vmalert/remotewrite: properly calculate vmalert_remotewrite_dropped_rows_total It was calculating the number of dropped time series instead of the number of dropped samples. While at it, drop vmalert_remotewrite_dropped_bytes_total metric, since it was inconsistently calculated - at one place it was calculating raw protobuf-encoded sample sizes, while at another place it was calculating the size of snappy-compressed prompbmarshal.WriteRequest protobuf message. Additionally, this metric has zero practical sense, so just drop it in order to reduce the level of confusion. --- app/vmalert/remotewrite/client.go | 10 +++++----- docs/CHANGELOG.md | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/vmalert/remotewrite/client.go b/app/vmalert/remotewrite/client.go index 6c012a350..ac3dffa65 100644 --- a/app/vmalert/remotewrite/client.go +++ b/app/vmalert/remotewrite/client.go @@ -123,14 +123,12 @@ func (c *Client) Push(s prompbmarshal.TimeSeries) error { case <-c.doneCh: rwErrors.Inc() droppedRows.Add(len(s.Samples)) - droppedBytes.Add(s.Size()) return fmt.Errorf("client is closed") case c.input <- s: return nil default: rwErrors.Inc() droppedRows.Add(len(s.Samples)) - droppedBytes.Add(s.Size()) return fmt.Errorf("failed to push timeseries - queue is full (%d entries). 
"+ "Queue size is controlled by -remoteWrite.maxQueueSize flag", c.maxQueueSize) @@ -195,7 +193,6 @@ var ( sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`) sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`) droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`) - droppedBytes = metrics.NewCounter(`vmalert_remotewrite_dropped_bytes_total`) sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`) bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`) @@ -276,8 +273,11 @@ L: } rwErrors.Inc() - droppedRows.Add(len(wr.Timeseries)) - droppedBytes.Add(len(b)) + rows := 0 + for _, ts := range wr.Timeseries { + rows += len(ts.Samples) + } + droppedRows.Add(rows) logger.Errorf("attempts to send remote-write request failed - dropping %d time series", len(wr.Timeseries)) } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 15bb378d3..aaa01c9d2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -180,7 +180,7 @@ Released at 2023-11-15 * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not send requests to configured remote systems when `-datasource.*`, `-remoteWrite.*`, `-remoteRead.*` or `-notifier.*` command-line flags refer files with invalid auth configs. Previously such requests were sent without properly set auth headers. Now the requests are sent only after the files are updated with valid auth configs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly maintain alerts state in [replay mode](https://docs.victoriametrics.com/vmalert.html#rules-backfilling) if alert's `for` param was bigger than replay request range (usually a couple of hours). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5186) for details. 
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): increment `vmalert_remotewrite_errors_total` metric if all retries to send remote-write request failed. Before, this metric was incremented only if remote-write client's buffer is overloaded. -* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): increment `vmalert_remotewrite_dropped_rows_total` and `vmalert_remotewrite_dropped_bytes_total` metrics if remote-write client's buffer is overloaded. Before, these metrics were incremented only after unsuccessful HTTP calls. +* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): increment `vmalert_remotewrite_dropped_rows_total` metric if remote-write client's buffer is overloaded. Before, this metric was incremented only after unsuccessful HTTP calls. * BUGFIX: `vmselect`: improve performance and memory usage during query processing on machines with big number of CPU cores. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5087). * BUGFIX: dashboards: fix vminsert/vmstorage/vmselect metrics filtering when dashboard is used to display data from many sub-clusters with unique job names. Before, only one specific job could have been accounted for component-specific panels, instead of all available jobs for the component. * BUGFIX: dashboards: respect `job` and `instance` filters for `alerts` annotation in cluster and single-node dashboards.