From d8de26bbfd549937c839010f14d0060f93da1a8f Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Thu, 23 Sep 2021 14:47:20 +0300
Subject: [PATCH] lib/promscrape: add
 `vm_promscrape_max_scrape_size_exceeded_errors_total` metric for counting
 scrapes that fail because the response size exceeds -promscrape.maxScrapeSize

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1639
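
Both failure paths below increment the same counter: the ReadData path,
which hits fasthttp.ErrBodyTooLarge, and the streamReader.Read path, which
tracks bytesRead against maxBodySize. A minimal sketch (not part of this
patch) of how the counter behaves, using the github.com/VictoriaMetrics/metrics
package that lib/promscrape already relies on:

    package main

    import (
    	"fmt"

    	"github.com/VictoriaMetrics/metrics"
    )

    // Registered in the default metrics set, mirroring the var block in
    // lib/promscrape/client.go.
    var maxScrapeSizeExceeded = metrics.NewCounter(`vm_promscrape_max_scrape_size_exceeded_errors_total`)

    func main() {
    	// Incremented each time a scrape response exceeds -promscrape.maxScrapeSize.
    	maxScrapeSizeExceeded.Inc()
    	fmt.Println(maxScrapeSizeExceeded.Get()) // 1
    }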
---
 docs/CHANGELOG.md        |  2 ++
 lib/promscrape/client.go | 13 ++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index fcfcd832ed..6373cbe806 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -6,6 +6,8 @@ sort: 15
 
 ## tip
 
+* FEATURE: vmagent: add `vm_promscrape_max_scrape_size_exceeded_errors_total` metric for counting scrapes that fail because the response size exceeds the limit configured via the `-promscrape.maxScrapeSize` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1639).
+
 
 ## [v1.66.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.66.1)
 
diff --git a/lib/promscrape/client.go b/lib/promscrape/client.go
index 3d62f1d094..a2ad7e4d6f 100644
--- a/lib/promscrape/client.go
+++ b/lib/promscrape/client.go
@@ -259,6 +259,7 @@ func (c *client) ReadData(dst []byte) ([]byte, error) {
 			return dst, fmt.Errorf("error when scraping %q with timeout %s: %w", c.scrapeURL, c.hc.ReadTimeout, err)
 		}
 		if err == fasthttp.ErrBodyTooLarge {
+			maxScrapeSizeExceeded.Inc()
 			return dst, fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
 				"either reduce the response size for the target or increase -promscrape.maxScrapeSize", c.scrapeURL, maxScrapeSize.N)
 		}
@@ -296,11 +297,12 @@ func (c *client) ReadData(dst []byte) ([]byte, error) {
 var gunzipBufPool bytesutil.ByteBufferPool
 
 var (
-	scrapesTimedout     = metrics.NewCounter(`vm_promscrape_scrapes_timed_out_total`)
-	scrapesOK           = metrics.NewCounter(`vm_promscrape_scrapes_total{status_code="200"}`)
-	scrapesGunzipped    = metrics.NewCounter(`vm_promscrape_scrapes_gunziped_total`)
-	scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
-	scrapeRetries       = metrics.NewCounter(`vm_promscrape_scrape_retries_total`)
+	maxScrapeSizeExceeded = metrics.NewCounter(`vm_promscrape_max_scrape_size_exceeded_errors_total`)
+	scrapesTimedout       = metrics.NewCounter(`vm_promscrape_scrapes_timed_out_total`)
+	scrapesOK             = metrics.NewCounter(`vm_promscrape_scrapes_total{status_code="200"}`)
+	scrapesGunzipped      = metrics.NewCounter(`vm_promscrape_scrapes_gunziped_total`)
+	scrapesGunzipFailed   = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
+	scrapeRetries         = metrics.NewCounter(`vm_promscrape_scrape_retries_total`)
 )
 
 func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
@@ -341,6 +343,7 @@ func (sr *streamReader) Read(p []byte) (int, error) {
 	n, err := sr.r.Read(p)
 	sr.bytesRead += int64(n)
 	if err == nil && sr.bytesRead > sr.maxBodySize {
+		maxScrapeSizeExceeded.Inc()
 		err = fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
 			"either reduce the response size for the target or increase -promscrape.maxScrapeSize", sr.scrapeURL, sr.maxBodySize)
 	}