app/vmagent: add max_scrape_size to scrape config (#6434)

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6429

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Andrii Chubatiuk 2024-06-20 14:58:42 +03:00 committed by GitHub
parent bc37b279aa
commit 1e83598be3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 770 additions and 661 deletions

View file

@ -40,6 +40,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add `idleConnTimeout` flag set to 50s by default. It should reduce the probability of `broken pipe` or `connection reset by peer` errors in vmauth logs.
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add auto request retry for trivial network errors, such as `broken pipe` and `connection reset` for requests to the configured backends.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): increase default value of `-promscrape.maxDroppedTargets` command-line flag to 10_000 from 1000. This makes it easier to track down large number of dropped targets.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): add `max_scrape_size` parameter to a scrape config for setting a custom scrape limit for a job. The new [automatically generated metric](https://docs.victoriametrics.com/vmagent/#automatically-generated-metrics) `scrape_response_size_bytes` was added to reflect the response size of the target.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/): check for ranged vector arguments in non-rollup expressions when `-search.disableImplicitConversion` or `-search.logImplicitConversion` are enabled. For example, `sum(up[5m])` or `absent(up[5m])` will fail to execute if these flags are set.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/): validate that rollup expressions has ranged vector arguments passed when `-search.disableImplicitConversion` or `-search.logImplicitConversion` are enabled. For example, `rate(metric)` or `count_over_time(metric)` will fail to execute if these flags are set.
* FEATURE: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): support file path with hierarchical patterns and regexpes, and http url in unittest cmd-line flag `-files`, e.g. `-files="http://<some-server-addr>/path/to/rules"` or `-files="dir/**/*.yaml"`.

View file

@ -1692,6 +1692,14 @@ scrape_configs:
#
# scrape_timeout: <duration>
# max_scrape_size is an optional parameter for limiting the response size in bytes from scraped targets.
# By default, uses limit from -promscrape.maxScrapeSize command-line flag.
# Example values:
# - "10MiB" - 10 * 1024 * 1024 bytes
# - "100MB" - 100 * 1000 * 1000 bytes
#
# max_scrape_size: <size>
# metrics_path is the path to fetch metrics from targets.
# By default, metrics are fetched from "/metrics" path.
#

View file

@ -486,6 +486,14 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
scrape_duration_seconds > 1.5
```
* `scrape_response_size_bytes` - response size in bytes for the given target. This allows monitoring the amount of data scraped
and adjusting `max_scrape_size` for scraped targets. For example, the following [MetricsQL query](https://docs.victoriametrics.com/metricsql/)
returns targets with scrape response > 10MiB:
```metricsql
scrape_response_size_bytes > 10MiB
```
* `scrape_timeout_seconds` - the configured timeout for the current scrape target (aka `scrape_timeout`).
This allows detecting targets with scrape durations close to the configured scrape timeout.
For example, the following [MetricsQL query](https://docs.victoriametrics.com/metricsql/) returns targets (identified by `instance` label),

View file

@ -53,79 +53,77 @@ func (b *Bytes) Set(value string) error {
return nil
}
value = normalizeBytesString(value)
n, err := parseBytes(value)
if err != nil {
return err
}
b.N = n
b.valueString = value
return nil
}
// ParseBytes returns int64 in bytes of parsed string with unit suffix
func ParseBytes(value string) (int64, error) {
value = normalizeBytesString(value)
return parseBytes(value)
}
// parseBytes parses a size string with an optional unit suffix into the
// corresponding number of bytes. Decimal suffixes (KB, MB, GB, TB) use
// powers of 1000; binary suffixes (KiB, MiB, GiB, TiB) use powers of 1024.
// A suffix-less value is parsed as a plain number of bytes.
//
// The value is expected to be already normalized via normalizeBytesString;
// callers that accept raw user input should use ParseBytes instead.
//
// Note: the displayed block contained interleaved leftovers of the previous
// Bytes.Set implementation (duplicate `return err` / `b.N = ...` lines);
// this version keeps only the (int64, error) implementation and collapses
// the per-suffix duplication into a single lookup table.
func parseBytes(value string) (int64, error) {
	// Binary (three-letter) suffixes are listed first so they are matched
	// before their two-letter decimal counterparts.
	multipliers := []struct {
		suffix     string
		multiplier float64
	}{
		{"KiB", 1024},
		{"MiB", 1024 * 1024},
		{"GiB", 1024 * 1024 * 1024},
		{"TiB", 1024 * 1024 * 1024 * 1024},
		{"KB", 1000},
		{"MB", 1000 * 1000},
		{"GB", 1000 * 1000 * 1000},
		{"TB", 1000 * 1000 * 1000 * 1000},
	}
	for _, mp := range multipliers {
		if !strings.HasSuffix(value, mp.suffix) {
			continue
		}
		f, err := strconv.ParseFloat(value[:len(value)-len(mp.suffix)], 64)
		if err != nil {
			return 0, err
		}
		return int64(f * mp.multiplier), nil
	}
	// No recognized suffix - treat the whole string as a number of bytes.
	f, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return 0, err
	}
	return int64(f), nil
}

View file

@ -18,8 +18,6 @@ import (
)
var (
maxScrapeSize = flagutil.NewBytes("promscrape.maxScrapeSize", 16*1024*1024, "The maximum size of scrape response in bytes to process from Prometheus targets. "+
"Bigger responses are rejected")
maxResponseHeadersSize = flagutil.NewBytes("promscrape.maxResponseHeadersSize", 4096, "The maximum size of http response headers from Prometheus scrape targets")
disableCompression = flag.Bool("promscrape.disableCompression", false, "Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. "+
"This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. "+
@ -41,6 +39,7 @@ type client struct {
scrapeTimeoutSecondsStr string
setHeaders func(req *http.Request) error
setProxyHeaders func(req *http.Request) error
maxScrapeSize int64
}
func newClient(ctx context.Context, sw *ScrapeWork) (*client, error) {
@ -91,6 +90,7 @@ func newClient(ctx context.Context, sw *ScrapeWork) (*client, error) {
scrapeTimeoutSecondsStr: fmt.Sprintf("%.3f", sw.ScrapeTimeout.Seconds()),
setHeaders: setHeaders,
setProxyHeaders: setProxyHeaders,
maxScrapeSize: sw.MaxScrapeSize,
}
return c, nil
}
@ -149,7 +149,7 @@ func (c *client) ReadData(dst *bytesutil.ByteBuffer) error {
// Read the data from resp.Body
r := &io.LimitedReader{
R: resp.Body,
N: maxScrapeSize.N,
N: c.maxScrapeSize,
}
_, err = dst.ReadFrom(r)
_ = resp.Body.Close()
@ -160,10 +160,11 @@ func (c *client) ReadData(dst *bytesutil.ByteBuffer) error {
}
return fmt.Errorf("cannot read data from %s: %w", c.scrapeURL, err)
}
if int64(len(dst.B)) >= maxScrapeSize.N {
if int64(len(dst.B)) >= c.maxScrapeSize {
maxScrapeSizeExceeded.Inc()
return fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
"either reduce the response size for the target or increase -promscrape.maxScrapeSize command-line flag value", c.scrapeURL, maxScrapeSize.N)
return fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d or max_scrape_size in a scrape config. "+
"Possible solutions are: reduce the response size for the target, increase -promscrape.maxScrapeSize command-line flag, "+
"increase max_scrape_size value in scrape config", c.scrapeURL, maxScrapeSize.N)
}
return nil
}

View file

@ -16,6 +16,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
@ -76,6 +77,8 @@ var (
clusterName = flag.String("promscrape.cluster.name", "", "Optional name of the cluster. If multiple vmagent clusters scrape the same targets, "+
"then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. "+
"See https://docs.victoriametrics.com/vmagent/#scraping-big-number-of-targets for more info")
maxScrapeSize = flagutil.NewBytes("promscrape.maxScrapeSize", 16*1024*1024, "The maximum size of scrape response in bytes to process from Prometheus targets. "+
"Bigger responses are rejected")
)
var clusterMemberID int
@ -269,6 +272,7 @@ type ScrapeConfig struct {
JobName string `yaml:"job_name"`
ScrapeInterval *promutils.Duration `yaml:"scrape_interval,omitempty"`
ScrapeTimeout *promutils.Duration `yaml:"scrape_timeout,omitempty"`
MaxScrapeSize string `yaml:"max_scrape_size,omitempty"`
MetricsPath string `yaml:"metrics_path,omitempty"`
HonorLabels bool `yaml:"honor_labels,omitempty"`
@ -845,6 +849,14 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907
scrapeTimeout = scrapeInterval
}
var err error
mss := maxScrapeSize.N
if len(sc.MaxScrapeSize) > 0 {
mss, err = flagutil.ParseBytes(sc.MaxScrapeSize)
if err != nil {
return nil, fmt.Errorf("unexpected `max_scrape_size` value %q for `job_name` %q`: %w", sc.MaxScrapeSize, jobName, err)
}
}
honorLabels := sc.HonorLabels
honorTimestamps := sc.HonorTimestamps
denyRedirects := false
@ -897,6 +909,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
scrapeIntervalString: scrapeInterval.String(),
scrapeTimeout: scrapeTimeout,
scrapeTimeoutString: scrapeTimeout.String(),
maxScrapeSize: mss,
jobName: jobName,
metricsPath: metricsPath,
scheme: scheme,
@ -927,6 +940,7 @@ type scrapeWorkConfig struct {
scrapeIntervalString string
scrapeTimeout time.Duration
scrapeTimeoutString string
maxScrapeSize int64
jobName string
metricsPath string
scheme string
@ -1201,6 +1215,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
ScrapeURL: scrapeURL,
ScrapeInterval: scrapeInterval,
ScrapeTimeout: scrapeTimeout,
MaxScrapeSize: swc.maxScrapeSize,
HonorLabels: swc.honorLabels,
HonorTimestamps: swc.honorTimestamps,
DenyRedirects: swc.denyRedirects,

View file

@ -227,6 +227,7 @@ scrape_configs:
ScrapeURL: "http://host1:80/metric/path1?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host1:80",
"job": "abc",
@ -237,6 +238,7 @@ scrape_configs:
ScrapeURL: "https://host2:443/metric/path2?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host2:443",
"job": "abc",
@ -247,6 +249,7 @@ scrape_configs:
ScrapeURL: "http://host3:1234/metric/path3?arg1=value1&x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host3:1234",
"job": "abc",
@ -257,6 +260,7 @@ scrape_configs:
ScrapeURL: "https://host4:1234/foo/bar?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host4:1234",
"job": "abc",
@ -295,6 +299,7 @@ scrape_configs:
ScrapeURL: "http://black:9115/probe?module=dns_udp_example&target=8.8.8.8",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "8.8.8.8",
"job": "blackbox",
@ -457,6 +462,7 @@ scrape_configs:
ScrapeURL: "http://host1:80/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host1:80",
"job": "foo",
@ -468,6 +474,7 @@ scrape_configs:
ScrapeURL: "http://host2:80/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host2:80",
"job": "foo",
@ -479,6 +486,7 @@ scrape_configs:
ScrapeURL: "http://localhost:9090/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "localhost:9090",
"job": "foo",
@ -685,6 +693,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -706,6 +715,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -727,6 +737,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -748,6 +759,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -766,6 +778,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -787,6 +800,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -834,6 +848,7 @@ scrape_configs:
ScrapeURL: "https://foo.bar:443/foo/bar?p=x%26y&p=%3D",
ScrapeInterval: 54 * time.Second,
ScrapeTimeout: 5 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
HonorLabels: true,
HonorTimestamps: true,
DenyRedirects: true,
@ -849,6 +864,7 @@ scrape_configs:
ScrapeURL: "https://aaa:443/foo/bar?p=x%26y&p=%3D",
ScrapeInterval: 54 * time.Second,
ScrapeTimeout: 5 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
HonorLabels: true,
HonorTimestamps: true,
DenyRedirects: true,
@ -864,6 +880,7 @@ scrape_configs:
ScrapeURL: "http://1.2.3.4:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 8 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "1.2.3.4:80",
"job": "qwer",
@ -874,6 +891,7 @@ scrape_configs:
ScrapeURL: "http://foobar:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 8 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foobar:80",
"job": "asdf",
@ -921,6 +939,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics?x=keep_me",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"hash": "82",
"instance": "foo.bar:1234",
@ -962,6 +981,7 @@ scrape_configs:
ScrapeURL: "mailto://foo.bar:1234/abc.de?a=b",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "fake.addr",
"job": "https",
@ -973,6 +993,7 @@ scrape_configs:
scrape_configs:
- job_name: foo
scheme: https
max_scrape_size: 0
relabel_configs:
- action: keep
source_labels: [__address__]
@ -994,6 +1015,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: 0,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "3",
@ -1005,6 +1027,7 @@ scrape_configs:
f(`
scrape_configs:
- job_name: foo
max_scrape_size: 8MiB
metric_relabel_configs:
- source_labels: [foo]
target_label: abc
@ -1015,6 +1038,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: 8 * 1024 * 1024,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1032,6 +1056,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1049,6 +1074,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1080,6 +1106,7 @@ scrape_configs:
ScrapeURL: "http://pp:80/metrics?a=c&a=xy",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"foo": "bar",
"instance": "pp:80",
@ -1130,6 +1157,7 @@ scrape_configs:
ScrapeURL: "http://127.0.0.1:9116/snmp?module=if_mib&target=192.168.1.2",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "192.168.1.2",
"job": "snmp",
@ -1158,6 +1186,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metricspath",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "path wo slash",
@ -1184,6 +1213,7 @@ scrape_configs:
ScrapeTimeout: time.Hour * 24,
ScrapeAlignInterval: time.Hour * 24,
ScrapeOffset: time.Hour * 24 * 2,
MaxScrapeSize: maxScrapeSize.N,
NoStaleMarkers: true,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
@ -1206,6 +1236,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
jobNameOriginal: "foo",
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",

View file

@ -53,6 +53,9 @@ type ScrapeWork struct {
// Timeout for scraping the ScrapeURL.
ScrapeTimeout time.Duration
// MaxScrapeSize sets max amount of data, that can be scraped by a job
MaxScrapeSize int64
// How to deal with conflicting labels.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
HonorLabels bool
@ -500,6 +503,7 @@ func (sw *scrapeWork) processDataOneShot(scrapeTimestamp, realTimestamp int64, b
am := &autoMetrics{
up: up,
scrapeDurationSeconds: scrapeDurationSeconds,
scrapeResponseSize: float64(len(bodyString)),
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
@ -519,7 +523,7 @@ func (sw *scrapeWork) processDataOneShot(scrapeTimestamp, realTimestamp int64, b
sw.storeLastScrape(body)
}
sw.finalizeLastScrape()
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), samplesScraped, err)
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), float64(len(bodyString)), samplesScraped, err)
return err
}
@ -580,6 +584,7 @@ func (sw *scrapeWork) processDataInStreamMode(scrapeTimestamp, realTimestamp int
am := &autoMetrics{
up: up,
scrapeDurationSeconds: scrapeDurationSeconds,
scrapeResponseSize: float64(len(bodyString)),
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
@ -598,7 +603,7 @@ func (sw *scrapeWork) processDataInStreamMode(scrapeTimestamp, realTimestamp int
sw.storeLastScrape(body.B)
}
sw.finalizeLastScrape()
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), samplesScraped, err)
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), float64(len(bodyString)), samplesScraped, err)
// Do not track active series in streaming mode, since this may need too big amounts of memory
// when the target exports too big number of metrics.
return err
@ -812,6 +817,7 @@ func (sw *scrapeWork) getLabelsHash(labels []prompbmarshal.Label) uint64 {
type autoMetrics struct {
up int
scrapeDurationSeconds float64
scrapeResponseSize float64
samplesScraped int
samplesPostRelabeling int
seriesAdded int
@ -824,7 +830,7 @@ func isAutoMetric(s string) bool {
"scrape_samples_post_metric_relabeling", "scrape_series_added",
"scrape_timeout_seconds", "scrape_samples_limit",
"scrape_series_limit_samples_dropped", "scrape_series_limit",
"scrape_series_current":
"scrape_series_current", "scrape_response_size_bytes":
return true
}
return false
@ -833,6 +839,7 @@ func isAutoMetric(s string) bool {
func (sw *scrapeWork) addAutoMetrics(am *autoMetrics, wc *writeRequestCtx, timestamp int64) {
sw.addAutoTimeseries(wc, "up", float64(am.up), timestamp)
sw.addAutoTimeseries(wc, "scrape_duration_seconds", am.scrapeDurationSeconds, timestamp)
sw.addAutoTimeseries(wc, "scrape_response_size_bytes", am.scrapeResponseSize, timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_scraped", float64(am.samplesScraped), timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", float64(am.samplesPostRelabeling), timestamp)
sw.addAutoTimeseries(wc, "scrape_series_added", float64(am.seriesAdded), timestamp)

View file

@ -77,6 +77,7 @@ func TestScrapeWorkScrapeInternalFailure(t *testing.T) {
dataExpected := `
up 0 123
scrape_samples_scraped 0 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 0 123
scrape_series_added 0 123
@ -181,6 +182,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
}, `
up 1 123
scrape_samples_scraped 0 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 0 123
scrape_series_added 0 123
@ -196,6 +198,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
abc -2 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 51 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
@ -215,6 +218,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
abc{foo="x"} -2 123
up{foo="x"} 1 123
scrape_samples_scraped{foo="x"} 2 123
scrape_response_size_bytes{foo="x"} 36 123
scrape_duration_seconds{foo="x"} 0 123
scrape_samples_post_metric_relabeling{foo="x"} 2 123
scrape_series_added{foo="x"} 2 123
@ -234,6 +238,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{exported_job="aa",job="override",x="1",a="b",y="2"} -3e4 123
up{job="override"} 1 123
scrape_samples_scraped{job="override"} 2 123
scrape_response_size_bytes{job="override"} 80 123
scrape_duration_seconds{job="override"} 0 123
scrape_samples_post_metric_relabeling{job="override"} 2 123
scrape_series_added{job="override"} 2 123
@ -255,6 +260,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
test_with_instance{instance="some_instance",job="some_job",label="val2"} 1555 123
up{instance="foobar",job="xxx"} 1 123
scrape_samples_scraped{instance="foobar",job="xxx"} 2 123
scrape_response_size_bytes{instance="foobar",job="xxx"} 158 123
scrape_duration_seconds{instance="foobar",job="xxx"} 0 123
scrape_samples_post_metric_relabeling{instance="foobar",job="xxx"} 2 123
scrape_series_added{instance="foobar",job="xxx"} 2 123
@ -275,6 +281,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
test_with_instance{exported_instance="some_instance",exported_job="some_job",instance="foobar",job="xxx",label="val2"} 1555 123
up{instance="foobar",job="xxx"} 1 123
scrape_samples_scraped{instance="foobar",job="xxx"} 2 123
scrape_response_size_bytes{instance="foobar",job="xxx"} 158 123
scrape_duration_seconds{instance="foobar",job="xxx"} 0 123
scrape_samples_post_metric_relabeling{instance="foobar",job="xxx"} 2 123
scrape_series_added{instance="foobar",job="xxx"} 2 123
@ -294,6 +301,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{job="aa",a="b"} -3e4 123
up{job="override"} 1 123
scrape_samples_scraped{job="override"} 2 123
scrape_response_size_bytes{job="override"} 68 123
scrape_duration_seconds{job="override"} 0 123
scrape_samples_post_metric_relabeling{job="override"} 2 123
scrape_series_added{job="override"} 2 123
@ -322,6 +330,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",job="xx",instance="foo.com/xx"} -3e4 123
up{job="xx"} 1 123
scrape_samples_scraped{job="xx"} 2 123
scrape_response_size_bytes{job="xx"} 49 123
scrape_duration_seconds{job="xx"} 0 123
scrape_samples_post_metric_relabeling{job="xx"} 2 123
scrape_series_added{job="xx"} 2 123
@ -352,6 +361,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
foo{bar="baz",job="xx",instance="foo.com"} 34.44 123
up{job="xx",instance="foo.com"} 1 123
scrape_samples_scraped{job="xx",instance="foo.com"} 4 123
scrape_response_size_bytes{job="xx",instance="foo.com"} 106 123
scrape_duration_seconds{job="xx",instance="foo.com"} 0 123
scrape_samples_post_metric_relabeling{job="xx",instance="foo.com"} 1 123
scrape_series_added{job="xx",instance="foo.com"} 4 123
@ -371,6 +381,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
exported_scrape_series_added 3.435 123
up 1 123
scrape_duration_seconds 0 123
scrape_response_size_bytes 76 123
scrape_samples_scraped 3 123
scrape_samples_post_metric_relabeling 3 123
scrape_timeout_seconds 42 123
@ -389,6 +400,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
scrape_series_added 3.435 123
up 1 123
scrape_samples_scraped 3 123
scrape_response_size_bytes 76 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 3 123
scrape_series_added 3 123
@ -406,6 +418,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_limit 2 123
scrape_response_size_bytes 49 123
scrape_samples_scraped 2 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
@ -424,6 +437,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
}, `
up 0 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_samples_limit 1 123
@ -445,6 +459,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 49 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
@ -464,6 +479,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
foo{bar="baz"} 34.44 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 49 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123

View file

@ -178,7 +178,7 @@ func (tsm *targetStatusMap) Unregister(sw *scrapeWork) {
tsm.mu.Unlock()
}
func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDuration int64, samplesScraped int, err error) {
func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDuration int64, scrapeResponseSize float64, samplesScraped int, err error) {
jobName := sw.Config.jobNameOriginal
tsm.mu.Lock()
@ -197,6 +197,7 @@ func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDu
ts.scrapeTime = scrapeTime
ts.scrapeDuration = scrapeDuration
ts.samplesScraped = samplesScraped
ts.scrapeResponseSize = scrapeResponseSize
ts.scrapesTotal++
if !up {
ts.scrapesFailed++
@ -299,6 +300,7 @@ type targetStatus struct {
up bool
scrapeTime int64
scrapeDuration int64
scrapeResponseSize float64
samplesScraped int
scrapesTotal int
scrapesFailed int
@ -313,6 +315,13 @@ func (ts *targetStatus) getDurationFromLastScrape() string {
return fmt.Sprintf("%.3fs ago", d.Seconds())
}
// getSizeFromLastScrape returns a human-readable representation of the last
// scrape response size for the target, or "never scraped" if no response has
// been recorded yet (scrapeResponseSize is zero or unset).
func (ts *targetStatus) getSizeFromLastScrape() string {
	if ts.scrapeResponseSize <= 0 {
		return "never scraped"
	}
	// scrapeResponseSize is already a float64 - the float64() conversion
	// in the original was redundant.
	return fmt.Sprintf("%.3f kb", ts.scrapeResponseSize/1024)
}
type droppedTargets struct {
mu sync.Mutex
m map[uint64]droppedTarget

View file

@ -29,6 +29,7 @@
scrapes_failed={%d ts.scrapesFailed %},{% space %}
last_scrape={%s ts.getDurationFromLastScrape() %},{% space %}
scrape_duration={%d int(ts.scrapeDuration) %}ms,{% space %}
scrape_response_size={%s ts.getSizeFromLastScrape() %},{% space %}
samples_scraped={%d ts.samplesScraped %},{% space %}
error={% if ts.err != nil %}{%s= ts.err.Error() %}{% endif %}
{% newline %}
@ -217,6 +218,7 @@
<th scope="col" title="total scrape errors">Errors</th>
<th scope="col" title="the time of the last scrape">Last Scrape</th>
<th scope="col" title="the duration of the last scrape">Duration</th>
<th scope="col" title="the size of the last scrape">Last Scrape Size</th>
<th scope="col" title="the number of metrics scraped during the last scrape">Samples</th>
<th scope="col" title="error from the last scrape (if any)">Last error</th>
</tr>
@ -269,8 +271,9 @@
{% endif %}
<td>{%d ts.scrapesTotal %}</td>
<td>{%d ts.scrapesFailed %}</td>
<td>{%s ts.getDurationFromLastScrape() %}
<td>{%s ts.getDurationFromLastScrape() %}</td>
<td>{%d int(ts.scrapeDuration) %}ms</td>
<td>{%s ts.getSizeFromLastScrape() %}</td>
<td>{%d ts.samplesScraped %}</td>
<td>{% if ts.err != nil %}{%s ts.err.Error() %}{% endif %}</td>
</tr>

File diff suppressed because it is too large Load diff