2020-02-23 11:35:47 +00:00
package promscrape
import (
2020-11-01 21:12:13 +00:00
"context"
2020-02-23 11:35:47 +00:00
"crypto/tls"
"flag"
"fmt"
2020-11-01 21:12:13 +00:00
"io"
"net/http"
2020-12-24 08:56:10 +00:00
"net/url"
2020-02-23 11:35:47 +00:00
"strings"
"time"
2023-06-05 13:56:49 +00:00
"golang.org/x/net/http2"
2020-11-26 16:08:39 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2020-08-16 14:05:52 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
2020-11-01 21:12:13 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2023-02-24 20:11:44 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
2021-04-03 21:40:08 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy"
2020-04-29 13:20:23 +00:00
"github.com/VictoriaMetrics/fasthttp"
2020-02-23 11:35:47 +00:00
"github.com/VictoriaMetrics/metrics"
)
var (
2020-08-16 14:05:52 +00:00
maxScrapeSize = flagutil . NewBytes ( "promscrape.maxScrapeSize" , 16 * 1024 * 1024 , "The maximum size of scrape response in bytes to process from Prometheus targets. " +
2020-02-23 11:35:47 +00:00
"Bigger responses are rejected" )
2021-11-03 20:26:56 +00:00
maxResponseHeadersSize = flagutil . NewBytes ( "promscrape.maxResponseHeadersSize" , 4096 , "The maximum size of http response headers from Prometheus scrape targets" )
disableCompression = flag . Bool ( "promscrape.disableCompression" , false , "Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. " +
2020-07-02 11:19:11 +00:00
"This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. " +
2023-05-10 07:50:41 +00:00
"It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control" )
2020-07-02 11:19:11 +00:00
disableKeepAlive = flag . Bool ( "promscrape.disableKeepAlive" , false , "Whether to disable HTTP keep-alive connections when scraping all the targets. " +
"This may be useful when targets has no support for HTTP keep-alive connection. " +
2023-05-10 07:50:41 +00:00
"It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control. " +
2020-07-02 11:19:11 +00:00
"Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets" )
2020-11-01 21:12:13 +00:00
streamParse = flag . Bool ( "promscrape.streamParse" , false , "Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful " +
"for reducing memory usage when millions of metrics are exposed per each scrape target. " +
2023-05-10 07:50:41 +00:00
"It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control" )
2020-02-23 11:35:47 +00:00
)
type client struct {
2020-11-01 21:12:13 +00:00
// hc is the default client optimized for common case of scraping targets with moderate number of metrics.
2020-02-23 11:35:47 +00:00
hc * fasthttp . HostClient
2023-02-13 18:51:55 +00:00
// sc (aka `stream client`) is used instead of hc if ScrapeWork.StreamParse is set.
2020-11-01 21:12:13 +00:00
// It may be useful for scraping targets with millions of metrics per target.
sc * http . Client
2023-02-09 19:13:06 +00:00
ctx context . Context
2021-04-05 09:15:07 +00:00
scrapeURL string
scrapeTimeoutSecondsStr string
2022-07-06 23:25:31 +00:00
hostPort string
2021-04-05 09:15:07 +00:00
requestURI string
2022-06-22 17:38:43 +00:00
setHeaders func ( req * http . Request )
setProxyHeaders func ( req * http . Request )
setFasthttpHeaders func ( req * fasthttp . Request )
setFasthttpProxyHeaders func ( req * fasthttp . Request )
2021-04-05 09:15:07 +00:00
denyRedirects bool
disableCompression bool
disableKeepAlive bool
2020-02-23 11:35:47 +00:00
}
2022-07-06 23:25:31 +00:00
func addMissingPort ( addr string , isTLS bool ) string {
if strings . Contains ( addr , ":" ) {
return addr
}
if isTLS {
2022-11-30 05:22:12 +00:00
return concatTwoStrings ( addr , ":443" )
2022-07-06 23:25:31 +00:00
}
2022-11-30 05:22:12 +00:00
return concatTwoStrings ( addr , ":80" )
}
func concatTwoStrings ( x , y string ) string {
bb := bbPool . Get ( )
b := bb . B [ : 0 ]
b = append ( b , x ... )
b = append ( b , y ... )
2023-01-04 06:14:20 +00:00
s := bytesutil . InternBytes ( b )
2022-11-30 05:22:12 +00:00
bb . B = b
bbPool . Put ( bb )
return s
2022-07-06 23:25:31 +00:00
}
2023-02-26 20:18:59 +00:00
func newClient ( ctx context . Context , sw * ScrapeWork ) * client {
2020-02-23 11:35:47 +00:00
var u fasthttp . URI
u . Update ( sw . ScrapeURL )
2022-07-06 23:25:31 +00:00
hostPort := string ( u . Host ( ) )
dialAddr := hostPort
2020-02-23 11:35:47 +00:00
requestURI := string ( u . RequestURI ( ) )
isTLS := string ( u . Scheme ( ) ) == "https"
var tlsCfg * tls . Config
2021-03-09 16:54:09 +00:00
if isTLS {
2020-04-13 09:59:05 +00:00
tlsCfg = sw . AuthConfig . NewTLSConfig ( )
2020-02-23 11:35:47 +00:00
}
2022-06-22 17:38:43 +00:00
setProxyHeaders := func ( req * http . Request ) { }
setFasthttpProxyHeaders := func ( req * fasthttp . Request ) { }
2021-04-03 21:40:08 +00:00
proxyURL := sw . ProxyURL
if ! isTLS && proxyURL . IsHTTPOrHTTPS ( ) {
// Send full sw.ScrapeURL in requests to a proxy host for non-TLS scrape targets
// like net/http package from Go does.
// See https://en.wikipedia.org/wiki/Proxy_server#Web_proxy_servers
2022-05-06 21:02:54 +00:00
pu := proxyURL . GetURL ( )
2022-07-06 23:25:31 +00:00
dialAddr = pu . Host
2021-04-03 21:40:08 +00:00
requestURI = sw . ScrapeURL
isTLS = pu . Scheme == "https"
if isTLS {
tlsCfg = sw . ProxyAuthConfig . NewTLSConfig ( )
}
2021-10-26 18:21:08 +00:00
proxyURLOrig := proxyURL
2022-06-22 17:38:43 +00:00
setProxyHeaders = func ( req * http . Request ) {
proxyURLOrig . SetHeaders ( sw . ProxyAuthConfig , req )
}
setFasthttpProxyHeaders = func ( req * fasthttp . Request ) {
proxyURLOrig . SetFasthttpHeaders ( sw . ProxyAuthConfig , req )
2021-05-14 17:00:05 +00:00
}
2021-10-26 18:21:08 +00:00
proxyURL = & proxy . URL { }
2021-04-03 21:40:08 +00:00
}
2022-07-06 23:25:31 +00:00
hostPort = addMissingPort ( hostPort , isTLS )
dialAddr = addMissingPort ( dialAddr , isTLS )
2021-04-03 21:40:08 +00:00
dialFunc , err := newStatDialFunc ( proxyURL , sw . ProxyAuthConfig )
2020-12-24 08:56:10 +00:00
if err != nil {
logger . Fatalf ( "cannot create dial func: %s" , err )
}
2020-02-23 11:35:47 +00:00
hc := & fasthttp . HostClient {
2022-07-06 23:25:31 +00:00
Addr : dialAddr ,
2020-04-29 13:20:23 +00:00
Name : "vm_promscrape" ,
2020-12-24 08:56:10 +00:00
Dial : dialFunc ,
2020-04-29 13:20:23 +00:00
IsTLS : isTLS ,
TLSConfig : tlsCfg ,
MaxIdleConnDuration : 2 * sw . ScrapeInterval ,
ReadTimeout : sw . ScrapeTimeout ,
WriteTimeout : 10 * time . Second ,
2022-12-15 03:26:24 +00:00
MaxResponseBodySize : maxScrapeSize . IntN ( ) ,
2020-04-29 13:20:23 +00:00
MaxIdempotentRequestAttempts : 1 ,
2022-12-15 03:26:24 +00:00
ReadBufferSize : maxResponseHeadersSize . IntN ( ) ,
2020-02-23 11:35:47 +00:00
}
2020-11-01 21:12:13 +00:00
var sc * http . Client
2021-10-16 10:18:20 +00:00
var proxyURLFunc func ( * http . Request ) ( * url . URL , error )
2022-05-06 21:02:54 +00:00
if pu := sw . ProxyURL . GetURL ( ) ; pu != nil {
2021-10-26 18:21:08 +00:00
proxyURLFunc = http . ProxyURL ( pu )
2021-10-16 10:18:20 +00:00
}
sc = & http . Client {
Transport : & http . Transport {
2021-11-03 20:26:56 +00:00
TLSClientConfig : tlsCfg ,
Proxy : proxyURLFunc ,
TLSHandshakeTimeout : 10 * time . Second ,
IdleConnTimeout : 2 * sw . ScrapeInterval ,
DisableCompression : * disableCompression || sw . DisableCompression ,
DisableKeepAlives : * disableKeepAlive || sw . DisableKeepAlive ,
DialContext : statStdDial ,
MaxIdleConnsPerHost : 100 ,
MaxResponseHeaderBytes : int64 ( maxResponseHeadersSize . N ) ,
2021-04-23 18:53:32 +00:00
2021-10-16 10:18:20 +00:00
// Set timeout for receiving the first response byte,
// since the duration for reading the full response can be much bigger because of stream parsing.
2021-04-23 18:53:32 +00:00
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1017#issuecomment-767235047
2021-10-16 10:18:20 +00:00
ResponseHeaderTimeout : sw . ScrapeTimeout ,
} ,
// Set 30x bigger timeout than the sw.ScrapeTimeout, since the duration for reading the full response
// can be much bigger because of stream parsing.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1017#issuecomment-767235047
Timeout : 30 * sw . ScrapeTimeout ,
}
2023-06-05 13:56:49 +00:00
if sw . EnableHTTP2 {
_ , err := http2 . ConfigureTransports ( sc . Transport . ( * http . Transport ) )
if err != nil {
logger . Errorf ( "failed to configure HTTP/2 transport: %s" , err )
}
}
2021-10-16 10:18:20 +00:00
if sw . DenyRedirects {
sc . CheckRedirect = func ( req * http . Request , via [ ] * http . Request ) error {
return http . ErrUseLastResponse
2021-04-02 16:56:38 +00:00
}
2020-11-01 21:12:13 +00:00
}
2023-06-05 14:31:58 +00:00
2020-02-23 11:35:47 +00:00
return & client {
2021-04-05 09:15:07 +00:00
hc : hc ,
2023-02-09 19:13:06 +00:00
ctx : ctx ,
2021-04-05 09:15:07 +00:00
sc : sc ,
scrapeURL : sw . ScrapeURL ,
scrapeTimeoutSecondsStr : fmt . Sprintf ( "%.3f" , sw . ScrapeTimeout . Seconds ( ) ) ,
2022-07-06 23:25:31 +00:00
hostPort : hostPort ,
2021-04-05 09:15:07 +00:00
requestURI : requestURI ,
2022-06-22 17:38:43 +00:00
setHeaders : func ( req * http . Request ) { sw . AuthConfig . SetHeaders ( req , true ) } ,
setProxyHeaders : setProxyHeaders ,
setFasthttpHeaders : func ( req * fasthttp . Request ) { sw . AuthConfig . SetFasthttpHeaders ( req , true ) } ,
setFasthttpProxyHeaders : setFasthttpProxyHeaders ,
2021-04-05 09:15:07 +00:00
denyRedirects : sw . DenyRedirects ,
disableCompression : sw . DisableCompression ,
disableKeepAlive : sw . DisableKeepAlive ,
2020-02-23 11:35:47 +00:00
}
}
2020-11-01 21:12:13 +00:00
func ( c * client ) GetStreamReader ( ) ( * streamReader , error ) {
2021-09-12 12:20:42 +00:00
deadline := time . Now ( ) . Add ( c . sc . Timeout )
2023-02-09 19:13:06 +00:00
ctx , cancel := context . WithDeadline ( c . ctx , deadline )
2023-02-23 02:58:44 +00:00
req , err := http . NewRequestWithContext ( ctx , http . MethodGet , c . scrapeURL , nil )
2020-11-01 21:12:13 +00:00
if err != nil {
cancel ( )
return nil , fmt . Errorf ( "cannot create request for %q: %w" , c . scrapeURL , err )
}
// The following `Accept` header has been copied from Prometheus sources.
// See https://github.com/prometheus/prometheus/blob/f9d21f10ecd2a343a381044f131ea4e46381ce09/scrape/scrape.go#L532 .
// This is needed as a workaround for scraping stupid Java-based servers such as Spring Boot.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/608 for details.
// Do not bloat the `Accept` header with OpenMetrics shit, since it looks like dead standard now.
req . Header . Set ( "Accept" , "text/plain;version=0.0.4;q=1,*/*;q=0.1" )
2021-04-05 09:15:07 +00:00
// Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162
req . Header . Set ( "X-Prometheus-Scrape-Timeout-Seconds" , c . scrapeTimeoutSecondsStr )
2022-06-22 17:38:43 +00:00
c . setHeaders ( req )
c . setProxyHeaders ( req )
2022-08-16 11:52:38 +00:00
scrapeRequests . Inc ( )
2020-11-01 21:12:13 +00:00
resp , err := c . sc . Do ( req )
if err != nil {
cancel ( )
return nil , fmt . Errorf ( "cannot scrape %q: %w" , c . scrapeURL , err )
}
if resp . StatusCode != http . StatusOK {
metrics . GetOrCreateCounter ( fmt . Sprintf ( ` vm_promscrape_scrapes_total { status_code="%d"} ` , resp . StatusCode ) ) . Inc ( )
2022-08-21 21:13:44 +00:00
respBody , _ := io . ReadAll ( resp . Body )
2020-11-01 21:12:13 +00:00
_ = resp . Body . Close ( )
cancel ( )
return nil , fmt . Errorf ( "unexpected status code returned when scraping %q: %d; expecting %d; response body: %q" ,
c . scrapeURL , resp . StatusCode , http . StatusOK , respBody )
}
scrapesOK . Inc ( )
return & streamReader {
2021-05-27 11:52:44 +00:00
r : resp . Body ,
cancel : cancel ,
scrapeURL : c . scrapeURL ,
maxBodySize : int64 ( c . hc . MaxResponseBodySize ) ,
2020-11-01 21:12:13 +00:00
} , nil
}
2022-05-03 10:31:31 +00:00
// isStatusRedirect reports whether statusCode returned by fasthttp is a redirect,
// which must be followed in the same way as the standard net/http client does.
//
// The recognized redirect status codes are 301, 302, 303, 307 and 308.
func isStatusRedirect(statusCode int) bool {
	if statusCode == 307 || statusCode == 308 {
		return true
	}
	return statusCode >= 301 && statusCode <= 303
}
2020-02-23 11:35:47 +00:00
func ( c * client ) ReadData ( dst [ ] byte ) ( [ ] byte , error ) {
2020-07-03 18:33:17 +00:00
deadline := time . Now ( ) . Add ( c . hc . ReadTimeout )
2020-02-23 11:35:47 +00:00
req := fasthttp . AcquireRequest ( )
req . SetRequestURI ( c . requestURI )
2022-07-06 23:25:31 +00:00
req . Header . SetHost ( c . hostPort )
2020-07-08 16:47:08 +00:00
// The following `Accept` header has been copied from Prometheus sources.
// See https://github.com/prometheus/prometheus/blob/f9d21f10ecd2a343a381044f131ea4e46381ce09/scrape/scrape.go#L532 .
// This is needed as a workaround for scraping stupid Java-based servers such as Spring Boot.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/608 for details.
// Do not bloat the `Accept` header with OpenMetrics shit, since it looks like dead standard now.
req . Header . Set ( "Accept" , "text/plain;version=0.0.4;q=1,*/*;q=0.1" )
2021-04-05 09:15:07 +00:00
// Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162
req . Header . Set ( "X-Prometheus-Scrape-Timeout-Seconds" , c . scrapeTimeoutSecondsStr )
2022-06-22 17:38:43 +00:00
c . setFasthttpHeaders ( req )
c . setFasthttpProxyHeaders ( req )
2020-11-01 21:12:13 +00:00
if ! * disableCompression && ! c . disableCompression {
2020-02-23 11:35:47 +00:00
req . Header . Set ( "Accept-Encoding" , "gzip" )
}
2020-07-02 11:19:11 +00:00
if * disableKeepAlive || c . disableKeepAlive {
2020-06-30 23:19:58 +00:00
req . SetConnectionClose ( )
}
2020-02-23 11:35:47 +00:00
resp := fasthttp . AcquireResponse ( )
2020-09-18 09:31:16 +00:00
swapResponseBodies := len ( dst ) == 0
if swapResponseBodies {
// An optimization: write response directly to dst.
2023-03-03 10:02:13 +00:00
// This should reduce memory usage when scraping big targets.
2020-09-18 09:31:16 +00:00
dst = resp . SwapBody ( dst )
}
2023-02-22 16:05:16 +00:00
2023-02-24 20:11:44 +00:00
ctx , cancel := context . WithDeadline ( c . ctx , deadline )
defer cancel ( )
err := doRequestWithPossibleRetry ( ctx , c . hc , req , resp )
2020-04-27 23:13:02 +00:00
statusCode := resp . StatusCode ( )
2021-12-15 22:13:34 +00:00
redirectsCount := 0
2022-05-03 10:31:31 +00:00
for err == nil && isStatusRedirect ( statusCode ) {
2021-12-15 22:13:34 +00:00
if redirectsCount > 5 {
err = fmt . Errorf ( "too many redirects" )
break
}
2021-04-02 16:56:38 +00:00
if c . denyRedirects {
err = fmt . Errorf ( "cannot follow redirects if `follow_redirects: false` is set" )
2021-12-15 22:13:34 +00:00
break
}
// It is expected that the redirect is made on the same host.
// Otherwise it won't work.
location := resp . Header . Peek ( "Location" )
if len ( location ) == 0 {
err = fmt . Errorf ( "missing Location header" )
break
2020-04-27 23:13:02 +00:00
}
2021-12-15 22:13:34 +00:00
req . URI ( ) . UpdateBytes ( location )
2023-02-24 20:11:44 +00:00
err = doRequestWithPossibleRetry ( ctx , c . hc , req , resp )
2021-12-15 22:13:34 +00:00
statusCode = resp . StatusCode ( )
redirectsCount ++
2020-04-27 23:13:02 +00:00
}
2020-11-01 23:09:37 +00:00
if swapResponseBodies {
dst = resp . SwapBody ( dst )
}
2020-02-23 11:35:47 +00:00
fasthttp . ReleaseRequest ( req )
if err != nil {
fasthttp . ReleaseResponse ( resp )
if err == fasthttp . ErrTimeout {
scrapesTimedout . Inc ( )
2020-06-30 19:58:18 +00:00
return dst , fmt . Errorf ( "error when scraping %q with timeout %s: %w" , c . scrapeURL , c . hc . ReadTimeout , err )
2020-02-23 11:35:47 +00:00
}
2020-05-24 11:41:08 +00:00
if err == fasthttp . ErrBodyTooLarge {
2021-09-23 11:47:20 +00:00
maxScrapeSizeExceeded . Inc ( )
2020-05-24 11:41:08 +00:00
return dst , fmt . Errorf ( "the response from %q exceeds -promscrape.maxScrapeSize=%d; " +
2020-08-16 14:05:52 +00:00
"either reduce the response size for the target or increase -promscrape.maxScrapeSize" , c . scrapeURL , maxScrapeSize . N )
2020-05-24 11:41:08 +00:00
}
2020-06-30 19:58:18 +00:00
return dst , fmt . Errorf ( "error when scraping %q: %w" , c . scrapeURL , err )
2020-02-23 11:35:47 +00:00
}
if ce := resp . Header . Peek ( "Content-Encoding" ) ; string ( ce ) == "gzip" {
var err error
2020-09-18 09:31:16 +00:00
if swapResponseBodies {
2020-11-26 16:08:39 +00:00
zb := gunzipBufPool . Get ( )
zb . B , err = fasthttp . AppendGunzipBytes ( zb . B [ : 0 ] , dst )
dst = append ( dst [ : 0 ] , zb . B ... )
gunzipBufPool . Put ( zb )
2020-09-18 09:31:16 +00:00
} else {
2020-11-26 16:08:39 +00:00
dst , err = fasthttp . AppendGunzipBytes ( dst , resp . Body ( ) )
2020-09-18 09:31:16 +00:00
}
2020-02-23 11:35:47 +00:00
if err != nil {
fasthttp . ReleaseResponse ( resp )
scrapesGunzipFailed . Inc ( )
2020-06-30 19:58:18 +00:00
return dst , fmt . Errorf ( "cannot ungzip response from %q: %w" , c . scrapeURL , err )
2020-02-23 11:35:47 +00:00
}
scrapesGunzipped . Inc ( )
2020-09-18 09:31:16 +00:00
} else if ! swapResponseBodies {
2020-02-23 11:35:47 +00:00
dst = append ( dst , resp . Body ( ) ... )
}
2022-12-28 22:42:07 +00:00
fasthttp . ReleaseResponse ( resp )
2022-12-28 20:19:41 +00:00
if len ( dst ) > c . hc . MaxResponseBodySize {
maxScrapeSizeExceeded . Inc ( )
2022-12-28 22:42:07 +00:00
return dst , fmt . Errorf ( "the response from %q exceeds -promscrape.maxScrapeSize=%d (the actual response size is %d bytes); " +
2023-01-24 05:04:50 +00:00
"either reduce the response size for the target or increase -promscrape.maxScrapeSize" , c . scrapeURL , maxScrapeSize . N , len ( dst ) )
2022-12-28 20:19:41 +00:00
}
2020-02-23 11:35:47 +00:00
if statusCode != fasthttp . StatusOK {
metrics . GetOrCreateCounter ( fmt . Sprintf ( ` vm_promscrape_scrapes_total { status_code="%d"} ` , statusCode ) ) . Inc ( )
return dst , fmt . Errorf ( "unexpected status code returned when scraping %q: %d; expecting %d; response body: %q" ,
2020-10-29 14:17:52 +00:00
c . scrapeURL , statusCode , fasthttp . StatusOK , dst )
2020-02-23 11:35:47 +00:00
}
scrapesOK . Inc ( )
return dst , nil
}
2020-11-26 16:08:39 +00:00
// gunzipBufPool is a pool of temporary buffers, which are used by ReadData
// for gunzipping the response body.
var gunzipBufPool bytesutil.ByteBufferPool
2020-02-23 11:35:47 +00:00
var (
2021-09-23 11:47:20 +00:00
maxScrapeSizeExceeded = metrics . NewCounter ( ` vm_promscrape_max_scrape_size_exceeded_errors_total ` )
scrapesTimedout = metrics . NewCounter ( ` vm_promscrape_scrapes_timed_out_total ` )
scrapesOK = metrics . NewCounter ( ` vm_promscrape_scrapes_total { status_code="200"} ` )
scrapesGunzipped = metrics . NewCounter ( ` vm_promscrape_scrapes_gunziped_total ` )
scrapesGunzipFailed = metrics . NewCounter ( ` vm_promscrape_scrapes_gunzip_failed_total ` )
2022-08-16 11:52:38 +00:00
scrapeRequests = metrics . NewCounter ( ` vm_promscrape_scrape_requests_total ` )
2021-09-23 11:47:20 +00:00
scrapeRetries = metrics . NewCounter ( ` vm_promscrape_scrape_retries_total ` )
2020-02-23 11:35:47 +00:00
)
2020-04-16 20:24:33 +00:00
2023-02-24 20:11:44 +00:00
func doRequestWithPossibleRetry ( ctx context . Context , hc * fasthttp . HostClient , req * fasthttp . Request , resp * fasthttp . Response ) error {
2023-01-06 03:34:47 +00:00
scrapeRequests . Inc ( )
2023-02-24 19:39:56 +00:00
var reqErr error
// Return true if the request execution is completed and retry is not required
attempt := func ( ) bool {
2023-02-22 16:05:16 +00:00
// Use DoCtx instead of Do in order to support context cancellation
2023-02-24 20:11:44 +00:00
reqErr = hc . DoCtx ( ctx , req , resp )
2023-02-24 19:39:56 +00:00
if reqErr == nil {
2023-01-06 03:34:47 +00:00
statusCode := resp . StatusCode ( )
if statusCode != fasthttp . StatusTooManyRequests {
2023-02-24 19:39:56 +00:00
return true
2023-01-06 03:34:47 +00:00
}
2023-02-24 19:39:56 +00:00
} else if reqErr != fasthttp . ErrConnectionClosed && ! strings . Contains ( reqErr . Error ( ) , "broken pipe" ) {
return true
}
return false
}
if attempt ( ) {
return reqErr
}
2023-02-24 20:11:44 +00:00
// The first attempt was unsuccessful. Use exponential backoff for further attempts.
// Perform the second attempt immediately after the first attempt - this should help
// in cases when the remote side closes the keep-alive connection before the first attempt.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3293
2023-02-24 19:39:56 +00:00
sleepTime := time . Second
2023-02-24 20:11:44 +00:00
// It is expected that the deadline is already set to ctx, so the loop below
// should eventually finish if all the attempt() calls are unsuccessful.
2023-02-24 19:39:56 +00:00
for {
scrapeRetries . Inc ( )
if attempt ( ) {
return reqErr
2023-01-06 03:34:47 +00:00
}
sleepTime += sleepTime
2023-02-24 20:11:44 +00:00
if ! discoveryutils . SleepCtx ( ctx , sleepTime ) {
return reqErr
2023-01-06 03:34:47 +00:00
}
}
2020-04-16 20:24:33 +00:00
}
2020-11-01 21:12:13 +00:00
// streamReader reads the response body of a scrape performed via client.GetStreamReader,
// while enforcing the limit on the number of bytes read.
//
// MustClose must be called when the reader is no longer needed.
type streamReader struct {
	// r is the underlying response body.
	r io.ReadCloser

	// cancel cancels the context of the request the response body belongs to.
	cancel context.CancelFunc

	// bytesRead is the number of bytes obtained from r so far.
	bytesRead int64

	// scrapeURL is the url of the scraped target. It is used in error messages.
	scrapeURL string

	// maxBodySize is the maximum number of bytes, which may be read from r.
	maxBodySize int64
}
func ( sr * streamReader ) Read ( p [ ] byte ) ( int , error ) {
n , err := sr . r . Read ( p )
sr . bytesRead += int64 ( n )
2021-05-27 12:03:30 +00:00
if err == nil && sr . bytesRead > sr . maxBodySize {
2021-09-23 11:47:20 +00:00
maxScrapeSizeExceeded . Inc ( )
2021-05-27 12:03:30 +00:00
err = fmt . Errorf ( "the response from %q exceeds -promscrape.maxScrapeSize=%d; " +
2021-05-27 11:52:44 +00:00
"either reduce the response size for the target or increase -promscrape.maxScrapeSize" , sr . scrapeURL , sr . maxBodySize )
}
2020-11-01 21:12:13 +00:00
return n , err
}
// MustClose cancels the request context and closes the underlying response body.
//
// A failure to close the body is logged instead of being returned.
func (sr *streamReader) MustClose() {
	sr.cancel()
	err := sr.r.Close()
	if err == nil {
		return
	}
	logger.Errorf("cannot close reader: %s", err)
}