mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
2c05066f19
* lib/promscrape: fix cancelling in-flight scrape requests during configuration reload (see #3747) Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promscrape: fix order of params for `doRequestWithPossibleRetry` to follow codestyle Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * lib/promscrape: accept deadline explicitly and extend passed context for local use Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> --------- Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com>
408 lines
16 KiB
Go
408 lines
16 KiB
Go
package promscrape
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy"
|
|
"github.com/VictoriaMetrics/fasthttp"
|
|
"github.com/VictoriaMetrics/metrics"
|
|
)
|
|
|
|
var (
|
|
maxScrapeSize = flagutil.NewBytes("promscrape.maxScrapeSize", 16*1024*1024, "The maximum size of scrape response in bytes to process from Prometheus targets. "+
|
|
"Bigger responses are rejected")
|
|
maxResponseHeadersSize = flagutil.NewBytes("promscrape.maxResponseHeadersSize", 4096, "The maximum size of http response headers from Prometheus scrape targets")
|
|
disableCompression = flag.Bool("promscrape.disableCompression", false, "Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. "+
|
|
"This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. "+
|
|
"It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control")
|
|
disableKeepAlive = flag.Bool("promscrape.disableKeepAlive", false, "Whether to disable HTTP keep-alive connections when scraping all the targets. "+
|
|
"This may be useful when targets has no support for HTTP keep-alive connection. "+
|
|
"It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control. "+
|
|
"Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets")
|
|
streamParse = flag.Bool("promscrape.streamParse", false, "Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful "+
|
|
"for reducing memory usage when millions of metrics are exposed per each scrape target. "+
|
|
"It is posible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine grained control")
|
|
)
|
|
|
|
type client struct {
|
|
// hc is the default client optimized for common case of scraping targets with moderate number of metrics.
|
|
hc *fasthttp.HostClient
|
|
|
|
// sc (aka `stream client`) is used instead of hc if ScrapeWork.StreamParse is set.
|
|
// It may be useful for scraping targets with millions of metrics per target.
|
|
sc *http.Client
|
|
|
|
ctx context.Context
|
|
scrapeURL string
|
|
scrapeTimeoutSecondsStr string
|
|
hostPort string
|
|
requestURI string
|
|
setHeaders func(req *http.Request)
|
|
setProxyHeaders func(req *http.Request)
|
|
setFasthttpHeaders func(req *fasthttp.Request)
|
|
setFasthttpProxyHeaders func(req *fasthttp.Request)
|
|
denyRedirects bool
|
|
disableCompression bool
|
|
disableKeepAlive bool
|
|
}
|
|
|
|
func addMissingPort(addr string, isTLS bool) string {
|
|
if strings.Contains(addr, ":") {
|
|
return addr
|
|
}
|
|
if isTLS {
|
|
return concatTwoStrings(addr, ":443")
|
|
}
|
|
return concatTwoStrings(addr, ":80")
|
|
}
|
|
|
|
func concatTwoStrings(x, y string) string {
|
|
bb := bbPool.Get()
|
|
b := bb.B[:0]
|
|
b = append(b, x...)
|
|
b = append(b, y...)
|
|
s := bytesutil.InternBytes(b)
|
|
bb.B = b
|
|
bbPool.Put(bb)
|
|
return s
|
|
}
|
|
|
|
func newClient(sw *ScrapeWork, ctx context.Context) *client {
|
|
var u fasthttp.URI
|
|
u.Update(sw.ScrapeURL)
|
|
hostPort := string(u.Host())
|
|
dialAddr := hostPort
|
|
requestURI := string(u.RequestURI())
|
|
isTLS := string(u.Scheme()) == "https"
|
|
var tlsCfg *tls.Config
|
|
if isTLS {
|
|
tlsCfg = sw.AuthConfig.NewTLSConfig()
|
|
}
|
|
setProxyHeaders := func(req *http.Request) {}
|
|
setFasthttpProxyHeaders := func(req *fasthttp.Request) {}
|
|
proxyURL := sw.ProxyURL
|
|
if !isTLS && proxyURL.IsHTTPOrHTTPS() {
|
|
// Send full sw.ScrapeURL in requests to a proxy host for non-TLS scrape targets
|
|
// like net/http package from Go does.
|
|
// See https://en.wikipedia.org/wiki/Proxy_server#Web_proxy_servers
|
|
pu := proxyURL.GetURL()
|
|
dialAddr = pu.Host
|
|
requestURI = sw.ScrapeURL
|
|
isTLS = pu.Scheme == "https"
|
|
if isTLS {
|
|
tlsCfg = sw.ProxyAuthConfig.NewTLSConfig()
|
|
}
|
|
proxyURLOrig := proxyURL
|
|
setProxyHeaders = func(req *http.Request) {
|
|
proxyURLOrig.SetHeaders(sw.ProxyAuthConfig, req)
|
|
}
|
|
setFasthttpProxyHeaders = func(req *fasthttp.Request) {
|
|
proxyURLOrig.SetFasthttpHeaders(sw.ProxyAuthConfig, req)
|
|
}
|
|
proxyURL = &proxy.URL{}
|
|
}
|
|
hostPort = addMissingPort(hostPort, isTLS)
|
|
dialAddr = addMissingPort(dialAddr, isTLS)
|
|
dialFunc, err := newStatDialFunc(proxyURL, sw.ProxyAuthConfig)
|
|
if err != nil {
|
|
logger.Fatalf("cannot create dial func: %s", err)
|
|
}
|
|
hc := &fasthttp.HostClient{
|
|
Addr: dialAddr,
|
|
Name: "vm_promscrape",
|
|
Dial: dialFunc,
|
|
IsTLS: isTLS,
|
|
TLSConfig: tlsCfg,
|
|
MaxIdleConnDuration: 2 * sw.ScrapeInterval,
|
|
ReadTimeout: sw.ScrapeTimeout,
|
|
WriteTimeout: 10 * time.Second,
|
|
MaxResponseBodySize: maxScrapeSize.IntN(),
|
|
MaxIdempotentRequestAttempts: 1,
|
|
ReadBufferSize: maxResponseHeadersSize.IntN(),
|
|
}
|
|
var sc *http.Client
|
|
var proxyURLFunc func(*http.Request) (*url.URL, error)
|
|
if pu := sw.ProxyURL.GetURL(); pu != nil {
|
|
proxyURLFunc = http.ProxyURL(pu)
|
|
}
|
|
sc = &http.Client{
|
|
Transport: &http.Transport{
|
|
TLSClientConfig: tlsCfg,
|
|
Proxy: proxyURLFunc,
|
|
TLSHandshakeTimeout: 10 * time.Second,
|
|
IdleConnTimeout: 2 * sw.ScrapeInterval,
|
|
DisableCompression: *disableCompression || sw.DisableCompression,
|
|
DisableKeepAlives: *disableKeepAlive || sw.DisableKeepAlive,
|
|
DialContext: statStdDial,
|
|
MaxIdleConnsPerHost: 100,
|
|
MaxResponseHeaderBytes: int64(maxResponseHeadersSize.N),
|
|
|
|
// Set timeout for receiving the first response byte,
|
|
// since the duration for reading the full response can be much bigger because of stream parsing.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1017#issuecomment-767235047
|
|
ResponseHeaderTimeout: sw.ScrapeTimeout,
|
|
},
|
|
|
|
// Set 30x bigger timeout than the sw.ScrapeTimeout, since the duration for reading the full response
|
|
// can be much bigger because of stream parsing.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1017#issuecomment-767235047
|
|
Timeout: 30 * sw.ScrapeTimeout,
|
|
}
|
|
if sw.DenyRedirects {
|
|
sc.CheckRedirect = func(req *http.Request, via []*http.Request) error {
|
|
return http.ErrUseLastResponse
|
|
}
|
|
}
|
|
return &client{
|
|
hc: hc,
|
|
ctx: ctx,
|
|
sc: sc,
|
|
scrapeURL: sw.ScrapeURL,
|
|
scrapeTimeoutSecondsStr: fmt.Sprintf("%.3f", sw.ScrapeTimeout.Seconds()),
|
|
hostPort: hostPort,
|
|
requestURI: requestURI,
|
|
setHeaders: func(req *http.Request) { sw.AuthConfig.SetHeaders(req, true) },
|
|
setProxyHeaders: setProxyHeaders,
|
|
setFasthttpHeaders: func(req *fasthttp.Request) { sw.AuthConfig.SetFasthttpHeaders(req, true) },
|
|
setFasthttpProxyHeaders: setFasthttpProxyHeaders,
|
|
denyRedirects: sw.DenyRedirects,
|
|
disableCompression: sw.DisableCompression,
|
|
disableKeepAlive: sw.DisableKeepAlive,
|
|
}
|
|
}
|
|
|
|
func (c *client) GetStreamReader() (*streamReader, error) {
|
|
deadline := time.Now().Add(c.sc.Timeout)
|
|
ctx, cancel := context.WithDeadline(c.ctx, deadline)
|
|
req, err := http.NewRequestWithContext(ctx, "GET", c.scrapeURL, nil)
|
|
if err != nil {
|
|
cancel()
|
|
return nil, fmt.Errorf("cannot create request for %q: %w", c.scrapeURL, err)
|
|
}
|
|
// The following `Accept` header has been copied from Prometheus sources.
|
|
// See https://github.com/prometheus/prometheus/blob/f9d21f10ecd2a343a381044f131ea4e46381ce09/scrape/scrape.go#L532 .
|
|
// This is needed as a workaround for scraping stupid Java-based servers such as Spring Boot.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/608 for details.
|
|
// Do not bloat the `Accept` header with OpenMetrics shit, since it looks like dead standard now.
|
|
req.Header.Set("Accept", "text/plain;version=0.0.4;q=1,*/*;q=0.1")
|
|
// Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162
|
|
req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", c.scrapeTimeoutSecondsStr)
|
|
c.setHeaders(req)
|
|
c.setProxyHeaders(req)
|
|
scrapeRequests.Inc()
|
|
resp, err := c.sc.Do(req)
|
|
if err != nil {
|
|
cancel()
|
|
return nil, fmt.Errorf("cannot scrape %q: %w", c.scrapeURL, err)
|
|
}
|
|
if resp.StatusCode != http.StatusOK {
|
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_scrapes_total{status_code="%d"}`, resp.StatusCode)).Inc()
|
|
respBody, _ := io.ReadAll(resp.Body)
|
|
_ = resp.Body.Close()
|
|
cancel()
|
|
return nil, fmt.Errorf("unexpected status code returned when scraping %q: %d; expecting %d; response body: %q",
|
|
c.scrapeURL, resp.StatusCode, http.StatusOK, respBody)
|
|
}
|
|
scrapesOK.Inc()
|
|
return &streamReader{
|
|
r: resp.Body,
|
|
cancel: cancel,
|
|
scrapeURL: c.scrapeURL,
|
|
maxBodySize: int64(c.hc.MaxResponseBodySize),
|
|
}, nil
|
|
}
|
|
|
|
// checks fasthttp status code for redirect as standard http/client does.
|
|
func isStatusRedirect(statusCode int) bool {
|
|
switch statusCode {
|
|
case 301, 302, 303, 307, 308:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (c *client) ReadData(dst []byte) ([]byte, error) {
|
|
deadline := time.Now().Add(c.hc.ReadTimeout)
|
|
req := fasthttp.AcquireRequest()
|
|
req.SetRequestURI(c.requestURI)
|
|
req.Header.SetHost(c.hostPort)
|
|
// The following `Accept` header has been copied from Prometheus sources.
|
|
// See https://github.com/prometheus/prometheus/blob/f9d21f10ecd2a343a381044f131ea4e46381ce09/scrape/scrape.go#L532 .
|
|
// This is needed as a workaround for scraping stupid Java-based servers such as Spring Boot.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/608 for details.
|
|
// Do not bloat the `Accept` header with OpenMetrics shit, since it looks like dead standard now.
|
|
req.Header.Set("Accept", "text/plain;version=0.0.4;q=1,*/*;q=0.1")
|
|
// Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx.
|
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162
|
|
req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", c.scrapeTimeoutSecondsStr)
|
|
c.setFasthttpHeaders(req)
|
|
c.setFasthttpProxyHeaders(req)
|
|
if !*disableCompression && !c.disableCompression {
|
|
req.Header.Set("Accept-Encoding", "gzip")
|
|
}
|
|
if *disableKeepAlive || c.disableKeepAlive {
|
|
req.SetConnectionClose()
|
|
}
|
|
resp := fasthttp.AcquireResponse()
|
|
swapResponseBodies := len(dst) == 0
|
|
if swapResponseBodies {
|
|
// An optimization: write response directly to dst.
|
|
// This should reduce memory uage when scraping big targets.
|
|
dst = resp.SwapBody(dst)
|
|
}
|
|
|
|
err := doRequestWithPossibleRetry(c.ctx, c.hc, req, resp, deadline)
|
|
statusCode := resp.StatusCode()
|
|
redirectsCount := 0
|
|
for err == nil && isStatusRedirect(statusCode) {
|
|
if redirectsCount > 5 {
|
|
err = fmt.Errorf("too many redirects")
|
|
break
|
|
}
|
|
if c.denyRedirects {
|
|
err = fmt.Errorf("cannot follow redirects if `follow_redirects: false` is set")
|
|
break
|
|
}
|
|
// It is expected that the redirect is made on the same host.
|
|
// Otherwise it won't work.
|
|
location := resp.Header.Peek("Location")
|
|
if len(location) == 0 {
|
|
err = fmt.Errorf("missing Location header")
|
|
break
|
|
}
|
|
req.URI().UpdateBytes(location)
|
|
err = doRequestWithPossibleRetry(c.ctx, c.hc, req, resp, deadline)
|
|
statusCode = resp.StatusCode()
|
|
redirectsCount++
|
|
}
|
|
if swapResponseBodies {
|
|
dst = resp.SwapBody(dst)
|
|
}
|
|
fasthttp.ReleaseRequest(req)
|
|
if err != nil {
|
|
fasthttp.ReleaseResponse(resp)
|
|
if err == fasthttp.ErrTimeout {
|
|
scrapesTimedout.Inc()
|
|
return dst, fmt.Errorf("error when scraping %q with timeout %s: %w", c.scrapeURL, c.hc.ReadTimeout, err)
|
|
}
|
|
if err == fasthttp.ErrBodyTooLarge {
|
|
maxScrapeSizeExceeded.Inc()
|
|
return dst, fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
|
|
"either reduce the response size for the target or increase -promscrape.maxScrapeSize", c.scrapeURL, maxScrapeSize.N)
|
|
}
|
|
return dst, fmt.Errorf("error when scraping %q: %w", c.scrapeURL, err)
|
|
}
|
|
if ce := resp.Header.Peek("Content-Encoding"); string(ce) == "gzip" {
|
|
var err error
|
|
if swapResponseBodies {
|
|
zb := gunzipBufPool.Get()
|
|
zb.B, err = fasthttp.AppendGunzipBytes(zb.B[:0], dst)
|
|
dst = append(dst[:0], zb.B...)
|
|
gunzipBufPool.Put(zb)
|
|
} else {
|
|
dst, err = fasthttp.AppendGunzipBytes(dst, resp.Body())
|
|
}
|
|
if err != nil {
|
|
fasthttp.ReleaseResponse(resp)
|
|
scrapesGunzipFailed.Inc()
|
|
return dst, fmt.Errorf("cannot ungzip response from %q: %w", c.scrapeURL, err)
|
|
}
|
|
scrapesGunzipped.Inc()
|
|
} else if !swapResponseBodies {
|
|
dst = append(dst, resp.Body()...)
|
|
}
|
|
fasthttp.ReleaseResponse(resp)
|
|
if len(dst) > c.hc.MaxResponseBodySize {
|
|
maxScrapeSizeExceeded.Inc()
|
|
return dst, fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d (the actual response size is %d bytes); "+
|
|
"either reduce the response size for the target or increase -promscrape.maxScrapeSize", c.scrapeURL, maxScrapeSize.N, len(dst))
|
|
}
|
|
if statusCode != fasthttp.StatusOK {
|
|
metrics.GetOrCreateCounter(fmt.Sprintf(`vm_promscrape_scrapes_total{status_code="%d"}`, statusCode)).Inc()
|
|
return dst, fmt.Errorf("unexpected status code returned when scraping %q: %d; expecting %d; response body: %q",
|
|
c.scrapeURL, statusCode, fasthttp.StatusOK, dst)
|
|
}
|
|
scrapesOK.Inc()
|
|
return dst, nil
|
|
}
|
|
|
|
var gunzipBufPool bytesutil.ByteBufferPool
|
|
|
|
var (
|
|
maxScrapeSizeExceeded = metrics.NewCounter(`vm_promscrape_max_scrape_size_exceeded_errors_total`)
|
|
scrapesTimedout = metrics.NewCounter(`vm_promscrape_scrapes_timed_out_total`)
|
|
scrapesOK = metrics.NewCounter(`vm_promscrape_scrapes_total{status_code="200"}`)
|
|
scrapesGunzipped = metrics.NewCounter(`vm_promscrape_scrapes_gunziped_total`)
|
|
scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
|
|
scrapeRequests = metrics.NewCounter(`vm_promscrape_scrape_requests_total`)
|
|
scrapeRetries = metrics.NewCounter(`vm_promscrape_scrape_retries_total`)
|
|
)
|
|
|
|
func doRequestWithPossibleRetry(ctx context.Context, hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
|
|
sleepTime := time.Second
|
|
scrapeRequests.Inc()
|
|
reqCtx, cancel := context.WithDeadline(ctx, deadline)
|
|
defer cancel()
|
|
for {
|
|
// Use DoCtx instead of Do in order to support context cancellation
|
|
err := hc.DoCtx(reqCtx, req, resp)
|
|
if err == nil {
|
|
statusCode := resp.StatusCode()
|
|
if statusCode != fasthttp.StatusTooManyRequests {
|
|
return nil
|
|
}
|
|
} else if err != fasthttp.ErrConnectionClosed && !strings.Contains(err.Error(), "broken pipe") {
|
|
return err
|
|
}
|
|
|
|
// Retry request after exponentially increased sleep.
|
|
maxSleepTime := time.Until(deadline)
|
|
if sleepTime > maxSleepTime {
|
|
return fmt.Errorf("the server closes all the connection attempts: %w", err)
|
|
}
|
|
sleepTime += sleepTime
|
|
if sleepTime > maxSleepTime {
|
|
sleepTime = maxSleepTime
|
|
}
|
|
time.Sleep(sleepTime)
|
|
scrapeRetries.Inc()
|
|
}
|
|
}
|
|
|
|
type streamReader struct {
|
|
r io.ReadCloser
|
|
cancel context.CancelFunc
|
|
bytesRead int64
|
|
scrapeURL string
|
|
maxBodySize int64
|
|
}
|
|
|
|
func (sr *streamReader) Read(p []byte) (int, error) {
|
|
n, err := sr.r.Read(p)
|
|
sr.bytesRead += int64(n)
|
|
if err == nil && sr.bytesRead > sr.maxBodySize {
|
|
maxScrapeSizeExceeded.Inc()
|
|
err = fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
|
|
"either reduce the response size for the target or increase -promscrape.maxScrapeSize", sr.scrapeURL, sr.maxBodySize)
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
func (sr *streamReader) MustClose() {
|
|
sr.cancel()
|
|
if err := sr.r.Close(); err != nil {
|
|
logger.Errorf("cannot close reader: %s", err)
|
|
}
|
|
}
|