lib/promscrape: log the number of unsuccessful scrapes during the last -promscrape.suppressScrapeErrorsDelay

This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3413
Thanks to @jelmd for the pull request.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2575
This commit is contained in:
Aliaksandr Valialkin 2023-01-12 01:09:26 -08:00
parent a6988eb8c3
commit a819e30ddf
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1

View file

@@ -217,11 +217,14 @@ type scrapeWork struct {
 	// equals to or exceeds -promscrape.minResponseSizeForStreamParse
 	lastScrapeCompressed []byte
 
-	// lastErrLogTimestamp is the timestamp in unix seconds of the last logged scrape error
-	lastErrLogTimestamp uint64
+	// nextErrorLogTime is the timestamp in millisecond when the next scrape error should be logged.
+	nextErrorLogTime int64
 
-	// errsSuppressedCount is the number of suppressed scrape errors since lastErrLogTimestamp
-	errsSuppressedCount int
+	// failureRequestsCount is the number of suppressed scrape errors during the last suppressScrapeErrorsDelay
+	failureRequestsCount int
+
+	// successRequestsCount is the number of success requests during the last suppressScrapeErrorsDelay
+	successRequestsCount int
 }
 
 func (sw *scrapeWork) loadLastScrape() string {
@@ -355,21 +358,26 @@ func (sw *scrapeWork) logError(s string) {
 
 func (sw *scrapeWork) scrapeAndLogError(scrapeTimestamp, realTimestamp int64) {
 	err := sw.scrapeInternal(scrapeTimestamp, realTimestamp)
+	if *suppressScrapeErrors {
+		return
+	}
 	if err == nil {
+		sw.successRequestsCount++
 		return
 	}
-	d := time.Duration(fasttime.UnixTimestamp()-sw.lastErrLogTimestamp) * time.Second
-	if *suppressScrapeErrors || d < *suppressScrapeErrorsDelay {
-		sw.errsSuppressedCount++
+	sw.failureRequestsCount++
+	if sw.nextErrorLogTime == 0 {
+		sw.nextErrorLogTime = realTimestamp + suppressScrapeErrorsDelay.Milliseconds()
+	}
+	if realTimestamp < sw.nextErrorLogTime {
 		return
 	}
-	err = fmt.Errorf("cannot scrape %q (job %q, labels %s): %w", sw.Config.ScrapeURL, sw.Config.Job(), sw.Config.Labels.String(), err)
-	if sw.errsSuppressedCount > 0 {
-		err = fmt.Errorf("%w; %d similar errors suppressed during the last %.1f seconds", err, sw.errsSuppressedCount, d.Seconds())
-	}
-	logger.Warnf("%s", err)
-	sw.lastErrLogTimestamp = fasttime.UnixTimestamp()
-	sw.errsSuppressedCount = 0
+	totalRequests := sw.failureRequestsCount + sw.successRequestsCount
+	logger.Warnf("cannot scrape target %q (%s) %d out of %d times during -promscrape.suppressScrapeErrorsDelay=%s; the last error: %s",
+		sw.Config.ScrapeURL, sw.Config.Labels.String(), sw.failureRequestsCount, totalRequests, *suppressScrapeErrorsDelay, err)
+	sw.nextErrorLogTime = realTimestamp + suppressScrapeErrorsDelay.Milliseconds()
+	sw.failureRequestsCount = 0
+	sw.successRequestsCount = 0
 }
 
 var (