lib/promscrape: log the number of unsuccessful scrapes during the last -promscrape.suppressScrapeErrorsDelay
This commit is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3413. Thanks to @jelmd for the pull request. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2575
This commit is contained in:
parent a6988eb8c3
commit a819e30ddf
1 changed file with 22 additions and 14 deletions
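In short: before this change, scrapeWork logged the first scrape error immediately and afterwards appended a "%d similar errors suppressed" suffix at most once per -promscrape.suppressScrapeErrorsDelay. After this change it counts both failed and successful scrapes over that window and emits a single summary line per window (so the first summary appears one full window after the first failure), and with -promscrape.suppressScrapeErrors set it skips counting entirely. With the logger.Warnf format from the diff below, a logged line looks roughly like this (the URL, labels, counts and error text here are illustrative, not taken from a real deployment):

cannot scrape target "http://node-exporter:9100/metrics" ({instance="node-exporter:9100",job="node"}) 17 out of 30 times during -promscrape.suppressScrapeErrorsDelay=5m0s; the last error: dial tcp 10.0.0.5:9100: connect: connection refused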
|
@@ -217,11 +217,14 @@ type scrapeWork struct {
 	// equals to or exceeds -promscrape.minResponseSizeForStreamParse
 	lastScrapeCompressed []byte
 
-	// lastErrLogTimestamp is the timestamp in unix seconds of the last logged scrape error
-	lastErrLogTimestamp uint64
+	// nextErrorLogTime is the timestamp in millisecond when the next scrape error should be logged.
+	nextErrorLogTime int64
 
-	// errsSuppressedCount is the number of suppressed scrape errors since lastErrLogTimestamp
-	errsSuppressedCount int
+	// failureRequestsCount is the number of suppressed scrape errors during the last suppressScrapeErrorsDelay
+	failureRequestsCount int
+
+	// successRequestsCount is the number of success requests during the last suppressScrapeErrorsDelay
+	successRequestsCount int
 }
 
 func (sw *scrapeWork) loadLastScrape() string {
@@ -355,21 +358,26 @@ func (sw *scrapeWork) logError(s string) {
 
 func (sw *scrapeWork) scrapeAndLogError(scrapeTimestamp, realTimestamp int64) {
 	err := sw.scrapeInternal(scrapeTimestamp, realTimestamp)
+	if *suppressScrapeErrors {
+		return
+	}
 	if err == nil {
+		sw.successRequestsCount++
 		return
 	}
-	d := time.Duration(fasttime.UnixTimestamp()-sw.lastErrLogTimestamp) * time.Second
-	if *suppressScrapeErrors || d < *suppressScrapeErrorsDelay {
-		sw.errsSuppressedCount++
+	sw.failureRequestsCount++
+	if sw.nextErrorLogTime == 0 {
+		sw.nextErrorLogTime = realTimestamp + suppressScrapeErrorsDelay.Milliseconds()
+	}
+	if realTimestamp < sw.nextErrorLogTime {
 		return
 	}
-	err = fmt.Errorf("cannot scrape %q (job %q, labels %s): %w", sw.Config.ScrapeURL, sw.Config.Job(), sw.Config.Labels.String(), err)
-	if sw.errsSuppressedCount > 0 {
-		err = fmt.Errorf("%w; %d similar errors suppressed during the last %.1f seconds", err, sw.errsSuppressedCount, d.Seconds())
-	}
-	logger.Warnf("%s", err)
-	sw.lastErrLogTimestamp = fasttime.UnixTimestamp()
-	sw.errsSuppressedCount = 0
+	totalRequests := sw.failureRequestsCount + sw.successRequestsCount
+	logger.Warnf("cannot scrape target %q (%s) %d out of %d times during -promscrape.suppressScrapeErrorsDelay=%s; the last error: %s",
+		sw.Config.ScrapeURL, sw.Config.Labels.String(), sw.failureRequestsCount, totalRequests, *suppressScrapeErrorsDelay, err)
+	sw.nextErrorLogTime = realTimestamp + suppressScrapeErrorsDelay.Milliseconds()
+	sw.failureRequestsCount = 0
+	sw.successRequestsCount = 0
 }
 
 var (
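For experimenting with the same windowed summary pattern outside VictoriaMetrics, here is a minimal, self-contained Go sketch. The errorSummarizer type and its observe method are names invented for this example; only the counting, scheduling and reset logic mirrors scrapeAndLogError in the diff above (the *suppressScrapeErrors fast path is omitted):

// errorsummary_sketch.go — a standalone illustration, not VictoriaMetrics code.
package main

import (
	"fmt"
	"time"
)

// errorSummarizer mirrors the counting logic of scrapeAndLogError above.
type errorSummarizer struct {
	window      time.Duration // plays the role of -promscrape.suppressScrapeErrorsDelay
	nextLogTime int64         // unix milliseconds when the next summary may be emitted
	failures    int
	successes   int
}

// observe records one scrape attempt at the given unix-millisecond timestamp.
// It returns a summary string at most once per window, and only on a failure.
func (s *errorSummarizer) observe(nowMs int64, err error) (string, bool) {
	if err == nil {
		s.successes++
		return "", false
	}
	s.failures++
	if s.nextLogTime == 0 {
		// First failure: schedule the first summary one window from now.
		s.nextLogTime = nowMs + s.window.Milliseconds()
	}
	if nowMs < s.nextLogTime {
		return "", false // keep counting until the window elapses
	}
	total := s.failures + s.successes
	msg := fmt.Sprintf("scrape failed %d out of %d times during the last %s; the last error: %s",
		s.failures, total, s.window, err)
	// Start a new window and reset both counters, as the diff does.
	s.nextLogTime = nowMs + s.window.Milliseconds()
	s.failures, s.successes = 0, 0
	return msg, true
}

func main() {
	s := errorSummarizer{window: 5 * time.Second}
	start := time.Now().UnixMilli()
	for i := int64(0); i < 12; i++ {
		var err error
		if i%3 != 0 { // two failures per success, purely illustrative
			err = fmt.Errorf("connection refused")
		}
		if msg, ok := s.observe(start+i*1000, err); ok {
			fmt.Println(msg)
		}
	}
}

Running the sketch prints one summary per five-second window instead of one line per failed attempt, which is exactly the noise reduction this commit is after.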