lib/promscrape: follow-up for d79f1b106c

- Document the fix at docs/CHANGELOG.md
- Limit the concurrency for sendStaleMarkers() function in order to limit its memory usage
  when big number of targets disappear and staleness markers are sent
  for all the metrics exposed by these targets.
- Make sure that the writeRequestCtx is returned to the pool
  when there is no need to send staleness markers.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
This commit is contained in:
Aliaksandr Valialkin 2023-01-17 23:09:34 -08:00
parent 5ac0f18ca8
commit a844b97942
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
2 changed files with 19 additions and 6 deletions

View file

@ -28,6 +28,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): consistently put the scrape url with scrape target labels to all error logs for failed scrapes. Previously some failed scrapes were logged without this information.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not send stale markers to remote storage for series exceeding the configured [series limit](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3660).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [series limit](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter) when [staleness tracking](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) is disabled.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce memory usage spikes when big number of scrape targets disappear at once. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668). Thanks to @lzfhust for [the initial fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3669).
* BUGFIX: [Pushgateway import](https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format): properly return `200 OK` HTTP response code. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636).
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly parse `M` and `Mi` suffixes as `1e6` multipliers in `1M` and `1Mi` numeric constants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664). The issue has been introduced in [v1.86.0](https://docs.victoriametrics.com/CHANGELOG.html#v1860).
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly display range query results at `Table` view. For example, `up[5m]` query now shows all the raw samples for the last 5 minutes for the `up` metric at the `Table` view. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3516).

View file

@ -431,15 +431,15 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error
return err
}
var concurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
var processScrapedDataConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
func (sw *scrapeWork) processScrapedData(scrapeTimestamp, realTimestamp int64, body *bytesutil.ByteBuffer, err error) (bool, error) {
// This function is CPU-bound, while it may allocate big amounts of memory.
// That's why it is a good idea to limit the number of concurrent calls to this function
// in order to limit memory usage under high load without sacrificing the performance.
concurrencyLimitCh <- struct{}{}
processScrapedDataConcurrencyLimitCh <- struct{}{}
defer func() {
<-concurrencyLimitCh
<-processScrapedDataConcurrencyLimitCh
}()
endTimestamp := time.Now().UnixNano() / 1e6
@ -765,7 +765,17 @@ func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) int {
return samplesDropped
}
var sendStaleSeriesConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
// This function is CPU-bound, while it may allocate big amounts of memory.
// That's why it is a good idea to limit the number of concurrent calls to this function
// in order to limit memory usage under high load without sacrificing the performance.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
sendStaleSeriesConcurrencyLimitCh <- struct{}{}
defer func() {
<-sendStaleSeriesConcurrencyLimitCh
}()
if sw.Config.NoStaleMarkers {
return
}
@ -773,7 +783,11 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp i
if currScrape != "" {
bodyString = parser.GetRowsDiff(lastScrape, currScrape)
}
wc := writeRequestCtxPool.Get(sw.prevLabelsLen)
wc := writeRequestCtxPool.Get(sw.prevLabelsLen)
defer func() {
wc.reset()
writeRequestCtxPool.Put(wc)
}()
if bodyString != "" {
wc.rows.UnmarshalWithErrLogger(bodyString, sw.logError)
srcRows := wc.rows.Rows
@ -805,8 +819,6 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp i
staleSamplesCreated.Add(len(samples))
}
sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
wc.reset()
writeRequestCtxPool.Put(wc)
}
var staleSamplesCreated = metrics.NewCounter(`vm_promscrape_stale_samples_created_total`)