diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index f6919a69d..a71e0fb0c 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -28,6 +28,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): consistently put the scrape url with scrape target labels to all error logs for failed scrapes. Previously some failed scrapes were logged without this information.
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not send stale markers to remote storage for series exceeding the configured [series limit](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3660).
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [series limit](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter) when [staleness tracking](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) is disabled.
+* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce memory usage spikes when a big number of scrape targets disappear at once. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668). Thanks to @lzfhust for [the initial fix](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3669).
 * BUGFIX: [Pushgateway import](https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format): properly return `200 OK` HTTP response code. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3636).
 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly parse `M` and `Mi` suffixes as `1e6` multipliers in `1M` and `1Mi` numeric constants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3664). The issue has been introduced in [v1.86.0](https://docs.victoriametrics.com/CHANGELOG.html#v1860).
 * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly display range query results at `Table` view. For example, `up[5m]` query now shows all the raw samples for the last 5 minutes for the `up` metric at the `Table` view. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3516).
diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go
index 095138b0b..18ddb3905 100644
--- a/lib/promscrape/scrapework.go
+++ b/lib/promscrape/scrapework.go
@@ -431,15 +431,15 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error
 	return err
 }
 
-var concurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
+var processScrapedDataConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
 
 func (sw *scrapeWork) processScrapedData(scrapeTimestamp, realTimestamp int64, body *bytesutil.ByteBuffer, err error) (bool, error) {
 	// This function is CPU-bound, while it may allocate big amounts of memory.
 	// That's why it is a good idea to limit the number of concurrent calls to this function
 	// in order to limit memory usage under high load without sacrificing the performance.
-	concurrencyLimitCh <- struct{}{}
+	processScrapedDataConcurrencyLimitCh <- struct{}{}
 	defer func() {
-		<-concurrencyLimitCh
+		<-processScrapedDataConcurrencyLimitCh
 	}()
 
 	endTimestamp := time.Now().UnixNano() / 1e6
@@ -765,7 +765,17 @@ func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) int {
 	return samplesDropped
 }
 
+var sendStaleSeriesConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
+
 func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
+	// This function is CPU-bound, while it may allocate big amounts of memory.
+	// That's why it is a good idea to limit the number of concurrent calls to this function
+	// in order to limit memory usage under high load without sacrificing the performance.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
+	sendStaleSeriesConcurrencyLimitCh <- struct{}{}
+	defer func() {
+		<-sendStaleSeriesConcurrencyLimitCh
+	}()
 	if sw.Config.NoStaleMarkers {
 		return
 	}
@@ -773,7 +783,11 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp i
 	if currScrape != "" {
 		bodyString = parser.GetRowsDiff(lastScrape, currScrape)
 	}
-	wc := writeRequestCtxPool.Get(sw.prevLabelsLen)
+	wc := writeRequestCtxPool.Get(sw.prevLabelsLen)
+	defer func() {
+		wc.reset()
+		writeRequestCtxPool.Put(wc)
+	}()
 	if bodyString != "" {
 		wc.rows.UnmarshalWithErrLogger(bodyString, sw.logError)
 		srcRows := wc.rows.Rows
@@ -805,8 +819,6 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp i
 		staleSamplesCreated.Add(len(samples))
 	}
 	sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
-	wc.reset()
-	writeRequestCtxPool.Put(wc)
 }
 
 var staleSamplesCreated = metrics.NewCounter(`vm_promscrape_stale_samples_created_total`)
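
The patch applies to sendStaleSeries the same bounded-concurrency pattern that processScrapedData already uses: a buffered channel sized to the number of available CPUs acts as a counting semaphore, so at most that many goroutines can run the allocation-heavy code path at once, which caps the memory spike when many targets disappear simultaneously. Below is a minimal, self-contained Go sketch of that pattern, not the vmagent code itself: runtime.NumCPU() stands in for cgroup.AvailableCPUs(), and processStaleSeries is a hypothetical placeholder for the real per-target work.

package main

import (
	"runtime"
	"sync"
)

// limitCh acts as a counting semaphore: the buffer size caps how many
// goroutines may execute the memory-hungry section at the same time.
// runtime.NumCPU() stands in for cgroup.AvailableCPUs() used in the patch.
var limitCh = make(chan struct{}, runtime.NumCPU())

// processStaleSeries is a hypothetical stand-in for the CPU-bound,
// allocation-heavy work done per disappeared scrape target.
func processStaleSeries(target string) {
	limitCh <- struct{}{}        // acquire a slot; blocks while all slots are busy
	defer func() { <-limitCh }() // release the slot when done

	_ = target // the real code would build and push stale markers here
}

func main() {
	var wg sync.WaitGroup
	targets := []string{"job1/instance1", "job1/instance2", "job2/instance1"}
	for _, t := range targets {
		wg.Add(1)
		go func(t string) {
			defer wg.Done()
			processStaleSeries(t)
		}(t)
	}
	wg.Wait()
}

Releasing the slot inside a defer, as the patch does, guarantees the semaphore is freed even on early returns (for example when NoStaleMarkers is set); the same reasoning is why the patch moves wc.reset() and writeRequestCtxPool.Put(wc) into a defer placed right after wc is obtained from the pool.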