diff --git a/app/vmctl/vm/timeseries.go b/app/vmctl/vm/timeseries.go index 7a54a5152..3c4580eff 100644 --- a/app/vmctl/vm/timeseries.go +++ b/app/vmctl/vm/timeseries.go @@ -56,27 +56,37 @@ func (cw *cWriter) printf(format string, args ...interface{}) { //"{"metric":{"__name__":"cpu_usage_guest","arch":"x64","hostname":"host_19",},"timestamps":[1567296000000,1567296010000],"values":[1567296000000,66]} func (ts *TimeSeries) write(w io.Writer) (int, error) { - pointsCount := len(ts.Timestamps) - if pointsCount == 0 { - return 0, nil - } - + timestamps := ts.Timestamps + values := ts.Values cw := &cWriter{w: w} - cw.printf(`{"metric":{"__name__":%q`, ts.Name) - if len(ts.LabelPairs) > 0 { + for len(timestamps) > 0 { + // Split long lines with more than 10K samples into multiple JSON lines. + // This should limit memory usage at VictoriaMetrics during data ingestion, + // since it allocates memory for the whole JSON line and processes it in one go. + batchSize := 10000 + if batchSize > len(timestamps) { + batchSize = len(timestamps) + } + timestampsBatch := timestamps[:batchSize] + valuesBatch := values[:batchSize] + timestamps = timestamps[batchSize:] + values = values[batchSize:] + + cw.printf(`{"metric":{"__name__":%q`, ts.Name) for _, lp := range ts.LabelPairs { cw.printf(",%q:%q", lp.Name, lp.Value) } - } - cw.printf(`},"timestamps":[`) - for i := 0; i < pointsCount-1; i++ { - cw.printf(`%d,`, ts.Timestamps[i]) + pointsCount := len(timestampsBatch) + cw.printf(`},"timestamps":[`) + for i := 0; i < pointsCount-1; i++ { + cw.printf(`%d,`, timestampsBatch[i]) + } + cw.printf(`%d],"values":[`, timestampsBatch[pointsCount-1]) + for i := 0; i < pointsCount-1; i++ { + cw.printf(`%v,`, valuesBatch[i]) + } + cw.printf("%v]}\n", valuesBatch[pointsCount-1]) } - cw.printf(`%d],"values":[`, ts.Timestamps[pointsCount-1]) - for i := 0; i < pointsCount-1; i++ { - cw.printf(`%v,`, ts.Values[i]) - } - cw.printf("%v]}\n", ts.Values[pointsCount-1]) return cw.n, cw.err } diff --git 
a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3e9530c31..fc7236676 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,7 @@ sort: 15 * FEATURE: vmagent: change the default value for `-remoteWrite.queues` from 4 to `2 * numCPUs`. This should reduce scrape duration for highly loaded vmagent, which scrapes tens of thousands of targets. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1385). * FEATURE: vmagent: show the number of samples the target returned during the last scrape on `/targets` and `/api/v1/targets` pages. This should simplify debugging targets, which may return too big or too low number of samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1377). * FEATURE: vmagent: show jobs with zero discovered targets on `/targets` page. This should help debugging improperly configured scrape configs. +* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): limit the number of samples per imported JSON line to 10K. This should limit memory usage at VictoriaMetrics when importing time series with a big number of samples. * BUGFIX: prevent from adding new samples to deleted time series after the rotation of the inverted index (the rotation is performed once per `-retentionPeriod`). See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1347#issuecomment-861232136) for details. * BUGFIX: vmstorage: reduce disk write IO usage on systems with big number of CPU cores. The issue has been introduced in the release [v1.59.0](#v1590). See [this commit](aa9b56a046b6ae8083fa659df35dd5e994bf9115) and [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1338#issuecomment-863046999) for details.