From f54133b2003499e8b719529a371832e9b3bbd134 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 23 May 2021 16:39:55 +0300 Subject: [PATCH] lib/storage: do not populate MetricID->MetricName cache during data ingestion This cache isn't needed during data ingestion, so there is no need in spending RAM on it. This reduces RAM usage on data ingestion path by 30% --- docs/CHANGELOG.md | 1 + lib/storage/index_db.go | 26 ++---------------- lib/storage/index_db_test.go | 6 +++-- lib/storage/storage.go | 51 ++++++++++++++++++++++++------------ 4 files changed, 41 insertions(+), 43 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 73083fa51..6c4e8d5ec 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -23,6 +23,7 @@ sort: 15 * FEATURE: vmagent: add support for OAuth2 authorization for scrape targets and service discovery in the same way as Prometheus does. See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#oauth2). * FEATURE: vmagent: add support for OAuth2 authorization when writing data to `-remoteWrite.url`. See `-remoteWrite.oauth2.*` config params in `/path/to/vmagent -help` output. * FEATURE: vmalert: add ability to set `extra_filter_labels` at alerting and recording group configs. See [these docs](https://docs.victoriametrics.com/vmalert.html#groups). +* FEATURE: vmstorage: reduce memory usage by up to 30% when ingesting big number of active time series. * BUGFIX: vmagent: do not retry scraping targets, which don't support HTTP. This should reduce CPU load and network usage at `vmagent` and at scrape target. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1289). * BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Prevoiusly `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240). diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 23f9eb5a6..5d3c75078 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -568,7 +568,6 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error { if err := db.generateTSID(dst, metricName, mn); err != nil { return fmt.Errorf("cannot generate TSID: %w", err) } - db.putMetricNameToCache(dst.MetricID, metricName) if err := db.createIndexes(dst, mn); err != nil { return fmt.Errorf("cannot create indexes: %w", err) } @@ -3047,7 +3046,7 @@ const ( int64Max = int64((1 << 63) - 1) ) -func (is *indexSearch) storeDateMetricID(date, metricID uint64) error { +func (is *indexSearch) storeDateMetricID(date, metricID uint64, mn *MetricName) error { ii := getIndexItems() defer putIndexItems(ii) @@ -3059,31 +3058,10 @@ func (is *indexSearch) storeDateMetricID(date, metricID uint64) error { // Create per-day inverted index entries for metricID. kb := kbPool.Get() defer kbPool.Put(kb) - mn := GetMetricName() - defer PutMetricName(mn) - var err error - // There is no need in searching for metric name in is.db.extDB, - // Since the storeDateMetricID function is called only after the metricID->metricName - // is added into the current is.db. - kb.B, err = is.searchMetricNameWithCache(kb.B[:0], metricID) - if err != nil { - if err == io.EOF { - logger.Errorf("missing metricName by metricID %d; this could be the case after unclean shutdown; "+ - "deleting the metricID, so it could be re-created next time", metricID) - if err := is.db.deleteMetricIDs([]uint64{metricID}); err != nil { - return fmt.Errorf("cannot delete metricID %d after unclean shutdown: %w", metricID, err) - } - return nil - } - return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err) - } - if err = mn.Unmarshal(kb.B); err != nil { - return fmt.Errorf("cannot unmarshal metricName %q obtained by metricID %d: %w", metricID, kb.B, err) - } kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs) kb.B = encoding.MarshalUint64(kb.B, date) ii.registerTagIndexes(kb.B, mn, metricID) - if err = is.db.tb.AddItems(ii.Items); err != nil { + if err := is.db.tb.AddItems(ii.Items); err != nil { return fmt.Errorf("cannot add per-day entires for metricID %d: %w", metricID, err) } return nil diff --git a/lib/storage/index_db_test.go b/lib/storage/index_db_test.go index d2b95952c..8678afcd0 100644 --- a/lib/storage/index_db_test.go +++ b/lib/storage/index_db_test.go @@ -697,7 +697,7 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, metricGroups int) ([]MetricNa date := uint64(timestampFromTime(time.Now())) / msecPerDay for i := range tsids { tsid := &tsids[i] - if err := is.storeDateMetricID(date, tsid.MetricID); err != nil { + if err := is.storeDateMetricID(date, tsid.MetricID, &mns[i]); err != nil { return nil, nil, fmt.Errorf("error in storeDateMetricID(%d, %d): %w", date, tsid.MetricID, err) } } @@ -1503,6 +1503,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { sort.Strings(tagKeys) for day := 0; day < days; day++ { var tsids []TSID + var mns []MetricName for metric := 0; metric < metricsPerDay; metric++ { var mn MetricName mn.MetricGroup = []byte("testMetric") @@ -1525,6 +1526,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { if err := is.GetOrCreateTSIDByName(&tsid, metricNameBuf); err != nil { t.Fatalf("unexpected error when creating tsid for mn:\n%s: %s", &mn, err) } + mns = append(mns, mn) tsids = append(tsids, tsid) } @@ -1534,7 +1536,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { for i := range tsids { tsid := &tsids[i] metricIDs.Add(tsid.MetricID) - if err := is.storeDateMetricID(date, tsid.MetricID); err != nil { + if err := is.storeDateMetricID(date, tsid.MetricID, &mns[i]); err != nil { t.Fatalf("error in storeDateMetricID(%d, %d): %s", date, tsid.MetricID, err) } } diff --git a/lib/storage/storage.go b/lib/storage/storage.go index 0ca49fa96..dd87dd593 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1375,7 +1375,7 @@ func (s *Storage) AddRows(mrs []MetricRow, precisionBits uint8) error { // Add rows to the storage. var err error rr := getRawRowsWithSize(len(mrs)) - rr.rows, err = s.add(rr.rows, mrs, precisionBits) + rr.rows, err = s.add(rr.rows[:0], mrs, precisionBits) putRawRows(rr) <-addRowsConcurrencyCh @@ -1399,9 +1399,10 @@ var ( func (s *Storage) RegisterMetricNames(mrs []MetricRow) error { var ( tsid TSID - mn MetricName metricName []byte ) + mn := GetMetricName() + defer PutMetricName(mn) idb := s.idb() is := idb.getIndexSearch(noDeadline) defer idb.putIndexSearch(is) @@ -1439,7 +1440,7 @@ func (s *Storage) RegisterMetricNames(mrs []MetricRow) error { } if !ok { // The (date, metricID) entry is missing in the indexDB. Add it there. - if err := is.storeDateMetricID(date, metricID); err != nil { + if err := is.storeDateMetricID(date, metricID, mn); err != nil { return fmt.Errorf("cannot register the metric in per-date inverted index because of error when storing (date=%d, metricID=%d) in database: %w", date, metricID, err) } @@ -1452,11 +1453,11 @@ func (s *Storage) RegisterMetricNames(mrs []MetricRow) error { func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]rawRow, error) { idb := s.idb() - rowsLen := len(rows) - if n := rowsLen + len(mrs) - cap(rows); n > 0 { + dstMrs := make([]*MetricRow, len(mrs)) + if n := len(mrs) - cap(rows); n > 0 { rows = append(rows[:cap(rows)], make([]rawRow, n)...) } - rows = rows[:rowsLen+len(mrs)] + rows = rows[:len(mrs)] j := 0 var ( // These vars are used for speeding up bulk imports of multiple adjacent rows for the same metricName. @@ -1495,7 +1496,8 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra atomic.AddUint64(&s.tooBigTimestampRows, 1) continue } - r := &rows[rowsLen+j] + dstMrs[j] = mr + r := &rows[j] j++ r.Timestamp = mr.Timestamp r.Value = mr.Value @@ -1548,8 +1550,9 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra var slowInsertsCount uint64 for i := range pendingMetricRows { pmr := &pendingMetricRows[i] - mr := &pmr.mr - r := &rows[rowsLen+j] + mr := pmr.mr + dstMrs[j] = mr + r := &rows[j] j++ r.Timestamp = mr.Timestamp r.Value = mr.Value @@ -1587,13 +1590,14 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra if firstWarn != nil { logger.Warnf("warn occurred during rows addition: %s", firstWarn) } - rows = rows[:rowsLen+j] + dstMrs = dstMrs[:j] + rows = rows[:j] var firstError error if err := s.tb.AddRows(rows); err != nil { firstError = fmt.Errorf("cannot add rows to table: %w", err) } - if err := s.updatePerDateData(rows); err != nil && firstError == nil { + if err := s.updatePerDateData(rows, dstMrs); err != nil && firstError == nil { firstError = fmt.Errorf("cannot update per-date data: %w", err) } if firstError != nil { @@ -1627,7 +1631,8 @@ func logSkippedSeries(metricNameRaw []byte, flagName string, flagValue int) { var logSkippedSeriesTicker = time.NewTicker(5 * time.Second) func getUserReadableMetricName(metricNameRaw []byte) string { - var mn MetricName + mn := GetMetricName() + defer PutMetricName(mn) if err := mn.UnmarshalRaw(metricNameRaw); err != nil { return fmt.Sprintf("cannot unmarshal metricNameRaw %q: %s", metricNameRaw, err) } @@ -1636,7 +1641,7 @@ func getUserReadableMetricName(metricNameRaw []byte) string { type pendingMetricRow struct { MetricName []byte - mr MetricRow + mr *MetricRow } type pendingMetricRows struct { @@ -1651,7 +1656,7 @@ type pendingMetricRows struct { func (pmrs *pendingMetricRows) reset() { for _, pmr := range pmrs.pmrs { pmr.MetricName = nil - pmr.mr.MetricNameRaw = nil + pmr.mr = nil } pmrs.pmrs = pmrs.pmrs[:0] pmrs.metricNamesBuf = pmrs.metricNamesBuf[:0] @@ -1675,7 +1680,7 @@ func (pmrs *pendingMetricRows) addRow(mr *MetricRow) error { } pmrs.pmrs = append(pmrs.pmrs, pendingMetricRow{ MetricName: pmrs.lastMetricName, - mr: *mr, + mr: mr, }) return nil } @@ -1695,7 +1700,7 @@ func putPendingMetricRows(pmrs *pendingMetricRows) { var pendingMetricRowsPool sync.Pool -func (s *Storage) updatePerDateData(rows []rawRow) error { +func (s *Storage) updatePerDateData(rows []rawRow, mrs []*MetricRow) error { var date uint64 var hour uint64 var prevTimestamp int64 @@ -1713,6 +1718,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { type pendingDateMetricID struct { date uint64 metricID uint64 + mr *MetricRow } var pendingDateMetricIDs []pendingDateMetricID var pendingNextDayMetricIDs []uint64 @@ -1746,6 +1752,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { pendingDateMetricIDs = append(pendingDateMetricIDs, pendingDateMetricID{ date: date + 1, metricID: metricID, + mr: mrs[i], }) pendingNextDayMetricIDs = append(pendingNextDayMetricIDs, metricID) } @@ -1766,6 +1773,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { pendingDateMetricIDs = append(pendingDateMetricIDs, pendingDateMetricID{ date: date, metricID: metricID, + mr: mrs[i], }) } if len(pendingNextDayMetricIDs) > 0 { @@ -1800,6 +1808,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { defer idb.putIndexSearch(is) var firstError error dateMetricIDsForCache := make([]dateMetricID, 0, len(pendingDateMetricIDs)) + mn := GetMetricName() for _, dmid := range pendingDateMetricIDs { date := dmid.date metricID := dmid.metricID @@ -1814,7 +1823,14 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { // The (date, metricID) entry is missing in the indexDB. Add it there. // It is OK if the (date, metricID) entry is added multiple times to db // by concurrent goroutines. - if err := is.storeDateMetricID(date, metricID); err != nil { + if err := mn.UnmarshalRaw(dmid.mr.MetricNameRaw); err != nil { + if firstError == nil { + firstError = fmt.Errorf("cannot unmarshal MetricNameRaw %q: %w", dmid.mr.MetricNameRaw, err) + } + continue + } + mn.sortTags() + if err := is.storeDateMetricID(date, metricID, mn); err != nil { if firstError == nil { firstError = fmt.Errorf("error when storing (date=%d, metricID=%d) in database: %w", date, metricID, err) } @@ -1826,6 +1842,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error { metricID: metricID, }) } + PutMetricName(mn) // The (date, metricID) entries must be added to cache only after they have been successfully added to indexDB. s.dateMetricIDCache.Store(dateMetricIDsForCache) return firstError