mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-21 14:44:00 +00:00
lib/storage: move deletedMetricIDs set from indexDB to Storage
This makes consitent the list of deleted metricIDs when it is used from both the current indexDB and the previous indexDB (aka extDB). This should fix the issue, which could lead to storing new samples under deleted metricIDs after indexDB rotation. See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1347#issuecomment-861232136 . Thanks to @tangqipengleoo for the initial analysis and the pull request - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1383 . This commit resolves the issue in more generic way compared to https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1383 . The downside of the commit is the deletedMetricIDs set isn't cleaned from the metricIDs outside the retention. It needs app restart. This should be OK in most cases.
This commit is contained in:
parent
ebaf68bcb0
commit
b133de1e37
4 changed files with 58 additions and 52 deletions
|
@ -9,6 +9,8 @@ sort: 15
|
|||
* FEATURE: vmagent: add service discovery for DigitalOcean (aka [digitalocean_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#digitalocean_sd_config)). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1367).
|
||||
* FEATURE: vmagent: show the number of samples the target returned during the last scrape on `/targets` and `/api/v1/targets` pages. This should simplify debugging targets, which may return too big or too low number of samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1377).
|
||||
|
||||
* BUGFIX: prevent from adding new samples to deleted time series after the rotation of the inverted index (the rotation is performed once per `-retentionPeriod`). See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1347#issuecomment-861232136) for details.
|
||||
|
||||
|
||||
## [v1.61.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.61.1)
|
||||
|
||||
|
|
|
@ -103,15 +103,6 @@ type indexDB struct {
|
|||
loopsPerDateTagFilterCache *workingsetcache.Cache
|
||||
|
||||
indexSearchPool sync.Pool
|
||||
|
||||
// An inmemory set of deleted metricIDs.
|
||||
//
|
||||
// The set holds deleted metricIDs for the current db and for the extDB.
|
||||
//
|
||||
// It is safe to keep the set in memory even for big number of deleted
|
||||
// metricIDs, since it usually requires 1 bit per deleted metricID.
|
||||
deletedMetricIDs atomic.Value
|
||||
deletedMetricIDsUpdateLock sync.Mutex
|
||||
}
|
||||
|
||||
// openIndexDB opens index db from the given path with the given caches.
|
||||
|
@ -140,14 +131,6 @@ func openIndexDB(path string, s *Storage) (*indexDB, error) {
|
|||
uselessTagFiltersCache: workingsetcache.New(mem/128, time.Hour),
|
||||
loopsPerDateTagFilterCache: workingsetcache.New(mem/128, time.Hour),
|
||||
}
|
||||
|
||||
is := db.getIndexSearch(0, 0, noDeadline)
|
||||
dmis, err := is.loadDeletedMetricIDs()
|
||||
db.putIndexSearch(is)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load deleted metricIDs: %w", err)
|
||||
}
|
||||
db.setDeletedMetricIDs(dmis)
|
||||
return db, nil
|
||||
}
|
||||
|
||||
|
@ -214,7 +197,7 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
|||
m.UselessTagFiltersCacheRequests += cs.GetCalls
|
||||
m.UselessTagFiltersCacheMisses += cs.Misses
|
||||
|
||||
m.DeletedMetricsCount += uint64(db.getDeletedMetricIDs().Len())
|
||||
m.DeletedMetricsCount += uint64(db.s.getDeletedMetricIDs().Len())
|
||||
|
||||
m.IndexDBRefCount += atomic.LoadUint64(&db.refCount)
|
||||
m.NewTimeseriesCreated += atomic.LoadUint64(&db.newTimeseriesCreated)
|
||||
|
@ -260,12 +243,6 @@ func (db *indexDB) doExtDB(f func(extDB *indexDB)) bool {
|
|||
//
|
||||
// It decrements refCount for the previous extDB.
|
||||
func (db *indexDB) SetExtDB(extDB *indexDB) {
|
||||
// Add deleted metricIDs from extDB to db.
|
||||
if extDB != nil {
|
||||
dmisExt := extDB.getDeletedMetricIDs()
|
||||
db.updateDeletedMetricIDs(dmisExt)
|
||||
}
|
||||
|
||||
db.extDBLock.Lock()
|
||||
prevExtDB := db.extDB
|
||||
db.extDB = extDB
|
||||
|
@ -759,7 +736,7 @@ func (is *indexSearch) searchTagKeysOnDate(tks map[string]struct{}, date uint64,
|
|||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
|
@ -839,7 +816,7 @@ func (is *indexSearch) searchTagKeys(tks map[string]struct{}, maxTagKeys int) er
|
|||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
|
||||
prefix := kb.B
|
||||
|
@ -957,7 +934,7 @@ func (is *indexSearch) searchTagValuesOnDate(tvs map[string]struct{}, tagKey []b
|
|||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
|
@ -1043,7 +1020,7 @@ func (is *indexSearch) searchTagValues(tvs map[string]struct{}, tagKey []byte, m
|
|||
kb := &is.kb
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
|
||||
kb.B = marshalTagValue(kb.B, tagKey)
|
||||
|
@ -1198,7 +1175,7 @@ func (is *indexSearch) searchTagValueSuffixesForPrefix(tvss map[string]struct{},
|
|||
ts := &is.ts
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
loopsPaceLimiter := 0
|
||||
ts.Seek(prefix)
|
||||
for len(tvss) < maxTagValueSuffixes && ts.NextItem() {
|
||||
|
@ -1639,7 +1616,7 @@ func (db *indexDB) deleteMetricIDs(metricIDs []uint64) error {
|
|||
// atomically add deleted metricIDs to an inmemory map.
|
||||
dmis := &uint64set.Set{}
|
||||
dmis.AddMulti(metricIDs)
|
||||
db.updateDeletedMetricIDs(dmis)
|
||||
db.s.updateDeletedMetricIDs(dmis)
|
||||
|
||||
// Reset TagFilters -> TSIDS cache, since it may contain deleted TSIDs.
|
||||
invalidateTagCache()
|
||||
|
@ -1666,21 +1643,14 @@ func (db *indexDB) deleteMetricIDs(metricIDs []uint64) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (db *indexDB) getDeletedMetricIDs() *uint64set.Set {
|
||||
return db.deletedMetricIDs.Load().(*uint64set.Set)
|
||||
}
|
||||
|
||||
func (db *indexDB) setDeletedMetricIDs(dmis *uint64set.Set) {
|
||||
db.deletedMetricIDs.Store(dmis)
|
||||
}
|
||||
|
||||
func (db *indexDB) updateDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
db.deletedMetricIDsUpdateLock.Lock()
|
||||
dmisOld := db.getDeletedMetricIDs()
|
||||
dmisNew := dmisOld.Clone()
|
||||
dmisNew.Union(metricIDs)
|
||||
db.setDeletedMetricIDs(dmisNew)
|
||||
db.deletedMetricIDsUpdateLock.Unlock()
|
||||
func (db *indexDB) loadDeletedMetricIDs() (*uint64set.Set, error) {
|
||||
is := db.getIndexSearch(0, 0, noDeadline)
|
||||
dmis, err := is.loadDeletedMetricIDs()
|
||||
db.putIndexSearch(is)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return dmis, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) loadDeletedMetricIDs() (*uint64set.Set, error) {
|
||||
|
@ -1776,7 +1746,7 @@ func (db *indexDB) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int,
|
|||
var tagFiltersKeyBufPool bytesutil.ByteBufferPool
|
||||
|
||||
func (is *indexSearch) getTSIDByMetricName(dst *TSID, metricName []byte) error {
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
ts := &is.ts
|
||||
kb := &is.kb
|
||||
kb.B = append(kb.B[:0], nsPrefixMetricNameToTSID)
|
||||
|
@ -2340,7 +2310,7 @@ func (is *indexSearch) searchMetricIDs(tfss []*TagFilters, tr TimeRange, maxMetr
|
|||
sortedMetricIDs := metricIDs.AppendTo(nil)
|
||||
|
||||
// Filter out deleted metricIDs.
|
||||
dmis := is.db.getDeletedMetricIDs()
|
||||
dmis := is.db.s.getDeletedMetricIDs()
|
||||
if dmis.Len() > 0 {
|
||||
metricIDsFiltered := sortedMetricIDs[:0]
|
||||
for _, metricID := range sortedMetricIDs {
|
||||
|
|
|
@ -1791,13 +1791,15 @@ func toTFPointers(tfs []tagFilter) []*tagFilter {
|
|||
}
|
||||
|
||||
func newTestStorage() *Storage {
|
||||
return &Storage{
|
||||
s := &Storage{
|
||||
cachePath: "test-storage-cache",
|
||||
|
||||
metricIDCache: workingsetcache.New(1234, time.Hour),
|
||||
metricNameCache: workingsetcache.New(1234, time.Hour),
|
||||
tsidCache: workingsetcache.New(1234, time.Hour),
|
||||
}
|
||||
s.setDeletedMetricIDs(&uint64set.Set{})
|
||||
return s
|
||||
}
|
||||
|
||||
func stopTestStorage(s *Storage) {
|
||||
|
|
|
@ -120,6 +120,13 @@ type Storage struct {
|
|||
|
||||
// The minimum timestamp when composite index search can be used.
|
||||
minTimestampForCompositeIndex int64
|
||||
|
||||
// An inmemory set of deleted metricIDs.
|
||||
//
|
||||
// It is safe to keep the set in memory even for big number of deleted
|
||||
// metricIDs, since it usually requires 1 bit per deleted metricID.
|
||||
deletedMetricIDs atomic.Value
|
||||
deletedMetricIDsUpdateLock sync.Mutex
|
||||
}
|
||||
|
||||
type pendingHourMetricIDEntry struct {
|
||||
|
@ -218,6 +225,18 @@ func OpenStorage(path string, retentionMsecs int64, maxHourlySeries, maxDailySer
|
|||
idbCurr.SetExtDB(idbPrev)
|
||||
s.idbCurr.Store(idbCurr)
|
||||
|
||||
// Load deleted metricIDs from idbCurr and idbPrev
|
||||
dmisCurr, err := idbCurr.loadDeletedMetricIDs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load deleted metricIDs for the current indexDB: %w", err)
|
||||
}
|
||||
dmisPrev, err := idbPrev.loadDeletedMetricIDs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load deleted metricIDs for the previous indexDB: %w", err)
|
||||
}
|
||||
s.setDeletedMetricIDs(dmisCurr)
|
||||
s.updateDeletedMetricIDs(dmisPrev)
|
||||
|
||||
// Load data
|
||||
tablePath := path + "/data"
|
||||
tb, err := openTable(tablePath, s.getDeletedMetricIDs, retentionMsecs)
|
||||
|
@ -239,16 +258,29 @@ func (s *Storage) RetentionMsecs() int64 {
|
|||
return s.retentionMsecs
|
||||
}
|
||||
|
||||
func (s *Storage) getDeletedMetricIDs() *uint64set.Set {
|
||||
return s.deletedMetricIDs.Load().(*uint64set.Set)
|
||||
}
|
||||
|
||||
func (s *Storage) setDeletedMetricIDs(dmis *uint64set.Set) {
|
||||
s.deletedMetricIDs.Store(dmis)
|
||||
}
|
||||
|
||||
func (s *Storage) updateDeletedMetricIDs(metricIDs *uint64set.Set) {
|
||||
s.deletedMetricIDsUpdateLock.Lock()
|
||||
dmisOld := s.getDeletedMetricIDs()
|
||||
dmisNew := dmisOld.Clone()
|
||||
dmisNew.Union(metricIDs)
|
||||
s.setDeletedMetricIDs(dmisNew)
|
||||
s.deletedMetricIDsUpdateLock.Unlock()
|
||||
}
|
||||
|
||||
// DebugFlush flushes recently added storage data, so it becomes visible to search.
|
||||
func (s *Storage) DebugFlush() {
|
||||
s.tb.flushRawRows()
|
||||
s.idb().tb.DebugFlush()
|
||||
}
|
||||
|
||||
func (s *Storage) getDeletedMetricIDs() *uint64set.Set {
|
||||
return s.idb().getDeletedMetricIDs()
|
||||
}
|
||||
|
||||
// CreateSnapshot creates snapshot for s and returns the snapshot name.
|
||||
func (s *Storage) CreateSnapshot() (string, error) {
|
||||
logger.Infof("creating Storage snapshot for %q...", s.path)
|
||||
|
|
Loading…
Reference in a new issue