lib/storage: properly check for minMissingTimestamps

After the changes in commit 787b9cd, the minimal missing timestamp check for extDB was performed without the context of the index search prefix.
This worked fine for the single-node version, but the cluster version uses a different prefix for
metricID search requests. This could lead to incomplete results if the minimal missing timestamp was cached
for a tenant with a different ingestion pattern.

A minimal reproducible case:
- metrics are ingested for tenants 0 and 1
- at some point in time, metrics ingestion for tenant 1 stops
- index records have the following timestamp layout:
  tenant 0: 1,2,3,4,5,6
  tenant 1: 1,2,3,4
- after indexDB rotation, containsTimeRange lookups may produce
  incorrect results:
  a time range request of 5:6 for tenant 1 caches 5 as the minimal missing timestamp;
  a request for the same or a smaller time range for tenant 0 then returns
  empty results.

Second case:
- requests for a tenant without metrics always update the atomic value with a minimal missing timestamp that is incorrect for other tenants, as the sketch below illustrates.
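A minimal, self-contained sketch of the pre-fix behavior (the shared atomic value and the hasData callback are simplifications for illustration, not the actual VictoriaMetrics code) shows how a timestamp cached for one tenant hides data of another:

package main

import (
	"fmt"
	"sync/atomic"
)

// minMissingTimestamp is shared by ALL tenants (pre-fix behavior).
var minMissingTimestamp atomic.Int64

// containsTimeRange reports whether the readonly previous indexDB may contain
// data starting at minTs. hasData stands in for the per-tenant index lookup.
func containsTimeRange(minTs int64, hasData func(int64) bool) bool {
	cached := minMissingTimestamp.Load()
	if cached != 0 && minTs >= cached {
		return false // cached "missing" result is applied regardless of tenant
	}
	if hasData(minTs) {
		return true
	}
	minMissingTimestamp.CompareAndSwap(cached, minTs)
	return false
}

func main() {
	tenant0 := func(ts int64) bool { return ts <= 6 } // tenant 0 has data up to day 6
	tenant1 := func(ts int64) bool { return ts <= 4 } // tenant 1 stopped ingesting at day 4

	containsTimeRange(5, tenant1)              // caches 5 as the minimal missing timestamp
	fmt.Println(containsTimeRange(5, tenant0)) // prints false, although tenant 0 has data for day 5
}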

This commit replaces the single atomic value with a map keyed by the index search prefix. It adds a slight performance overhead,
but works consistently for the cluster version: minMissingTimestamp is cached per search prefix key, which includes the tenantID.

Since the map is only populated at runtime, it doesn't hold entries for tenants that are never queried.
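A condensed sketch of the new caching scheme (the cache type and the string keys below are illustrative; in the actual change the key is the prefix produced by marshalCommonPrefix and the map lives on indexDB, as the diff below shows):

package main

import (
	"fmt"
	"sync"
)

// minMissingTimestampCache caches the minimal missing timestamp per index
// search prefix; in the cluster version the prefix includes the tenant.
type minMissingTimestampCache struct {
	mu sync.RWMutex
	m  map[string]int64
}

func newMinMissingTimestampCache() *minMissingTimestampCache {
	return &minMissingTimestampCache{m: make(map[string]int64)}
}

// Get returns the cached minimal missing timestamp for the given prefix key.
func (c *minMissingTimestampCache) Get(key []byte) (int64, bool) {
	c.mu.RLock()
	ts, ok := c.m[string(key)]
	c.mu.RUnlock()
	return ts, ok
}

// Set stores the minimal missing timestamp for the given prefix key.
func (c *minMissingTimestampCache) Set(key []byte, ts int64) {
	c.mu.Lock()
	c.m[string(key)] = ts
	c.mu.Unlock()
}

func main() {
	c := newMinMissingTimestampCache()
	c.Set([]byte("tenant-1"), 5) // a query for tenant 1 caches its own value
	if _, ok := c.Get([]byte("tenant-0")); !ok {
		fmt.Println("tenant 0 is unaffected by the timestamp cached for tenant 1")
	}
}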

Related issue:
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417
Nikolay 2024-11-15 16:18:32 +01:00 committed by f41gh7
parent 479ae93e04
commit 1985110de2
3 changed files with 160 additions and 19 deletions


@@ -24,10 +24,9 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
 * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl/): drop rows that do not belong to the current series during import. The dropped rows should belong to another series whose tags are a superset of the current series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7301) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7330). Thanks to @dpedu for reporting and cooperating with the test.
 * BUGFIX: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/), `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): keep the order of resulting time series when `limit_offset` is applied. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7068).
 * BUGFIX: [graphite](https://docs.victoriametrics.com/#graphite-render-api-usage): properly handle xFilesFactor=0 for `transformRemoveEmptySeries` function. See [this PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/7337) for details.
 * BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): properly check availability of all the backends before giving up when proxying requests. Previously, vmauth could return an error even if there were healthy backends available. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3061) for details.
-* BUGFIX: [vmauth](https://docs.victoriametrics.com/vmauth/): properly inherit [`drop_src_path_prefix_parts`](https://docs.victoriametrics.com/vmauth/#dropping-request-path-prefix), [`load_balancing_policy`](https://docs.victoriametrics.com/vmauth/#high-availability), [`retry_status_codes`](https://docs.victoriametrics.com/vmauth/#load-balancing) and [`discover_backend_ips`](https://docs.victoriametrics.com/vmauth/#discovering-backend-ips) options by `url_map` entries if `url_prefix` option isn't set at the [user config level](https://docs.victoriametrics.com/vmauth/#auth-config). These options were inherited only when the `url_prefix` option was set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7519).
-* BUGFIX: [dashboards](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards): add `file` label filter to vmalert dashboard panels. Previously, metrics from groups with the same name but different rule files could be mixed in the results.
+* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly return query results for search requests after index rotation. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7417) for details.
+* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): Properly handle [multitenant](https://docs.victoriametrics.com/cluster-victoriametrics/#multitenancy-via-labels) query request errors and correctly perform search for available tenants. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7549) for details.

 ## [v1.106.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.106.0)


@@ -91,13 +91,17 @@ type indexDB struct {
 	// The db must be automatically recovered after that.
 	missingMetricNamesForMetricID atomic.Uint64

-	// minMissingTimestamp is the minimum timestamp, which is missing in the given indexDB.
+	// minMissingTimestampByKey holds the minimum timestamps by index search key,
+	// which is missing in the given indexDB.
+	// Key must be formed with marshalCommonPrefix function.
 	//
 	// This field is used at containsTimeRange() function only for the previous indexDB,
 	// since this indexDB is readonly.
 	// This field cannot be used for the current indexDB, since it may receive data
 	// with bigger timestamps at any time.
-	minMissingTimestamp atomic.Int64
+	minMissingTimestampByKey map[string]int64
+	// protects minMissingTimestampByKey
+	minMissingTimestampByKeyLock sync.RWMutex

 	// generation identifies the index generation ID
 	// and is used for syncing items from different indexDBs
@@ -162,6 +166,7 @@ func mustOpenIndexDB(path string, s *Storage, isReadOnly *atomic.Bool) *indexDB
 		tb:   tb,
 		name: name,

+		minMissingTimestampByKey:   make(map[string]int64),
 		tagFiltersToMetricIDsCache: workingsetcache.New(tagFiltersCacheSize),
 		s:                          s,
 		loopsPerDateTagFilterCache: workingsetcache.New(mem / 128),
@@ -1945,25 +1950,36 @@ func (is *indexSearch) containsTimeRange(tr TimeRange) bool {
 		// This means that it may contain data for the given tr with probability close to 100%.
 		return true
 	}

 	// The db corresponds to the previous indexDB, which is readonly.
 	// So it is safe caching the minimum timestamp, which isn't covered by the db.
-	minMissingTimestamp := db.minMissingTimestamp.Load()
-	if minMissingTimestamp != 0 && tr.MinTimestamp >= minMissingTimestamp {
+
+	// use common prefix as a key for minMissingTimestamp
+	// it's needed to properly track timestamps for cluster version
+	// which uses tenant labels for the index search
+	kb := &is.kb
+	kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
+	key := kb.B
+
+	db.minMissingTimestampByKeyLock.RLock()
+	minMissingTimestamp, ok := db.minMissingTimestampByKey[string(key)]
+	db.minMissingTimestampByKeyLock.RUnlock()
+
+	if ok && tr.MinTimestamp >= minMissingTimestamp {
 		return false
 	}
-	if is.containsTimeRangeSlow(tr) {
+	if is.containsTimeRangeSlowForPrefixBuf(kb, tr) {
 		return true
 	}
-	db.minMissingTimestamp.CompareAndSwap(minMissingTimestamp, tr.MinTimestamp)
+
+	db.minMissingTimestampByKeyLock.Lock()
+	db.minMissingTimestampByKey[string(key)] = tr.MinTimestamp
+	db.minMissingTimestampByKeyLock.Unlock()
+
 	return false
 }

-func (is *indexSearch) containsTimeRangeSlow(tr TimeRange) bool {
+func (is *indexSearch) containsTimeRangeSlowForPrefixBuf(prefixBuf *bytesutil.ByteBuffer, tr TimeRange) bool {
 	ts := &is.ts
-	kb := &is.kb

 	// Verify whether the tr.MinTimestamp is included into `ts` or is smaller than the minimum date stored in `ts`.
 	// Do not check whether tr.MaxTimestamp is included into `ts` or is bigger than the max date stored in `ts` for performance reasons.
@@ -1972,13 +1988,12 @@ func (is *indexSearch) containsTimeRangeSlow(tr TimeRange) bool {
 	// The main practical case allows skipping searching in prev indexdb (`ts`) when `tr`
 	// is located above the max date stored there.
 	minDate := uint64(tr.MinTimestamp) / msecPerDay
-	kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
-	prefix := kb.B
-	kb.B = encoding.MarshalUint64(kb.B, minDate)
-	ts.Seek(kb.B)
+	prefix := prefixBuf.B
+	prefixBuf.B = encoding.MarshalUint64(prefixBuf.B, minDate)
+	ts.Seek(prefixBuf.B)
 	if !ts.NextItem() {
 		if err := ts.Error(); err != nil {
-			logger.Panicf("FATAL: error when searching for minDate=%d, prefix %q: %w", minDate, kb.B, err)
+			logger.Panicf("FATAL: error when searching for minDate=%d, prefix %q: %w", minDate, prefixBuf.B, err)
 		}
 		return false
 	}
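The reason keying the cache by this prefix helps: in the single-node version the common prefix is essentially just the namespace prefix byte, while in the cluster version it also encodes the tenant, so per-prefix cache entries cannot leak between tenants. The helper below is a hypothetical illustration of such a tenant-aware prefix (it is not the actual marshalCommonPrefix implementation, and the constant value is made up):

package main

import (
	"encoding/binary"
	"fmt"
)

// marshalTenantPrefix is a hypothetical stand-in for a cluster-style
// marshalCommonPrefix: the namespace prefix byte is followed by the tenant
// identifiers, so different tenants produce different cache keys.
func marshalTenantPrefix(dst []byte, nsPrefix byte, accountID, projectID uint32) []byte {
	dst = append(dst, nsPrefix)
	dst = binary.BigEndian.AppendUint32(dst, accountID)
	dst = binary.BigEndian.AppendUint32(dst, projectID)
	return dst
}

func main() {
	const nsPrefixDateToMetricID = 5 // illustrative value only
	key0 := marshalTenantPrefix(nil, nsPrefixDateToMetricID, 0, 0)
	key1 := marshalTenantPrefix(nil, nsPrefixDateToMetricID, 1, 0)
	fmt.Printf("tenant 0:0 key: %x\ntenant 1:0 key: %x\n", key0, key1)
}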


@@ -2101,3 +2101,130 @@ func stopTestStorage(s *Storage) {
 	s.tsidCache.Stop()
 	fs.MustRemoveDirAtomic(s.cachePath)
 }
+
+func TestSearchContainsTimeRange(t *testing.T) {
+	path := t.Name()
+	os.RemoveAll(path)
+	s := MustOpenStorage(path, retentionMax, 0, 0)
+	db := s.idb()
+
+	is := db.getIndexSearch(noDeadline)
+
+	// Create a bunch of per-day time series
+	const (
+		days                = 6
+		tenant2IngestionDay = 8
+		metricsPerDay       = 1000
+	)
+	rotationDay := time.Date(2019, time.October, 15, 5, 1, 0, 0, time.UTC)
+	rotationMillis := uint64(rotationDay.UnixMilli())
+	rotationDate := rotationMillis / msecPerDay
+	var metricNameBuf []byte
+	perDayMetricIDs := make(map[uint64]*uint64set.Set)
+	labelNames := []string{
+		"__name__", "constant", "day", "UniqueId", "some_unique_id",
+	}
+	sort.Strings(labelNames)
+
+	newMN := func(name string, day, metric int) MetricName {
+		var mn MetricName
+		mn.MetricGroup = []byte(name)
+		mn.AddTag(
+			"constant",
+			"const",
+		)
+		mn.AddTag(
+			"day",
+			fmt.Sprintf("%v", day),
+		)
+		mn.AddTag(
+			"UniqueId",
+			fmt.Sprintf("%v", metric),
+		)
+		mn.AddTag(
+			"some_unique_id",
+			fmt.Sprintf("%v", day),
+		)
+		mn.sortTags()
+		return mn
+	}
+
+	// ingest metrics for tenant 0:0
+	for day := 0; day < days; day++ {
+		date := rotationDate - uint64(day)
+		var metricIDs uint64set.Set
+		for metric := range metricsPerDay {
+			mn := newMN("testMetric", day, metric)
+			metricNameBuf = mn.Marshal(metricNameBuf[:0])
+			var genTSID generationTSID
+			if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
+				generateTSID(&genTSID.TSID, &mn)
+				createAllIndexesForMetricName(is, &mn, &genTSID.TSID, date)
+			}
+			metricIDs.Add(genTSID.TSID.MetricID)
+		}
+		perDayMetricIDs[date] = &metricIDs
+	}
+	db.putIndexSearch(is)
+
+	// Flush index to disk, so it becomes visible for search
+	s.DebugFlush()
+
+	is2 := db.getIndexSearch(noDeadline)
+
+	// Check that all the metrics are found for all the days.
+	for date := rotationDate - days + 1; date <= rotationDate; date++ {
+		metricIDs, err := is2.getMetricIDsForDate(date, metricsPerDay)
+		if err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		if !perDayMetricIDs[date].Equal(metricIDs) {
+			t.Fatalf("unexpected metricIDs found;\ngot\n%d\nwant\n%d", metricIDs.AppendTo(nil), perDayMetricIDs[date].AppendTo(nil))
+		}
+	}
+	db.putIndexSearch(is2)
+
+	// rotate indexdb
+	s.mustRotateIndexDB(rotationDay)
+	db = s.idb()
+
+	// perform search for 0:0 tenant
+	// results of previous search requests shouldn't affect it
+	isExt := db.extDB.getIndexSearch(noDeadline)
+
+	// search for a range that covers the prev indexDB for dates before ingestion stopped
+	tr := TimeRange{
+		MinTimestamp: int64(rotationMillis - msecPerDay*(days)),
+		MaxTimestamp: int64(rotationMillis),
+	}
+	if !isExt.containsTimeRange(tr) {
+		t.Fatalf("expected to have given time range at prev IndexDB")
+	}
+
+	// search for a range that does not exist at the prev indexDB
+	tr = TimeRange{
+		MinTimestamp: int64(rotationMillis + msecPerDay*(days+4)),
+		MaxTimestamp: int64(rotationMillis + msecPerDay*(days+2)),
+	}
+	if isExt.containsTimeRange(tr) {
+		t.Fatalf("not expected to have given time range at prev IndexDB")
+	}
+
+	key := isExt.marshalCommonPrefix(nil, nsPrefixDateToMetricID)
+	db.extDB.minMissingTimestampByKeyLock.Lock()
+	minMissingTimestamp := db.extDB.minMissingTimestampByKey[string(key)]
+	db.extDB.minMissingTimestampByKeyLock.Unlock()
+
+	if minMissingTimestamp != tr.MinTimestamp {
+		t.Fatalf("unexpected minMissingTimestamp for 0:0 tenant: got %d, want %d", minMissingTimestamp, tr.MinTimestamp)
+	}
+
+	db.extDB.putIndexSearch(isExt)
+	s.MustClose()
+	fs.MustRemoveAll(path)
+}