mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-10 15:14:09 +00:00
li/storage: re-use the per-day inverted index search code for searching in global index
This allows removing a big pile of outdated code for global index search. This may help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1486
This commit is contained in:
parent
cbb81c2ce9
commit
c473d8ffe1
4 changed files with 38 additions and 513 deletions
|
@ -328,12 +328,6 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_missing_tsids_for_metric_id_total`, func() float64 {
|
||||
return float64(idbm().MissingTSIDsForMetricID)
|
||||
})
|
||||
metrics.NewGauge(`vm_date_metric_ids_search_calls_total`, func() float64 {
|
||||
return float64(idbm().DateMetricIDsSearchCalls)
|
||||
})
|
||||
metrics.NewGauge(`vm_date_metric_ids_search_hits_total`, func() float64 {
|
||||
return float64(idbm().DateMetricIDsSearchHits)
|
||||
})
|
||||
metrics.NewGauge(`vm_index_blocks_with_metric_ids_processed_total`, func() float64 {
|
||||
return float64(idbm().IndexBlocksWithMetricIDsProcessed)
|
||||
})
|
||||
|
@ -489,6 +483,9 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_date_range_hits_total`, func() float64 {
|
||||
return float64(idbm().DateRangeSearchHits)
|
||||
})
|
||||
metrics.NewGauge(`vm_global_search_calls_total`, func() float64 {
|
||||
return float64(idbm().GlobalSearchCalls)
|
||||
})
|
||||
|
||||
metrics.NewGauge(`vm_missing_metric_names_for_metric_id_total`, func() float64 {
|
||||
return float64(idbm().MissingMetricNamesForMetricID)
|
||||
|
@ -534,9 +531,6 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_cache_entries{type="indexdb/tagFilters"}`, func() float64 {
|
||||
return float64(idbm().TagFiltersCacheSize)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_entries{type="indexdb/uselessTagFilters"}`, func() float64 {
|
||||
return float64(idbm().UselessTagFiltersCacheSize)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_entries{type="storage/regexps"}`, func() float64 {
|
||||
return float64(storage.RegexpCacheSize())
|
||||
})
|
||||
|
@ -577,9 +571,6 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/tagFilters"}`, func() float64 {
|
||||
return float64(idbm().TagFiltersCacheSizeBytes)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_size_bytes{type="indexdb/uselessTagFilters"}`, func() float64 {
|
||||
return float64(idbm().UselessTagFiltersCacheSizeBytes)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_size_bytes{type="storage/prefetchedMetricIDs"}`, func() float64 {
|
||||
return float64(m().PrefetchedMetricIDsSizeBytes)
|
||||
})
|
||||
|
@ -608,9 +599,6 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_cache_requests_total{type="indexdb/tagFilters"}`, func() float64 {
|
||||
return float64(idbm().TagFiltersCacheRequests)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_requests_total{type="indexdb/uselessTagFilters"}`, func() float64 {
|
||||
return float64(idbm().UselessTagFiltersCacheRequests)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_requests_total{type="storage/regexps"}`, func() float64 {
|
||||
return float64(storage.RegexpCacheRequests())
|
||||
})
|
||||
|
@ -639,9 +627,6 @@ func registerStorageMetrics(strg *storage.Storage) {
|
|||
metrics.NewGauge(`vm_cache_misses_total{type="indexdb/tagFilters"}`, func() float64 {
|
||||
return float64(idbm().TagFiltersCacheMisses)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_misses_total{type="indexdb/uselessTagFilters"}`, func() float64 {
|
||||
return float64(idbm().UselessTagFiltersCacheMisses)
|
||||
})
|
||||
metrics.NewGauge(`vm_cache_misses_total{type="storage/regexps"}`, func() float64 {
|
||||
return float64(storage.RegexpCacheMisses())
|
||||
})
|
||||
|
|
|
@ -63,18 +63,15 @@ type indexDB struct {
|
|||
// High rate for this value means corrupted indexDB.
|
||||
missingTSIDsForMetricID uint64
|
||||
|
||||
// The number of searches for metric ids by days.
|
||||
dateMetricIDsSearchCalls uint64
|
||||
|
||||
// The number of successful searches for metric ids by days.
|
||||
dateMetricIDsSearchHits uint64
|
||||
|
||||
// The number of calls for date range searches.
|
||||
dateRangeSearchCalls uint64
|
||||
|
||||
// The number of hits for date range searches.
|
||||
dateRangeSearchHits uint64
|
||||
|
||||
// The number of calls for global search.
|
||||
globalSearchCalls uint64
|
||||
|
||||
// missingMetricNamesForMetricID is a counter of missing MetricID -> MetricName entries.
|
||||
// High rate may mean corrupted indexDB due to unclean shutdown.
|
||||
// The db must be automatically recovered after that.
|
||||
|
@ -94,10 +91,6 @@ type indexDB struct {
|
|||
// The parent storage.
|
||||
s *Storage
|
||||
|
||||
// Cache for useless TagFilters entries, which have no tag filters
|
||||
// matching low number of metrics.
|
||||
uselessTagFiltersCache *workingsetcache.Cache
|
||||
|
||||
// Cache for (date, tagFilter) -> loopsCount, which is used for reducing
|
||||
// the amount of work when matching a set of filters.
|
||||
loopsPerDateTagFilterCache *workingsetcache.Cache
|
||||
|
@ -128,7 +121,6 @@ func openIndexDB(path string, s *Storage) (*indexDB, error) {
|
|||
|
||||
tagFiltersCache: workingsetcache.New(mem/32, time.Hour),
|
||||
s: s,
|
||||
uselessTagFiltersCache: workingsetcache.New(mem/128, time.Hour),
|
||||
loopsPerDateTagFilterCache: workingsetcache.New(mem/128, time.Hour),
|
||||
}
|
||||
return db, nil
|
||||
|
@ -143,11 +135,6 @@ type IndexDBMetrics struct {
|
|||
TagFiltersCacheRequests uint64
|
||||
TagFiltersCacheMisses uint64
|
||||
|
||||
UselessTagFiltersCacheSize uint64
|
||||
UselessTagFiltersCacheSizeBytes uint64
|
||||
UselessTagFiltersCacheRequests uint64
|
||||
UselessTagFiltersCacheMisses uint64
|
||||
|
||||
DeletedMetricsCount uint64
|
||||
|
||||
IndexDBRefCount uint64
|
||||
|
@ -157,11 +144,10 @@ type IndexDBMetrics struct {
|
|||
|
||||
RecentHourMetricIDsSearchCalls uint64
|
||||
RecentHourMetricIDsSearchHits uint64
|
||||
DateMetricIDsSearchCalls uint64
|
||||
DateMetricIDsSearchHits uint64
|
||||
|
||||
DateRangeSearchCalls uint64
|
||||
DateRangeSearchHits uint64
|
||||
GlobalSearchCalls uint64
|
||||
|
||||
MissingMetricNamesForMetricID uint64
|
||||
|
||||
|
@ -190,23 +176,15 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
|
|||
m.TagFiltersCacheRequests += cs.GetCalls
|
||||
m.TagFiltersCacheMisses += cs.Misses
|
||||
|
||||
cs.Reset()
|
||||
db.uselessTagFiltersCache.UpdateStats(&cs)
|
||||
m.UselessTagFiltersCacheSize += cs.EntriesCount
|
||||
m.UselessTagFiltersCacheSizeBytes += cs.BytesSize
|
||||
m.UselessTagFiltersCacheRequests += cs.GetCalls
|
||||
m.UselessTagFiltersCacheMisses += cs.Misses
|
||||
|
||||
m.DeletedMetricsCount += uint64(db.s.getDeletedMetricIDs().Len())
|
||||
|
||||
m.IndexDBRefCount += atomic.LoadUint64(&db.refCount)
|
||||
m.NewTimeseriesCreated += atomic.LoadUint64(&db.newTimeseriesCreated)
|
||||
m.MissingTSIDsForMetricID += atomic.LoadUint64(&db.missingTSIDsForMetricID)
|
||||
m.DateMetricIDsSearchCalls += atomic.LoadUint64(&db.dateMetricIDsSearchCalls)
|
||||
m.DateMetricIDsSearchHits += atomic.LoadUint64(&db.dateMetricIDsSearchHits)
|
||||
|
||||
m.DateRangeSearchCalls += atomic.LoadUint64(&db.dateRangeSearchCalls)
|
||||
m.DateRangeSearchHits += atomic.LoadUint64(&db.dateRangeSearchHits)
|
||||
m.GlobalSearchCalls += atomic.LoadUint64(&db.globalSearchCalls)
|
||||
|
||||
m.MissingMetricNamesForMetricID += atomic.LoadUint64(&db.missingMetricNamesForMetricID)
|
||||
|
||||
|
@ -277,12 +255,10 @@ func (db *indexDB) decRef() {
|
|||
|
||||
// Free space occupied by caches owned by db.
|
||||
db.tagFiltersCache.Stop()
|
||||
db.uselessTagFiltersCache.Stop()
|
||||
db.loopsPerDateTagFilterCache.Stop()
|
||||
|
||||
db.tagFiltersCache = nil
|
||||
db.s = nil
|
||||
db.uselessTagFiltersCache = nil
|
||||
db.loopsPerDateTagFilterCache = nil
|
||||
|
||||
if atomic.LoadUint64(&db.mustDrop) == 0 {
|
||||
|
@ -1623,9 +1599,6 @@ func (db *indexDB) deleteMetricIDs(metricIDs []uint64) error {
|
|||
// Reset MetricName -> TSID cache, since it may contain deleted TSIDs.
|
||||
db.s.resetAndSaveTSIDCache()
|
||||
|
||||
// Do not reset uselessTagFiltersCache, since the found metricIDs
|
||||
// on cache miss are filtered out later with deletedMetricIDs.
|
||||
|
||||
// Store the metricIDs as deleted.
|
||||
// Make this after updating the deletedMetricIDs and resetting caches
|
||||
// in order to exclude the possibility of the inconsistent state when the deleted metricIDs
|
||||
|
@ -1991,167 +1964,6 @@ func (is *indexSearch) updateMetricIDsByMetricNameMatch(metricIDs, srcMetricIDs
|
|||
return nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) getTagFilterWithMinMetricIDsCountOptimized(tfs *TagFilters, tr TimeRange, maxMetrics int) (*tagFilter, *uint64set.Set, error) {
|
||||
minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountAdaptive(tfs, maxMetrics)
|
||||
if err == nil {
|
||||
return minTf, minMetricIDs, nil
|
||||
}
|
||||
if err != errTooManyMetrics {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// All the tag filters match too many metrics.
|
||||
|
||||
// Slow path: try filtering the matching metrics by time range.
|
||||
// This should work well for cases when old metrics are constantly substituted
|
||||
// by big number of new metrics. For example, prometheus-operator creates many new
|
||||
// metrics for each new deployment.
|
||||
//
|
||||
// Allow fetching up to 20*maxMetrics metrics for the given time range
|
||||
// in the hope these metricIDs will be filtered out by other filters later.
|
||||
maxTimeRangeMetrics := 20 * maxMetrics
|
||||
metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1)
|
||||
if err == errMissingMetricIDsForDate {
|
||||
return nil, nil, fmt.Errorf("cannot find tag filter matching less than %d time series; "+
|
||||
"either increase -search.maxUniqueTimeseries or use more specific tag filters", maxMetrics)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if metricIDsForTimeRange.Len() <= maxTimeRangeMetrics {
|
||||
return nil, metricIDsForTimeRange, nil
|
||||
}
|
||||
return nil, nil, fmt.Errorf("more than %d time series found on the time range %s; either increase -search.maxUniqueTimeseries or shrink the time range",
|
||||
maxMetrics, tr.String())
|
||||
}
|
||||
|
||||
func (is *indexSearch) getTagFilterWithMinMetricIDsCountAdaptive(tfs *TagFilters, maxMetrics int) (*tagFilter, *uint64set.Set, error) {
|
||||
appendCacheKeyPrefix := func(dst []byte, prefix byte) []byte {
|
||||
dst = append(dst, prefix)
|
||||
dst = append(dst, is.db.name...)
|
||||
dst = encoding.MarshalUint64(dst, uint64(maxMetrics))
|
||||
return dst
|
||||
}
|
||||
kb := &is.kb
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessMultiTagFiltersKeyPrefix)
|
||||
kb.B = tfs.marshal(kb.B)
|
||||
if len(is.db.uselessTagFiltersCache.Get(nil, kb.B)) > 0 {
|
||||
// Skip useless work below, since the tfs doesn't contain tag filters matching less than maxMetrics metrics.
|
||||
return nil, nil, errTooManyMetrics
|
||||
}
|
||||
|
||||
// Iteratively increase maxAllowedMetrics up to maxMetrics in order to limit
|
||||
// the time required for founding the tag filter with minimum matching metrics.
|
||||
maxAllowedMetrics := 16
|
||||
if maxAllowedMetrics > maxMetrics {
|
||||
maxAllowedMetrics = maxMetrics
|
||||
}
|
||||
for {
|
||||
minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCount(tfs, maxAllowedMetrics)
|
||||
if err != errTooManyMetrics {
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if minMetricIDs.Len() < maxAllowedMetrics {
|
||||
// Found the tag filter with the minimum number of metrics.
|
||||
return minTf, minMetricIDs, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Too many metrics matched.
|
||||
if maxAllowedMetrics >= maxMetrics {
|
||||
// The tag filter with minimum matching metrics matches at least maxMetrics metrics.
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessMultiTagFiltersKeyPrefix)
|
||||
kb.B = tfs.marshal(kb.B)
|
||||
is.db.uselessTagFiltersCache.Set(kb.B, uselessTagFilterCacheValue)
|
||||
return nil, nil, errTooManyMetrics
|
||||
}
|
||||
|
||||
// Increase maxAllowedMetrics and try again.
|
||||
maxAllowedMetrics *= 4
|
||||
if maxAllowedMetrics > maxMetrics {
|
||||
maxAllowedMetrics = maxMetrics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var errTooManyMetrics = errors.New("all the tag filters match too many metrics")
|
||||
|
||||
func (is *indexSearch) getTagFilterWithMinMetricIDsCount(tfs *TagFilters, maxMetrics int) (*tagFilter, *uint64set.Set, error) {
|
||||
appendCacheKeyPrefix := func(dst []byte, prefix byte) []byte {
|
||||
dst = append(dst, prefix)
|
||||
dst = append(dst, is.db.name...)
|
||||
dst = encoding.MarshalUint64(dst, uint64(maxMetrics))
|
||||
return dst
|
||||
}
|
||||
var minMetricIDs *uint64set.Set
|
||||
var minTf *tagFilter
|
||||
kb := &is.kb
|
||||
uselessTagFilters := 0
|
||||
for i := range tfs.tfs {
|
||||
tf := &tfs.tfs[i]
|
||||
if tf.isNegative {
|
||||
// Skip negative filters.
|
||||
continue
|
||||
}
|
||||
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessSingleTagFilterKeyPrefix)
|
||||
kb.B = tf.Marshal(kb.B, is.accountID, is.projectID)
|
||||
if len(is.db.uselessTagFiltersCache.Get(nil, kb.B)) > 0 {
|
||||
// Skip useless work below, since the tf matches at least maxMetrics metrics.
|
||||
uselessTagFilters++
|
||||
continue
|
||||
}
|
||||
|
||||
metricIDs, _, err := is.getMetricIDsForTagFilter(tf, maxMetrics, int64Max)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot find MetricIDs for tagFilter %s: %w", tf, err)
|
||||
}
|
||||
if metricIDs.Len() >= maxMetrics {
|
||||
// The tf matches at least maxMetrics. Skip it
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessSingleTagFilterKeyPrefix)
|
||||
kb.B = tf.Marshal(kb.B, is.accountID, is.projectID)
|
||||
is.db.uselessTagFiltersCache.Set(kb.B, uselessTagFilterCacheValue)
|
||||
uselessTagFilters++
|
||||
continue
|
||||
}
|
||||
|
||||
minMetricIDs = metricIDs
|
||||
minTf = tf
|
||||
maxMetrics = minMetricIDs.Len()
|
||||
if maxMetrics <= 1 {
|
||||
// There is no need in inspecting other filters, since minTf
|
||||
// already matches 0 or 1 metric.
|
||||
break
|
||||
}
|
||||
}
|
||||
if minTf != nil {
|
||||
return minTf, minMetricIDs, nil
|
||||
}
|
||||
if uselessTagFilters == len(tfs.tfs) {
|
||||
// All the tag filters return at least maxMetrics entries.
|
||||
return nil, nil, errTooManyMetrics
|
||||
}
|
||||
|
||||
// There is no positive filter with small number of matching metrics.
|
||||
// Create it, so it matches all the MetricIDs.
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessNegativeTagFilterKeyPrefix)
|
||||
kb.B = tfs.marshal(kb.B)
|
||||
if len(is.db.uselessTagFiltersCache.Get(nil, kb.B)) > 0 {
|
||||
return nil, nil, errTooManyMetrics
|
||||
}
|
||||
metricIDs := &uint64set.Set{}
|
||||
if err := is.updateMetricIDsAll(metricIDs, maxMetrics); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if metricIDs.Len() >= maxMetrics {
|
||||
kb.B = appendCacheKeyPrefix(kb.B[:0], uselessNegativeTagFilterKeyPrefix)
|
||||
kb.B = tfs.marshal(kb.B)
|
||||
is.db.uselessTagFiltersCache.Set(kb.B, uselessTagFilterCacheValue)
|
||||
}
|
||||
return nil, metricIDs, nil
|
||||
}
|
||||
|
||||
func removeCompositeTagFilters(tfs []*tagFilter, prefix []byte) []*tagFilter {
|
||||
if !hasCompositeTagFilters(tfs, prefix) {
|
||||
return tfs
|
||||
|
@ -2359,41 +2171,16 @@ func (is *indexSearch) updateMetricIDsForTagFilters(metricIDs *uint64set.Set, tf
|
|||
return err
|
||||
}
|
||||
|
||||
// Slow path - try searching over the whole inverted index.
|
||||
|
||||
// Sort tag filters for faster ts.Seek below.
|
||||
sort.Slice(tfs.tfs, func(i, j int) bool {
|
||||
return tfs.tfs[i].Less(&tfs.tfs[j])
|
||||
})
|
||||
minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountOptimized(tfs, tr, maxMetrics)
|
||||
// Slow path - fall back to search in the global inverted index.
|
||||
atomic.AddUint64(&is.db.globalSearchCalls, 1)
|
||||
m, err := is.getMetricIDsForDateAndFilters(0, tfs, maxMetrics)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Find intersection of minTf with other tfs.
|
||||
for i := range tfs.tfs {
|
||||
tf := &tfs.tfs[i]
|
||||
if tf == minTf {
|
||||
continue
|
||||
}
|
||||
mIDs, err := is.intersectMetricIDsWithTagFilter(tf, minMetricIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
minMetricIDs = mIDs
|
||||
}
|
||||
metricIDs.UnionMayOwn(minMetricIDs)
|
||||
metricIDs.UnionMayOwn(m)
|
||||
return nil
|
||||
}
|
||||
|
||||
const (
|
||||
uselessSingleTagFilterKeyPrefix = 0
|
||||
uselessMultiTagFiltersKeyPrefix = 1
|
||||
uselessNegativeTagFilterKeyPrefix = 2
|
||||
)
|
||||
|
||||
var uselessTagFilterCacheValue = []byte("1")
|
||||
|
||||
func (is *indexSearch) getMetricIDsForTagFilter(tf *tagFilter, maxMetrics int, maxLoopsCount int64) (*uint64set.Set, int64, error) {
|
||||
if tf.isNegative {
|
||||
logger.Panicf("BUG: isNegative must be false")
|
||||
|
@ -2401,7 +2188,7 @@ func (is *indexSearch) getMetricIDsForTagFilter(tf *tagFilter, maxMetrics int, m
|
|||
metricIDs := &uint64set.Set{}
|
||||
if len(tf.orSuffixes) > 0 {
|
||||
// Fast path for orSuffixes - seek for rows for each value from orSuffixes.
|
||||
loopsCount, err := is.updateMetricIDsForOrSuffixesNoFilter(tf, metricIDs, maxMetrics, maxLoopsCount)
|
||||
loopsCount, err := is.updateMetricIDsForOrSuffixes(tf, metricIDs, maxMetrics, maxLoopsCount)
|
||||
if err != nil {
|
||||
return nil, loopsCount, fmt.Errorf("error when searching for metricIDs for tagFilter in fast path: %w; tagFilter=%s", err, tf)
|
||||
}
|
||||
|
@ -2409,9 +2196,7 @@ func (is *indexSearch) getMetricIDsForTagFilter(tf *tagFilter, maxMetrics int, m
|
|||
}
|
||||
|
||||
// Slow path - scan for all the rows with the given prefix.
|
||||
// Pass nil filter to getMetricIDsForTagFilterSlow, since it works faster on production workloads
|
||||
// than non-nil filter with many entries.
|
||||
loopsCount, err := is.getMetricIDsForTagFilterSlow(tf, nil, metricIDs.Add, maxLoopsCount)
|
||||
loopsCount, err := is.getMetricIDsForTagFilterSlow(tf, metricIDs.Add, maxLoopsCount)
|
||||
if err != nil {
|
||||
return nil, loopsCount, fmt.Errorf("error when searching for metricIDs for tagFilter in slow path: %w; tagFilter=%s", err, tf)
|
||||
}
|
||||
|
@ -2420,7 +2205,7 @@ func (is *indexSearch) getMetricIDsForTagFilter(tf *tagFilter, maxMetrics int, m
|
|||
|
||||
var errTooManyLoops = fmt.Errorf("too many loops is needed for applying this filter")
|
||||
|
||||
func (is *indexSearch) getMetricIDsForTagFilterSlow(tf *tagFilter, filter *uint64set.Set, f func(metricID uint64), maxLoopsCount int64) (int64, error) {
|
||||
func (is *indexSearch) getMetricIDsForTagFilterSlow(tf *tagFilter, f func(metricID uint64), maxLoopsCount int64) (int64, error) {
|
||||
if len(tf.orSuffixes) > 0 {
|
||||
logger.Panicf("BUG: the getMetricIDsForTagFilterSlow must be called only for empty tf.orSuffixes; got %s", tf.orSuffixes)
|
||||
}
|
||||
|
@ -2467,18 +2252,10 @@ func (is *indexSearch) getMetricIDsForTagFilterSlow(tf *tagFilter, filter *uint6
|
|||
// There is no need in checking it again with potentially
|
||||
// slow tf.matchSuffix, which may call regexp.
|
||||
for _, metricID := range mp.MetricIDs {
|
||||
if filter != nil && !filter.Has(metricID) {
|
||||
continue
|
||||
}
|
||||
f(metricID)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if filter != nil && !mp.HasCommonMetricIDs(filter) {
|
||||
// Faster path: there is no need in calling tf.matchSuffix,
|
||||
// since the current row has no matching metricIDs.
|
||||
continue
|
||||
}
|
||||
// Slow path: need tf.matchSuffix call.
|
||||
ok, err := tf.matchSuffix(suffix)
|
||||
// Assume that tf.matchSuffix call needs 10x more time than a single metric scan iteration.
|
||||
|
@ -2510,9 +2287,6 @@ func (is *indexSearch) getMetricIDsForTagFilterSlow(tf *tagFilter, filter *uint6
|
|||
prevMatch = true
|
||||
prevMatchingSuffix = append(prevMatchingSuffix[:0], suffix...)
|
||||
for _, metricID := range mp.MetricIDs {
|
||||
if filter != nil && !filter.Has(metricID) {
|
||||
continue
|
||||
}
|
||||
f(metricID)
|
||||
}
|
||||
}
|
||||
|
@ -2522,7 +2296,7 @@ func (is *indexSearch) getMetricIDsForTagFilterSlow(tf *tagFilter, filter *uint6
|
|||
return loopsCount, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffixesNoFilter(tf *tagFilter, metricIDs *uint64set.Set, maxMetrics int, maxLoopsCount int64) (int64, error) {
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffixes(tf *tagFilter, metricIDs *uint64set.Set, maxMetrics int, maxLoopsCount int64) (int64, error) {
|
||||
if tf.isNegative {
|
||||
logger.Panicf("BUG: isNegative must be false")
|
||||
}
|
||||
|
@ -2533,7 +2307,7 @@ func (is *indexSearch) updateMetricIDsForOrSuffixesNoFilter(tf *tagFilter, metri
|
|||
kb.B = append(kb.B[:0], tf.prefix...)
|
||||
kb.B = append(kb.B, orSuffix...)
|
||||
kb.B = append(kb.B, tagSeparatorChar)
|
||||
lc, err := is.updateMetricIDsForOrSuffixNoFilter(kb.B, metricIDs, maxMetrics, maxLoopsCount-loopsCount)
|
||||
lc, err := is.updateMetricIDsForOrSuffix(kb.B, metricIDs, maxMetrics, maxLoopsCount-loopsCount)
|
||||
loopsCount += lc
|
||||
if err != nil {
|
||||
return loopsCount, err
|
||||
|
@ -2545,25 +2319,7 @@ func (is *indexSearch) updateMetricIDsForOrSuffixesNoFilter(tf *tagFilter, metri
|
|||
return loopsCount, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffixesWithFilter(tf *tagFilter, metricIDs, filter *uint64set.Set, maxLoopsCount int64) (int64, error) {
|
||||
sortedFilter := filter.AppendTo(nil)
|
||||
kb := kbPool.Get()
|
||||
defer kbPool.Put(kb)
|
||||
var loopsCount int64
|
||||
for _, orSuffix := range tf.orSuffixes {
|
||||
kb.B = append(kb.B[:0], tf.prefix...)
|
||||
kb.B = append(kb.B, orSuffix...)
|
||||
kb.B = append(kb.B, tagSeparatorChar)
|
||||
lc, err := is.updateMetricIDsForOrSuffixWithFilter(kb.B, metricIDs, sortedFilter, tf.isNegative, maxLoopsCount-loopsCount)
|
||||
loopsCount += lc
|
||||
if err != nil {
|
||||
return loopsCount, err
|
||||
}
|
||||
}
|
||||
return loopsCount, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffixNoFilter(prefix []byte, metricIDs *uint64set.Set, maxMetrics int, maxLoopsCount int64) (int64, error) {
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffix(prefix []byte, metricIDs *uint64set.Set, maxMetrics int, maxLoopsCount int64) (int64, error) {
|
||||
ts := &is.ts
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
|
@ -2597,156 +2353,8 @@ func (is *indexSearch) updateMetricIDsForOrSuffixNoFilter(prefix []byte, metricI
|
|||
return loopsCount, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsForOrSuffixWithFilter(prefix []byte, metricIDs *uint64set.Set, sortedFilter []uint64, isNegative bool, maxLoopsCount int64) (int64, error) {
|
||||
if len(sortedFilter) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
firstFilterMetricID := sortedFilter[0]
|
||||
lastFilterMetricID := sortedFilter[len(sortedFilter)-1]
|
||||
ts := &is.ts
|
||||
mp := &is.mp
|
||||
mp.Reset()
|
||||
var loopsCount int64
|
||||
loopsPaceLimiter := 0
|
||||
ts.Seek(prefix)
|
||||
var sf []uint64
|
||||
var metricID uint64
|
||||
for ts.NextItem() {
|
||||
if loopsPaceLimiter&paceLimiterMediumIterationsMask == 0 {
|
||||
if err := checkSearchDeadlineAndPace(is.deadline); err != nil {
|
||||
return loopsCount, err
|
||||
}
|
||||
}
|
||||
loopsPaceLimiter++
|
||||
item := ts.Item
|
||||
if !bytes.HasPrefix(item, prefix) {
|
||||
return loopsCount, nil
|
||||
}
|
||||
if err := mp.InitOnlyTail(item, item[len(prefix):]); err != nil {
|
||||
return loopsCount, err
|
||||
}
|
||||
loopsCount += int64(mp.MetricIDsLen())
|
||||
if loopsCount > maxLoopsCount {
|
||||
return loopsCount, errTooManyLoops
|
||||
}
|
||||
firstMetricID, lastMetricID := mp.FirstAndLastMetricIDs()
|
||||
if lastMetricID < firstFilterMetricID {
|
||||
// Skip the item, since it contains metricIDs lower
|
||||
// than metricIDs in sortedFilter.
|
||||
continue
|
||||
}
|
||||
if firstMetricID > lastFilterMetricID {
|
||||
// Stop searching, since the current item and all the subsequent items
|
||||
// contain metricIDs higher than metricIDs in sortedFilter.
|
||||
return loopsCount, nil
|
||||
}
|
||||
sf = sortedFilter
|
||||
mp.ParseMetricIDs()
|
||||
matchingMetricIDs := mp.MetricIDs[:0]
|
||||
for _, metricID = range mp.MetricIDs {
|
||||
if len(sf) == 0 {
|
||||
break
|
||||
}
|
||||
if metricID > sf[0] {
|
||||
n := binarySearchUint64(sf, metricID)
|
||||
sf = sf[n:]
|
||||
if len(sf) == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if metricID < sf[0] {
|
||||
continue
|
||||
}
|
||||
matchingMetricIDs = append(matchingMetricIDs, metricID)
|
||||
sf = sf[1:]
|
||||
}
|
||||
if len(matchingMetricIDs) > 0 {
|
||||
if isNegative {
|
||||
for _, metricID := range matchingMetricIDs {
|
||||
metricIDs.Del(metricID)
|
||||
}
|
||||
} else {
|
||||
metricIDs.AddMulti(matchingMetricIDs)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := ts.Error(); err != nil {
|
||||
return loopsCount, fmt.Errorf("error when searching for tag filter prefix %q: %w", prefix, err)
|
||||
}
|
||||
return loopsCount, nil
|
||||
}
|
||||
|
||||
func binarySearchUint64(a []uint64, v uint64) uint {
|
||||
// Copy-pasted sort.Search from https://golang.org/src/sort/search.go?s=2246:2286#L49
|
||||
i, j := uint(0), uint(len(a))
|
||||
for i < j {
|
||||
h := (i + j) >> 1
|
||||
if h < uint(len(a)) && a[h] < v {
|
||||
i = h + 1
|
||||
} else {
|
||||
j = h
|
||||
}
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
var errFallbackToGlobalSearch = errors.New("fall back from per-day index search to global index search")
|
||||
|
||||
var errMissingMetricIDsForDate = errors.New("missing metricIDs for date")
|
||||
|
||||
func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (*uint64set.Set, error) {
|
||||
atomic.AddUint64(&is.db.dateMetricIDsSearchCalls, 1)
|
||||
minDate := uint64(tr.MinTimestamp) / msecPerDay
|
||||
maxDate := uint64(tr.MaxTimestamp) / msecPerDay
|
||||
if minDate > maxDate || maxDate-minDate > maxDaysForPerDaySearch {
|
||||
// Too much dates must be covered. Give up.
|
||||
return nil, errMissingMetricIDsForDate
|
||||
}
|
||||
if minDate == maxDate {
|
||||
// Fast path - query on a single day.
|
||||
metricIDs, err := is.getMetricIDsForDate(minDate, maxMetrics)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
atomic.AddUint64(&is.db.dateMetricIDsSearchHits, 1)
|
||||
return metricIDs, nil
|
||||
}
|
||||
|
||||
// Slower path - query over multiple days in parallel.
|
||||
metricIDs := &uint64set.Set{}
|
||||
var wg sync.WaitGroup
|
||||
var errGlobal error
|
||||
var mu sync.Mutex // protects metricIDs + errGlobal from concurrent access below.
|
||||
for minDate <= maxDate {
|
||||
wg.Add(1)
|
||||
go func(date uint64) {
|
||||
defer wg.Done()
|
||||
isLocal := is.db.getIndexSearch(is.accountID, is.projectID, is.deadline)
|
||||
m, err := isLocal.getMetricIDsForDate(date, maxMetrics)
|
||||
is.db.putIndexSearch(isLocal)
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if errGlobal != nil {
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
errGlobal = err
|
||||
return
|
||||
}
|
||||
if metricIDs.Len() < maxMetrics {
|
||||
metricIDs.UnionMayOwn(m)
|
||||
}
|
||||
}(minDate)
|
||||
minDate++
|
||||
}
|
||||
wg.Wait()
|
||||
if errGlobal != nil {
|
||||
return nil, errGlobal
|
||||
}
|
||||
atomic.AddUint64(&is.db.dateMetricIDsSearchHits, 1)
|
||||
return metricIDs, nil
|
||||
}
|
||||
|
||||
const maxDaysForPerDaySearch = 40
|
||||
|
||||
func (is *indexSearch) tryUpdatingMetricIDsForDateRange(metricIDs *uint64set.Set, tfs *TagFilters, tr TimeRange, maxMetrics int) error {
|
||||
|
@ -2900,10 +2508,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
|
|||
// so later they can be filtered out with negative filters.
|
||||
m, err := is.getMetricIDsForDate(date, maxDateMetrics)
|
||||
if err != nil {
|
||||
if err == errMissingMetricIDsForDate {
|
||||
// Zero time series were written on the given date.
|
||||
return nil, nil
|
||||
}
|
||||
return nil, fmt.Errorf("cannot obtain all the metricIDs: %w", err)
|
||||
}
|
||||
if m.Len() >= maxDateMetrics {
|
||||
|
@ -3125,13 +2729,18 @@ func (is *indexSearch) hasDateMetricID(date, metricID uint64) (bool, error) {
|
|||
}
|
||||
|
||||
func (is *indexSearch) getMetricIDsForDateTagFilter(tf *tagFilter, date uint64, commonPrefix []byte, maxMetrics int, maxLoopsCount int64) (*uint64set.Set, int64, error) {
|
||||
// Augument tag filter prefix for per-date search instead of global search.
|
||||
if !bytes.HasPrefix(tf.prefix, commonPrefix) {
|
||||
logger.Panicf("BUG: unexpected tf.prefix %q; must start with commonPrefix %q", tf.prefix, commonPrefix)
|
||||
}
|
||||
kb := kbPool.Get()
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
if date != 0 {
|
||||
// Use per-date search.
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
} else {
|
||||
// Use global search if date isn't set.
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
|
||||
}
|
||||
kb.B = append(kb.B, tf.prefix[len(commonPrefix):]...)
|
||||
tfNew := *tf
|
||||
tfNew.isNegative = false // isNegative for the original tf is handled by the caller.
|
||||
|
@ -3177,8 +2786,14 @@ func (is *indexSearch) getMetricIDsForDate(date uint64, maxMetrics int) (*uint64
|
|||
// Extract all the metricIDs from (date, __name__=value)->metricIDs entries.
|
||||
kb := kbPool.Get()
|
||||
defer kbPool.Put(kb)
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
if date != 0 {
|
||||
// Use per-date search
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
|
||||
kb.B = encoding.MarshalUint64(kb.B, date)
|
||||
} else {
|
||||
// Use global search
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
|
||||
}
|
||||
kb.B = marshalTagValue(kb.B, nil)
|
||||
var metricIDs uint64set.Set
|
||||
if err := is.updateMetricIDsForPrefix(kb.B, &metricIDs, maxMetrics); err != nil {
|
||||
|
@ -3187,15 +2802,6 @@ func (is *indexSearch) getMetricIDsForDate(date uint64, maxMetrics int) (*uint64
|
|||
return &metricIDs, nil
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsAll(metricIDs *uint64set.Set, maxMetrics int) error {
|
||||
kb := kbPool.Get()
|
||||
defer kbPool.Put(kb)
|
||||
// Extract all the metricIDs from (__name__=value)->metricIDs entries.
|
||||
kb.B = is.marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
|
||||
kb.B = marshalTagValue(kb.B, nil)
|
||||
return is.updateMetricIDsForPrefix(kb.B, metricIDs, maxMetrics)
|
||||
}
|
||||
|
||||
func (is *indexSearch) updateMetricIDsForPrefix(prefix []byte, metricIDs *uint64set.Set, maxMetrics int) error {
|
||||
ts := &is.ts
|
||||
mp := &is.mp
|
||||
|
@ -3236,38 +2842,6 @@ func (is *indexSearch) updateMetricIDsForPrefix(prefix []byte, metricIDs *uint64
|
|||
// The estimated number of index scan loops a single loop in updateMetricIDsByMetricNameMatch takes.
|
||||
const loopsCountPerMetricNameMatch = 150
|
||||
|
||||
func (is *indexSearch) intersectMetricIDsWithTagFilter(tf *tagFilter, filter *uint64set.Set) (*uint64set.Set, error) {
|
||||
if filter.Len() == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
metricIDs := filter
|
||||
if !tf.isNegative {
|
||||
metricIDs = &uint64set.Set{}
|
||||
}
|
||||
if len(tf.orSuffixes) > 0 {
|
||||
// Fast path for orSuffixes - seek for rows for each value from orSuffixes.
|
||||
_, err := is.updateMetricIDsForOrSuffixesWithFilter(tf, metricIDs, filter, int64Max)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when intersecting metricIDs for tagFilter in fast path: %w; tagFilter=%s", err, tf)
|
||||
}
|
||||
return metricIDs, nil
|
||||
}
|
||||
|
||||
// Slow path - scan for all the rows with the given prefix.
|
||||
_, err := is.getMetricIDsForTagFilterSlow(tf, filter, func(metricID uint64) {
|
||||
if tf.isNegative {
|
||||
// filter must be equal to metricIDs
|
||||
metricIDs.Del(metricID)
|
||||
} else {
|
||||
metricIDs.Add(metricID)
|
||||
}
|
||||
}, int64Max)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when intersecting metricIDs for tagFilter in slow path: %w; tagFilter=%s", err, tf)
|
||||
}
|
||||
return metricIDs, nil
|
||||
}
|
||||
|
||||
var kbPool bytesutil.ByteBufferPool
|
||||
|
||||
// Returns local unique MetricID.
|
||||
|
@ -3405,21 +2979,6 @@ func (mp *tagToMetricIDsRowParser) EqualPrefix(x *tagToMetricIDsRowParser) bool
|
|||
return mp.ProjectID == x.ProjectID && mp.AccountID == x.AccountID && mp.Date == x.Date && mp.NSPrefix == x.NSPrefix
|
||||
}
|
||||
|
||||
// FirstAndLastMetricIDs returns the first and the last metricIDs in the mp.tail.
|
||||
func (mp *tagToMetricIDsRowParser) FirstAndLastMetricIDs() (uint64, uint64) {
|
||||
tail := mp.tail
|
||||
if len(tail) < 8 {
|
||||
logger.Panicf("BUG: cannot unmarshal metricID from %d bytes; need 8 bytes", len(tail))
|
||||
return 0, 0
|
||||
}
|
||||
firstMetricID := encoding.UnmarshalUint64(tail)
|
||||
lastMetricID := firstMetricID
|
||||
if len(tail) > 8 {
|
||||
lastMetricID = encoding.UnmarshalUint64(tail[len(tail)-8:])
|
||||
}
|
||||
return firstMetricID, lastMetricID
|
||||
}
|
||||
|
||||
// MetricIDsLen returns the number of MetricIDs in the mp.tail
|
||||
func (mp *tagToMetricIDsRowParser) MetricIDsLen() int {
|
||||
return len(mp.tail) / 8
|
||||
|
@ -3448,16 +3007,6 @@ func (mp *tagToMetricIDsRowParser) ParseMetricIDs() {
|
|||
}
|
||||
}
|
||||
|
||||
// HasCommonMetricIDs returns true if mp has at least one common metric id with filter.
|
||||
func (mp *tagToMetricIDsRowParser) HasCommonMetricIDs(filter *uint64set.Set) bool {
|
||||
for _, metricID := range mp.MetricIDs {
|
||||
if filter.Has(metricID) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsDeletedTag verifies whether the tag from mp is deleted according to dmis.
|
||||
//
|
||||
// dmis must contain deleted MetricIDs.
|
||||
|
|
|
@ -1626,12 +1626,12 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// Check that all the metrics are found in updateMetricIDsAll
|
||||
var metricIDs uint64set.Set
|
||||
if err := is2.updateMetricIDsAll(&metricIDs, metricsPerDay*days); err != nil {
|
||||
// Check that all the metrics are found in global index
|
||||
metricIDs, err := is2.getMetricIDsForDate(0, metricsPerDay*days)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if !allMetricIDs.Equal(&metricIDs) {
|
||||
if !allMetricIDs.Equal(metricIDs) {
|
||||
t.Fatalf("unexpected metricIDs found;\ngot\n%d\nwant\n%d", metricIDs.AppendTo(nil), allMetricIDs.AppendTo(nil))
|
||||
}
|
||||
|
||||
|
|
|
@ -219,15 +219,6 @@ func (tfs *TagFilters) Reset(accountID, projectID uint32) {
|
|||
tfs.commonPrefix = marshalCommonPrefix(tfs.commonPrefix[:0], nsPrefixTagToMetricIDs, accountID, projectID)
|
||||
}
|
||||
|
||||
func (tfs *TagFilters) marshal(dst []byte) []byte {
|
||||
dst = encoding.MarshalUint32(dst, tfs.accountID)
|
||||
dst = encoding.MarshalUint32(dst, tfs.projectID)
|
||||
for i := range tfs.tfs {
|
||||
dst = tfs.tfs[i].MarshalNoAccountIDProjectID(dst)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// tagFilter represents a filter used for filtering tags.
|
||||
type tagFilter struct {
|
||||
key []byte
|
||||
|
|
Loading…
Reference in a new issue