lib/storage: tune tag filters search logic

Tune the logic according to the logs provided at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1338#issuecomment-864293624

The previous logic had a race when multiple concurrent queries execute the same tag filter without prior stats.
This could result in incorrectly stored stats for such tag filter, which then could result in non-optimal sorting of tag filters
for further queries.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1338
This commit is contained in:
Aliaksandr Valialkin 2021-06-23 13:10:23 +03:00
parent e8a5bb92b7
commit c22114c6f0

View file

@ -2787,8 +2787,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
for i := range tfs.tfs {
tf := &tfs.tfs[i]
loopsCount, filterLoopsCount, timestamp := is.getLoopsCountAndTimestampForDateFilter(date, tf)
origLoopsCount := loopsCount
origFilterLoopsCount := filterLoopsCount
if currentTime > timestamp+3600 {
// Update stats once per hour for relatively fast tag filters.
// There is no need in spending CPU resources on updating stats for heavy tag filters.
@ -2799,17 +2797,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
filterLoopsCount = 0
}
}
if loopsCount == 0 {
// Prevent from possible thundering herd issue when potentially heavy tf is executed from multiple concurrent queries
// by temporary persisting its position in the tag filters list.
if origLoopsCount == 0 {
origLoopsCount = 9e6
}
if origFilterLoopsCount == 0 {
origFilterLoopsCount = 9e6
}
is.storeLoopsCountForDateFilter(date, tf, origLoopsCount, origFilterLoopsCount)
}
tfws[i] = tagFilterWithWeight{
tf: tf,
loopsCount: loopsCount,
@ -2837,13 +2824,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
is.storeLoopsCountForDateFilter(date, tfw.tf, tfw.loopsCount, tfw.filterLoopsCount)
}
}
storeZeroLoopsCounts := func(tfws []tagFilterWithWeight) {
for _, tfw := range tfws {
if tfw.loopsCount == 0 || tfw.filterLoopsCount == 0 {
is.storeLoopsCountForDateFilter(date, tfw.tf, tfw.loopsCount, tfw.filterLoopsCount)
}
}
}
// Populate metricIDs for the first non-negative filter with the cost smaller than maxLoopsCount.
var metricIDs *uint64set.Set
@ -2869,8 +2849,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
}
// Move failing filter to the end of filter list.
storeLoopsCount(&tfw, int64Max)
storeZeroLoopsCounts(tfws[i+1:])
storeZeroLoopsCounts(tfwsRemaining)
return nil, err
}
if m.Len() >= maxDateMetrics {
@ -2892,7 +2870,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
// so later they can be filtered out with negative filters.
m, err := is.getMetricIDsForDate(date, maxDateMetrics)
if err != nil {
storeZeroLoopsCounts(tfws)
if err == errMissingMetricIDsForDate {
// Zero time series were written on the given date.
return nil, nil
@ -2901,7 +2878,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
}
if m.Len() >= maxDateMetrics {
// Too many time series found for the given (date). Fall back to global search.
storeZeroLoopsCounts(tfws)
return nil, errFallbackToGlobalSearch
}
metricIDs = m
@ -2940,7 +2916,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
metricIDsLen := metricIDs.Len()
if metricIDsLen == 0 {
// There is no need in applying the remaining filters to an empty set.
storeZeroLoopsCounts(tfws[i:])
break
}
if tfw.filterLoopsCount > int64(metricIDsLen)*loopsCountPerMetricNameMatch {
@ -2949,7 +2924,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
for _, tfw := range tfws[i:] {
tfsPostponed = append(tfsPostponed, tfw.tf)
}
storeZeroLoopsCounts(tfws[i:])
break
}
maxLoopsCount := getFirstPositiveFilterLoopsCount(tfws[i+1:])
@ -2966,7 +2940,6 @@ func (is *indexSearch) getMetricIDsForDateAndFilters(date uint64, tfs *TagFilter
}
// Move failing tf to the end of filter list
storeFilterLoopsCount(&tfw, int64Max)
storeZeroLoopsCounts(tfws[i:])
return nil, err
}
storeFilterLoopsCount(&tfw, filterLoopsCount)