lib/storage: filter deleted label names and values from `/api/v1/labe… (#6342)

…ls` and `/api/v1/label/.../values`

Check for deleted metrics when the `match[]` filter matches a small number of
time series (optimized path).

The issue was introduced in
[v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810).

Related issue
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6300 Updates
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978

Signed-off-by: hagen1778 <roman@victoriametrics.com>

(cherry picked from commit b984f4672e)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2024-05-29 14:07:44 +02:00 committed by hagen1778
parent 44538b35a1
commit 8665807fd9
No known key found for this signature in database
GPG key ID: 3BF75F3741CA9640
3 changed files with 64 additions and 20 deletions

View file

@ -17,6 +17,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): skip empty data blocks before sending to the remote write destination. Thanks to @viperstars for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241).
* BUGFIX: properly estimate the needed memory for query execution if it has the format [`aggr_func`](https://docs.victoriametrics.com/metricsql/#aggregate-functions)([`rollup_func[d]`](https://docs.victoriametrics.com/metricsql/#rollup-functions)) (for example, `sum(rate(request_duration_seconds_bucket[5m]))`). This should allow performing aggregations over a bigger number of time series when VictoriaMetrics runs in environments with small amounts of available memory. The issue has been introduced in [this commit](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/5138eaeea0791caa34bcfab410e0ca9cd253cd8f) in [v1.83.0](https://docs.victoriametrics.com/changelog_2022/#v1830).
* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): filter deleted label names and values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) responses when the `match[]` filter matches a small number of time series. The issue was introduced in [v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810).
## [v1.93.14](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.14)

View file

@ -734,10 +734,18 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error {
lns["__name__"] = struct{}{}
dmis := is.db.s.getDeletedMetricIDs()
checkDeleted := dmis.Len() > 0
var mn MetricName
foundLabelNames := 0
var buf []byte
for _, metricID := range metricIDs {
if checkDeleted && dmis.Has(metricID) {
// skip deleted IDs from result
continue
}
var err error
buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil {
@ -931,10 +939,18 @@ func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs ma
if labelName == "" {
labelName = "__name__"
}
dmis := is.db.s.getDeletedMetricIDs()
checkDeleted := dmis.Len() > 0
var mn MetricName
foundLabelValues := 0
var buf []byte
for _, metricID := range metricIDs {
if checkDeleted && dmis.Has(metricID) {
// skip deleted IDs from result
continue
}
var err error
buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
if err != nil {

View file

@ -1537,30 +1537,34 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
"testMetric",
}
sort.Strings(labelNames)
// newMN builds a MetricName whose metric group is name and which carries four tags:
// "constant" (always "const"), "day" and "some_unique_id" (both the formatted day),
// and "UniqueId" (the formatted metric). Tags are sorted before the value is returned.
newMN := func(name string, day, metric int) MetricName {
	// Format each numeric input once; the day value is shared by two tags.
	dayValue := fmt.Sprintf("%v", day)
	metricValue := fmt.Sprintf("%v", metric)

	var result MetricName
	result.MetricGroup = []byte(name)
	result.AddTag("constant", "const")
	result.AddTag("day", dayValue)
	result.AddTag("UniqueId", metricValue)
	result.AddTag("some_unique_id", dayValue)
	result.sortTags()
	return result
}
for day := 0; day < days; day++ {
date := baseDate - uint64(day)
var metricIDs uint64set.Set
for metric := 0; metric < metricsPerDay; metric++ {
var mn MetricName
mn.MetricGroup = []byte("testMetric")
mn.AddTag(
"constant",
"const",
)
mn.AddTag(
"day",
fmt.Sprintf("%v", day),
)
mn.AddTag(
"UniqueId",
fmt.Sprintf("%v", metric),
)
mn.AddTag(
"some_unique_id",
fmt.Sprintf("%v", day),
)
mn.sortTags()
mn := newMN("testMetric", day, metric)
metricNameBuf = mn.Marshal(metricNameBuf[:0])
var genTSID generationTSID
if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
@ -1601,6 +1605,29 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
}
db.putIndexSearch(is2)
// add a metric that will be deleted shortly
is3 := db.getIndexSearch(noDeadline)
day := days
date := baseDate - uint64(day)
mn := newMN("deletedMetric", day, 999)
mn.AddTag(
"labelToDelete",
fmt.Sprintf("%v", day),
)
mn.sortTags()
metricNameBuf = mn.Marshal(metricNameBuf[:0])
var genTSID generationTSID
if !is3.getTSIDByMetricName(&genTSID, metricNameBuf, date) {
generateTSID(&genTSID.TSID, &mn)
createAllIndexesForMetricName(is3, &mn, &genTSID.TSID, date)
}
// delete the added metric. It is expected it won't be returned during searches
deletedSet := &uint64set.Set{}
deletedSet.Add(genTSID.TSID.MetricID)
s.setDeletedMetricIDs(deletedSet)
db.putIndexSearch(is3)
s.DebugFlush()
// Check SearchLabelNamesWithFiltersOnTimeRange with the specified time range.
tr := TimeRange{
MinTimestamp: int64(now) - msecPerDay,