From 8665807fd90f402c5a42617c314ce754f90445c5 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko <roman@victoriametrics.com> Date: Wed, 29 May 2024 14:07:44 +0200 Subject: [PATCH] =?UTF-8?q?lib/storage:=20filter=20deleted=20label=20names?= =?UTF-8?q?=20and=20values=20from=20`/api/v1/labe=E2=80=A6=20(#6342)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ls` and `/api/v1/label/.../values` Check for deleted metrics when `match[]` filter matches small number of time series (optimized path). The issue was introduced [v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810). Related issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6300 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 Signed-off-by: hagen1778 <roman@victoriametrics.com> (cherry picked from commit b984f4672e4993f60562a482e18dae86262500fc) Signed-off-by: hagen1778 <roman@victoriametrics.com> --- docs/CHANGELOG.md | 1 + lib/storage/index_db.go | 16 +++++++++ lib/storage/index_db_test.go | 67 +++++++++++++++++++++++++----------- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e3eecd60e7..80ea478b02 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -17,6 +17,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix calendar display when `UTC+00:00` timezone is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6239). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent/): skip empty data blocks before sending to the remote write destination. Thanks to @viperstars for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6241). 
* BUGFIX: properly estimate the needed memory for query execution if it has the format [`aggr_func`](https://docs.victoriametrics.com/metricsql/#aggregate-functions)([`rollup_func[d]`](https://docs.victoriametrics.com/metricsql/#rollup-functions) (for example, `sum(rate(request_duration_seconds_bucket[5m]))`). This should allow performing aggregations over bigger number of time series when VictoriaMetrics runs in environments with small amounts of available memory. The issue has been introduced in [this commit](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/5138eaeea0791caa34bcfab410e0ca9cd253cd8f) in [v1.83.0](https://docs.victoriametrics.com/changelog_2022/#v1830). +* BUGFIX: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): filter deleted label names and values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) responses when the `match[]` filter matches a small number of time series. The issue was introduced in [v1.81.0](https://docs.victoriametrics.com/changelog_2022/#v1810). 
## [v1.93.14](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.14) diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 5f67afdb10..32e9439cfe 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -734,10 +734,18 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error { lns["__name__"] = struct{}{} + + dmis := is.db.s.getDeletedMetricIDs() + checkDeleted := dmis.Len() > 0 + var mn MetricName foundLabelNames := 0 var buf []byte for _, metricID := range metricIDs { + if checkDeleted && dmis.Has(metricID) { + // skip deleted IDs from result + continue + } var err error buf, err = is.searchMetricNameWithCache(buf[:0], metricID) if err != nil { @@ -931,10 +939,18 @@ func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs ma if labelName == "" { labelName = "__name__" } + + dmis := is.db.s.getDeletedMetricIDs() + checkDeleted := dmis.Len() > 0 + var mn MetricName foundLabelValues := 0 var buf []byte for _, metricID := range metricIDs { + if checkDeleted && dmis.Has(metricID) { + // skip deleted IDs from result + continue + } var err error buf, err = is.searchMetricNameWithCache(buf[:0], metricID) if err != nil { diff --git a/lib/storage/index_db_test.go b/lib/storage/index_db_test.go index 074613f334..82265e7bf7 100644 --- a/lib/storage/index_db_test.go +++ b/lib/storage/index_db_test.go @@ -1537,30 +1537,34 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { "testMetric", } sort.Strings(labelNames) + + newMN := func(name string, day, metric int) MetricName { + var mn MetricName + mn.MetricGroup = []byte(name) + mn.AddTag( + "constant", + "const", + ) + mn.AddTag( + "day", + fmt.Sprintf("%v", day), + ) + mn.AddTag( + "UniqueId", + fmt.Sprintf("%v", metric), + ) + mn.AddTag( + "some_unique_id", + fmt.Sprintf("%v", day), + ) + 
mn.sortTags() + return mn + } for day := 0; day < days; day++ { date := baseDate - uint64(day) var metricIDs uint64set.Set for metric := 0; metric < metricsPerDay; metric++ { - var mn MetricName - mn.MetricGroup = []byte("testMetric") - mn.AddTag( - "constant", - "const", - ) - mn.AddTag( - "day", - fmt.Sprintf("%v", day), - ) - mn.AddTag( - "UniqueId", - fmt.Sprintf("%v", metric), - ) - mn.AddTag( - "some_unique_id", - fmt.Sprintf("%v", day), - ) - mn.sortTags() - + mn := newMN("testMetric", day, metric) metricNameBuf = mn.Marshal(metricNameBuf[:0]) var genTSID generationTSID if !is.getTSIDByMetricName(&genTSID, metricNameBuf, date) { @@ -1601,6 +1605,29 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { } db.putIndexSearch(is2) + // add a metric that will be deleted shortly + is3 := db.getIndexSearch(noDeadline) + day := days + date := baseDate - uint64(day) + mn := newMN("deletedMetric", day, 999) + mn.AddTag( + "labelToDelete", + fmt.Sprintf("%v", day), + ) + mn.sortTags() + metricNameBuf = mn.Marshal(metricNameBuf[:0]) + var genTSID generationTSID + if !is3.getTSIDByMetricName(&genTSID, metricNameBuf, date) { + generateTSID(&genTSID.TSID, &mn) + createAllIndexesForMetricName(is3, &mn, &genTSID.TSID, date) + } + // delete the added metric. It is expected it won't be returned during searches + deletedSet := &uint64set.Set{} + deletedSet.Add(genTSID.TSID.MetricID) + s.setDeletedMetricIDs(deletedSet) + db.putIndexSearch(is3) + s.DebugFlush() + // Check SearchLabelNamesWithFiltersOnTimeRange with the specified time range. tr := TimeRange{ MinTimestamp: int64(now) - msecPerDay,