diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ad8944b25..1d7f94e01 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,6 +16,7 @@ The following tip changes can be tested by building VictoriaMetrics components f ## v1.79.x long-time support release (LTS) * BUGFIX: prevent from excess CPU usage when the storage enters [read-only mode](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode). +* BUGFIX: improve performance for requests to [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the filter in the `match[]` query arg matches a small number of time series. The performance for this case regressed in [v1.78.0](https://docs.victoriametrics.com/CHANGELOG.html#v1780). See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1533) issues. ## [v1.79.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.79.2) diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 19fb8ebf6..65481c0e0 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -815,9 +815,13 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, if err != nil { return err } - if filter != nil && filter.Len() == 0 { - qt.Printf("found zero label names for filter=%s", tfss) - return nil + if filter != nil && filter.Len() <= 100e3 { + // It is faster to obtain label names by metricIDs from the filter + // instead of scanning the inverted index for the matching filters. 
+ // This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 + metricIDs := filter.AppendTo(nil) + qt.Printf("sort %d metricIDs", len(metricIDs)) + return is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames) } var prevLabelName []byte ts := &is.ts @@ -877,6 +881,41 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, return nil } +func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error { + lns["__name__"] = struct{}{} + var mn MetricName + foundLabelNames := 0 + var buf []byte + for _, metricID := range metricIDs { + var err error + buf, err = is.searchMetricNameWithCache(buf[:0], metricID) + if err != nil { + if err == io.EOF { + // It is likely the metricID->metricName entry didn't propagate to inverted index yet. + // Skip this metricID for now. + continue + } + return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err) + } + if err := mn.Unmarshal(buf); err != nil { + return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err) + } + for _, tag := range mn.Tags { + _, ok := lns[string(tag.Key)] + if !ok { + foundLabelNames++ + lns[string(tag.Key)] = struct{}{} + if len(lns) >= maxLabelNames { + qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames) + return nil + } + } + } + } + qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs)) + return nil +} + // SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr. 
func (db *indexDB) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters, tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) { @@ -972,9 +1011,13 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer if err != nil { return err } - if filter != nil && filter.Len() == 0 { - qt.Printf("found zero label values for filter=%s", tfss) - return nil + if filter != nil && filter.Len() < 100e3 { + // It is faster to obtain label values by metricIDs from the filter + // instead of scanning the inverted index for the matching filters. + // This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 + metricIDs := filter.AppendTo(nil) + qt.Printf("sort %d metricIDs", len(metricIDs)) + return is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues) } if labelName == "__name__" { // __name__ label is encoded as empty string in indexdb. @@ -1033,6 +1076,39 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer return nil } +func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) error { + var mn MetricName + foundLabelValues := 0 + var buf []byte + for _, metricID := range metricIDs { + var err error + buf, err = is.searchMetricNameWithCache(buf[:0], metricID) + if err != nil { + if err == io.EOF { + // It is likely the metricID->metricName entry didn't propagate to inverted index yet. + // Skip this metricID for now. 
+ continue + } + return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err) + } + if err := mn.Unmarshal(buf); err != nil { + return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err) + } + tagValue := mn.GetTagValue(labelName) + _, ok := lvs[string(tagValue)] + if !ok { + foundLabelValues++ + lvs[string(tagValue)] = struct{}{} + if len(lvs) >= maxLabelValues { + qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues) + return nil + } + } + } + qt.Printf("get %d distinct values for label %q from %d metricIDs", foundLabelValues, labelName, len(metricIDs)) + return nil +} + // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr. // // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.