From d1d2771bee6e4d8807322ec4ad77fb95cd4ade36 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 12 Mar 2024 01:43:27 +0200 Subject: [PATCH] lib/storage: optimize /api/v1/labels and /api/v1/label/.../values when match[] contains metric name Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055 --- docs/CHANGELOG.md | 1 + lib/storage/index_db.go | 40 ++++++++++++++++++++++++++++-------- lib/storage/index_db_test.go | 27 ++++++++++++++++++++++++ lib/storage/tag_filters.go | 23 +++++++++++++++++++++ 4 files changed, 83 insertions(+), 8 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c56c2da5c..cc253c4d1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -46,6 +46,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose `vm_streamaggr_flush_timeouts_total` and `vm_streamaggr_dedup_flush_timeouts_total` [counters](https://docs.victoriametrics.com/keyconcepts/#counter) at [`/metrics` page](https://docs.victoriametrics.com/#monitoring), which can be used for detecting flush timeouts for stream aggregation states. Expose also `vm_streamaggr_flush_duration_seconds` and `vm_streamaggr_dedup_flush_duration_seconds` [histograms](https://docs.victoriametrics.com/keyconcepts/#histogram) for monitoring the real flush durations of stream aggregation states. * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.tlsServerName` as `Host` header in requests to `-remoteWrite.url`. This allows sending data to https remote storage by IP address instead of hostname. 
Thanks to @minor-fixes for initial idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5802). +* FEATURE: optimize [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) when `match[]` filters contain a metric name. For example, `/api/v1/label/instance/values?match[]=up` now works much faster than before. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes. 
diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index fd68ad3fb..da5f956ac 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -663,6 +663,7 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames) return nil } + var prevLabelName []byte ts := &is.ts kb := &is.kb @@ -674,8 +675,18 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, if date == 0 { nsPrefixExpected = nsPrefixTagToMetricIDs } + + hasCompositeLabelName := false kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date) - prefix := kb.B + if name := getCommonMetricNameForTagFilterss(tfss); len(name) > 0 { + compositeLabelName := marshalCompositeTagKey(nil, name, nil) + kb.B = marshalTagValue(kb.B, compositeLabelName) + // Drop trailing tagSeparator + kb.B = kb.B[:len(kb.B)-1] + hasCompositeLabelName = true + } + prefix := append([]byte{}, kb.B...) + ts.Seek(prefix) for len(lns) < maxLabelNames && ts.NextItem() { if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 { @@ -695,15 +706,15 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, continue } labelName := mp.Tag.Key - if len(labelName) == 0 { + if len(labelName) == 0 || hasCompositeLabelName { underscoreNameSeen = true } - if isArtificialTagKey(labelName) || string(labelName) == string(prevLabelName) { + if (!hasCompositeLabelName && isArtificialTagKey(labelName)) || string(labelName) == string(prevLabelName) { // Search for the next tag key. // The last char in kb.B must be tagSeparatorChar. // Just increment it in order to jump to the next tag key. 
kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date) - if len(labelName) > 0 && labelName[0] == compositeTagKeyPrefix { + if !hasCompositeLabelName && len(labelName) > 0 && labelName[0] == compositeTagKeyPrefix { // skip composite tag entries kb.B = append(kb.B, compositeTagKeyPrefix) } else { @@ -713,7 +724,15 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer, ts.Seek(kb.B) continue } - lns[string(labelName)] = struct{}{} + if !hasCompositeLabelName { + lns[string(labelName)] = struct{}{} + } else { + _, key, err := unmarshalCompositeTagKey(labelName) + if err != nil { + return fmt.Errorf("cannot unmarshal composite tag key: %s", err) + } + lns[string(key)] = struct{}{} + } prevLabelName = append(prevLabelName[:0], labelName...) } if underscoreNameSeen { @@ -853,7 +872,7 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer if err != nil { return err } - if filter != nil && filter.Len() < 100e3 { + if filter != nil && filter.Len() <= 100e3 { // It is faster to obtain label values by metricIDs from the filter // instead of scanning the inverted index for the matching filters. // This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 @@ -866,7 +885,12 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer // __name__ label is encoded as empty string in indexdb. labelName = "" } + labelNameBytes := bytesutil.ToUnsafeBytes(labelName) + if name := getCommonMetricNameForTagFilterss(tfss); len(name) > 0 && labelName != "" { + labelNameBytes = marshalCompositeTagKey(nil, name, labelNameBytes) + } + var prevLabelValue []byte ts := &is.ts kb := &is.kb @@ -879,7 +903,7 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer } kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date) kb.B = marshalTagValue(kb.B, labelNameBytes) - prefix := kb.B + prefix := append([]byte{}, kb.B...) 
ts.Seek(prefix) for len(lvs) < maxLabelValues && ts.NextItem() { if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 { @@ -1233,7 +1257,7 @@ func (is *indexSearch) getTSDBStatus(qt *querytracer.Tracer, tfss []*TagFilters, nsPrefixExpected = nsPrefixTagToMetricIDs } kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date) - prefix := kb.B + prefix := append([]byte{}, kb.B...) ts.Seek(prefix) for ts.NextItem() { if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 { diff --git a/lib/storage/index_db_test.go b/lib/storage/index_db_test.go index 5bee81a12..f5ebc9336 100644 --- a/lib/storage/index_db_test.go +++ b/lib/storage/index_db_test.go @@ -1655,6 +1655,13 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { if err := tfs.Add([]byte("constant"), []byte("const"), false, false); err != nil { t.Fatalf("cannot add filter: %s", err) } + tfsMetricName := NewTagFilters() + if err := tfsMetricName.Add([]byte("constant"), []byte("const"), false, false); err != nil { + t.Fatalf("cannot add filter on label: %s", err) + } + if err := tfsMetricName.Add(nil, []byte("testMetric"), false, false); err != nil { + t.Fatalf("cannot add filter on metric name: %s", err) + } // Perform a search within a day. // This should return the metrics for the day @@ -1690,6 +1697,16 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { t.Fatalf("unexpected labelNames; got\n%s\nwant\n%s", lns, labelNames) } + // Check SearchLabelNamesWithFiltersOnTimeRange with filters on metric name and time range. + lns, err = db.SearchLabelNamesWithFiltersOnTimeRange(nil, []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline) + if err != nil { + t.Fatalf("unexpected error in SearchLabelNamesWithFiltersOnTimeRange(filters=%s, timeRange=%s): %s", tfs, &tr, err) + } + sort.Strings(lns) + if !reflect.DeepEqual(lns, labelNames) { + t.Fatalf("unexpected labelNames; got\n%s\nwant\n%s", lns, labelNames) + } + // Check SearchLabelValuesWithFiltersOnTimeRange with the specified filter. 
lvs, err = db.SearchLabelValuesWithFiltersOnTimeRange(nil, "", []*TagFilters{tfs}, TimeRange{}, 10000, 1e9, noDeadline) if err != nil { @@ -1710,6 +1727,16 @@ func TestSearchTSIDWithTimeRange(t *testing.T) { t.Fatalf("unexpected labelValues; got\n%s\nwant\n%s", lvs, labelValues) } + // Check SearchLabelValuesWithFiltersOnTimeRange with filters on metric name and time range. + lvs, err = db.SearchLabelValuesWithFiltersOnTimeRange(nil, "", []*TagFilters{tfsMetricName}, tr, 10000, 1e9, noDeadline) + if err != nil { + t.Fatalf("unexpected error in SearchLabelValuesWithFiltersOnTimeRange(filters=%s, timeRange=%s): %s", tfs, &tr, err) + } + sort.Strings(lvs) + if !reflect.DeepEqual(lvs, labelValues) { + t.Fatalf("unexpected labelValues; got\n%s\nwant\n%s", lvs, labelValues) + } + // Perform a search across all the days, should match all metrics tr = TimeRange{ MinTimestamp: int64(now - msecPerDay*days), diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go index b7b70958c..9214651c6 100644 --- a/lib/storage/tag_filters.go +++ b/lib/storage/tag_filters.go @@ -19,6 +19,29 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil" ) +func getCommonMetricNameForTagFilterss(tfss []*TagFilters) []byte { + if len(tfss) == 0 { + return nil + } + prevName := getMetricNameFilter(tfss[0]) + for _, tfs := range tfss[1:] { + name := getMetricNameFilter(tfs) + if string(prevName) != string(name) { + return nil + } + } + return prevName +} + +func getMetricNameFilter(tfs *TagFilters) []byte { + for _, tf := range tfs.tfs { + if len(tf.key) == 0 && !tf.isNegative && !tf.isRegexp { + return tf.value + } + } + return nil +} + // convertToCompositeTagFilterss converts tfss to composite filters. // // This converts `foo{bar="baz",x=~"a.+"}` to `{foo=bar="baz",foo=x=~"a.+"} filter.