From 852aed62f7db078af41ef87ce7de170c79368b02 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 16 Nov 2020 13:45:50 +0200 Subject: [PATCH] app/vmselect/prometheus: improve performance for `/api/v1/labels` and `/api/v1/label/<labelName>/values` on time ranges exceeding one day when `match[]` query arg is set --- app/vmselect/prometheus/prometheus.go | 101 +++++++++++++++++--------- docs/CHANGELOG.md | 2 +- 2 files changed, 66 insertions(+), 37 deletions(-) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index bb49641f7..689785516 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -662,27 +662,43 @@ func labelValuesWithMatches(at *auth.Token, denyPartialResponse bool, labelName MaxTimestamp: end, TagFilterss: tagFilterss, } - rss, isPartial, err := netstorage.ProcessSearchQuery(at, denyPartialResponse, sq, false, deadline) - if err != nil { - return nil, false, fmt.Errorf("cannot fetch data for %q: %w", sq, err) - } - m := make(map[string]struct{}) - var mLock sync.Mutex - err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error { - labelValue := rs.MetricName.GetTagValue(labelName) - if len(labelValue) == 0 { - return nil + isPartial := false + if end-start > 24*3600*1000 { + // It is cheaper to call SearchMetricNames on time ranges exceeding a day. 
+ mns, isPartialResponse, err := netstorage.SearchMetricNames(at, denyPartialResponse, sq, deadline) + if err != nil { + return nil, false, fmt.Errorf("cannot fetch time series for %q: %w", sq, err) + } + isPartial = isPartialResponse + for _, mn := range mns { + labelValue := mn.GetTagValue(labelName) + if len(labelValue) == 0 { + continue + } + m[string(labelValue)] = struct{}{} + } + } else { + rss, isPartialResponse, err := netstorage.ProcessSearchQuery(at, denyPartialResponse, sq, false, deadline) + if err != nil { + return nil, false, fmt.Errorf("cannot fetch data for %q: %w", sq, err) + } + isPartial = isPartialResponse + var mLock sync.Mutex + err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error { + labelValue := rs.MetricName.GetTagValue(labelName) + if len(labelValue) == 0 { + return nil + } + mLock.Lock() + m[string(labelValue)] = struct{}{} + mLock.Unlock() + return nil + }) + if err != nil { + return nil, false, fmt.Errorf("error when data fetching: %w", err) } - mLock.Lock() - m[string(labelValue)] = struct{}{} - mLock.Unlock() - return nil - }) - if err != nil { - return nil, false, fmt.Errorf("error when data fetching: %w", err) } - labelValues := make([]string, 0, len(m)) for labelValue := range m { labelValues = append(labelValues, labelValue) @@ -856,26 +872,39 @@ func labelsWithMatches(at *auth.Token, denyPartialResponse bool, matches []strin MaxTimestamp: end, TagFilterss: tagFilterss, } - rss, isPartial, err := netstorage.ProcessSearchQuery(at, denyPartialResponse, sq, false, deadline) - if err != nil { - return nil, false, fmt.Errorf("cannot fetch data for %q: %w", sq, err) - } - m := make(map[string]struct{}) - var mLock sync.Mutex - err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error { - mLock.Lock() - tags := rs.MetricName.Tags - for i := range tags { - t := &tags[i] - m[string(t.Key)] = struct{}{} + isPartial := false + if end-start > 24*3600*1000 { + // It is cheaper to call SearchMetricNames on time 
ranges exceeding a day. + mns, isPartialResponse, err := netstorage.SearchMetricNames(at, denyPartialResponse, sq, deadline) + if err != nil { + return nil, false, fmt.Errorf("cannot fetch time series for %q: %w", sq, err) + } + isPartial = isPartialResponse + for _, mn := range mns { + for _, tag := range mn.Tags { + m[string(tag.Key)] = struct{}{} + } + } + } else { + rss, isPartialResponse, err := netstorage.ProcessSearchQuery(at, denyPartialResponse, sq, false, deadline) + if err != nil { + return nil, false, fmt.Errorf("cannot fetch data for %q: %w", sq, err) + } + isPartial = isPartialResponse + var mLock sync.Mutex + err = rss.RunParallel(func(rs *netstorage.Result, workerID uint) error { + mLock.Lock() + for _, tag := range rs.MetricName.Tags { + m[string(tag.Key)] = struct{}{} + } + m["__name__"] = struct{}{} + mLock.Unlock() + return nil + }) + if err != nil { + return nil, false, fmt.Errorf("error when data fetching: %w", err) } - m["__name__"] = struct{}{} - mLock.Unlock() - return nil - }) - if err != nil { - return nil, false, fmt.Errorf("error when data fetching: %w", err) } labels := make([]string, 0, len(m)) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 5266bb327..d30bc3c21 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -7,7 +7,7 @@ * FEATURE: vmselect: add `"isPartial":{true|false}` field in JSON output for `/api/v1/*` functions from [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/). `"isPartial":true` is set if the response contains partial data because of a part of `vmstorage` nodes were unavailable during query processing. -* FEATURE: improve performance for `/api/v1/series` on time ranges exceeding one day. +* FEATURE: improve performance for `/api/v1/series`, `/api/v1/labels` and `/api/v1/label/<labelName>/values` on time ranges exceeding one day. 
* FEATURE: vmagent: reduce memory usage when service discovery detects big number of scrape targets and the set of discovered targets changes over time. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825 * FEATURE: vmagent: add `-promscrape.dropOriginalLabels` command-line option, which can be used for reducing memory usage when scraping big number of targets.