app/vmselect: add optional limit query arg to /api/v1/labels and /api/v1/label_values endpoints

This arg allows limiting the number of sample values returned from these APIs.
This commit is contained in:
Aliaksandr Valialkin 2022-06-10 09:50:30 +03:00
parent 483b402bb2
commit 89b778902b
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
9 changed files with 70 additions and 37 deletions

View file

@ -607,6 +607,8 @@ For example, the following query would return data for the last 30 minutes: `/ap
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used.
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
Additionally, VictoriaMetrics provides the following handlers:

View file

@ -197,7 +197,7 @@ func MetricsExpandHandler(startTime time.Time, w http.ResponseWriter, r *http.Re
func MetricsIndexHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
jsonp := r.FormValue("jsonp")
metricNames, err := netstorage.GetLabelValues(nil, "__name__", deadline)
metricNames, err := netstorage.GetLabelValues(nil, "__name__", 0, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain metric names: %w`, err)
}

View file

@ -5,7 +5,6 @@ import (
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
@ -159,7 +158,7 @@ var (
// See https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support
func TagsAutoCompleteValuesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
limit, err := getInt(r, "limit")
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
@ -245,7 +244,7 @@ var tagsAutoCompleteValuesDuration = metrics.NewSummary(`vm_request_duration_sec
// See https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support
func TagsAutoCompleteTagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
limit, err := getInt(r, "limit")
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
@ -324,7 +323,7 @@ var tagsAutoCompleteTagsDuration = metrics.NewSummary(`vm_request_duration_secon
// See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags
func TagsFindSeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
limit, err := getInt(r, "limit")
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
@ -392,7 +391,7 @@ var tagsFindSeriesDuration = metrics.NewSummary(`vm_request_duration_seconds{pat
// See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags
func TagValuesHandler(startTime time.Time, tagName string, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
limit, err := getInt(r, "limit")
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
@ -420,7 +419,7 @@ var tagValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/t
// See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags
func TagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
deadline := searchutils.GetDeadlineForQuery(r, startTime)
limit, err := getInt(r, "limit")
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
@ -443,18 +442,6 @@ func TagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) er
var tagsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/tags"}`)
// getInt reads the argName form value from r and parses it as a decimal integer.
//
// A missing or empty value yields 0 without an error.
func getInt(r *http.Request, argName string) (int, error) {
	raw := r.FormValue(argName)
	if raw == "" {
		return 0, nil
	}
	parsed, err := strconv.Atoi(raw)
	if err == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("cannot parse %q=%q: %w", argName, raw, err)
}
func getSearchQueryForExprs(startTime time.Time, etfs [][]storage.TagFilter, exprs []string, maxMetrics int) (*storage.SearchQuery, error) {
tfs, err := exprsToTagFilters(exprs)
if err != nil {

View file

@ -612,13 +612,16 @@ func DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline sear
}
// GetLabelsOnTimeRange returns labels for the given tr until the given deadline.
func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) {
func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, limit int, deadline searchutils.Deadline) ([]string, error) {
qt = qt.NewChild("get labels on timeRange=%s", &tr)
defer qt.Done()
if deadline.Exceeded() {
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
}
labels, err := vmstorage.SearchTagKeysOnTimeRange(tr, *maxTagKeysPerSearch, deadline.Deadline())
if limit > *maxTagKeysPerSearch || limit <= 0 {
limit = *maxTagKeysPerSearch
}
labels, err := vmstorage.SearchTagKeysOnTimeRange(tr, limit, deadline.Deadline())
qt.Printf("get %d labels", len(labels))
if err != nil {
return nil, fmt.Errorf("error during labels search on time range: %w", err)
@ -642,7 +645,7 @@ func GetGraphiteTags(qt *querytracer.Tracer, filter string, limit int, deadline
if deadline.Exceeded() {
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
}
labels, err := GetLabels(nil, deadline)
labels, err := GetLabels(nil, 0, deadline)
if err != nil {
return nil, err
}
@ -683,13 +686,16 @@ func hasString(a []string, s string) bool {
}
// GetLabels returns labels until the given deadline.
func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string, error) {
func GetLabels(qt *querytracer.Tracer, limit int, deadline searchutils.Deadline) ([]string, error) {
qt = qt.NewChild("get labels")
defer qt.Done()
if deadline.Exceeded() {
return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
}
labels, err := vmstorage.SearchTagKeys(*maxTagKeysPerSearch, deadline.Deadline())
if limit > *maxTagKeysPerSearch || limit <= 0 {
limit = *maxTagKeysPerSearch
}
labels, err := vmstorage.SearchTagKeys(limit, deadline.Deadline())
qt.Printf("get %d labels from global index", len(labels))
if err != nil {
return nil, fmt.Errorf("error during labels search: %w", err)
@ -708,7 +714,7 @@ func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string,
// GetLabelValuesOnTimeRange returns label values for the given labelName on the given tr
// until the given deadline.
func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) {
func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr storage.TimeRange, limit int, deadline searchutils.Deadline) ([]string, error) {
qt = qt.NewChild("get values for label %s on a timeRange %s", labelName, &tr)
defer qt.Done()
if deadline.Exceeded() {
@ -718,7 +724,10 @@ func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr stor
labelName = ""
}
// Search for tag values
labelValues, err := vmstorage.SearchTagValuesOnTimeRange([]byte(labelName), tr, *maxTagValuesPerSearch, deadline.Deadline())
if limit > *maxTagValuesPerSearch || limit <= 0 {
limit = *maxTagValuesPerSearch
}
labelValues, err := vmstorage.SearchTagValuesOnTimeRange([]byte(labelName), tr, limit, deadline.Deadline())
qt.Printf("get %d label values", len(labelValues))
if err != nil {
return nil, fmt.Errorf("error during label values search on time range for labelName=%q: %w", labelName, err)
@ -739,7 +748,7 @@ func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit
if tagName == "name" {
tagName = ""
}
tagValues, err := GetLabelValues(nil, tagName, deadline)
tagValues, err := GetLabelValues(nil, tagName, 0, deadline)
if err != nil {
return nil, err
}
@ -757,7 +766,7 @@ func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit
// GetLabelValues returns label values for the given labelName
// until the given deadline.
func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchutils.Deadline) ([]string, error) {
func GetLabelValues(qt *querytracer.Tracer, labelName string, limit int, deadline searchutils.Deadline) ([]string, error) {
qt = qt.NewChild("get values for label %s", labelName)
defer qt.Done()
if deadline.Exceeded() {
@ -767,7 +776,10 @@ func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchuti
labelName = ""
}
// Search for tag values
labelValues, err := vmstorage.SearchTagValues([]byte(labelName), *maxTagValuesPerSearch, deadline.Deadline())
if limit > *maxTagValuesPerSearch || limit <= 0 {
limit = *maxTagValuesPerSearch
}
labelValues, err := vmstorage.SearchTagValues([]byte(labelName), limit, deadline.Deadline())
qt.Printf("get %d label values", len(labelValues))
if err != nil {
return nil, fmt.Errorf("error during label values search for labelName=%q: %w", labelName, err)

View file

@ -450,10 +450,14 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
if err != nil {
return err
}
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
var labelValues []string
if len(cp.filterss) == 0 {
if cp.IsDefaultTimeRange() {
labelValues, err = netstorage.GetLabelValues(qt, labelName, cp.deadline)
labelValues, err = netstorage.GetLabelValues(qt, labelName, limit, cp.deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label values for %q: %w`, labelName, err)
}
@ -465,7 +469,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
MinTimestamp: cp.start,
MaxTimestamp: cp.end,
}
labelValues, err = netstorage.GetLabelValuesOnTimeRange(qt, labelName, tr, cp.deadline)
labelValues, err = netstorage.GetLabelValuesOnTimeRange(qt, labelName, tr, limit, cp.deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label values on time range for %q: %w`, labelName, err)
}
@ -478,7 +482,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
if cp.start == 0 {
cp.start = cp.end - defaultStep
}
labelValues, err = labelValuesWithMatches(qt, labelName, cp)
labelValues, err = labelValuesWithMatches(qt, labelName, cp, limit)
if err != nil {
return fmt.Errorf("cannot obtain label values for %q on time range [%d...%d]: %w", labelName, cp.start, cp.end, err)
}
@ -494,7 +498,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
return nil
}
func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *commonParams) ([]string, error) {
func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *commonParams, limit int) ([]string, error) {
// Add `labelName!=''` tag filter in order to filter out series without the labelName.
// There is no need in adding `__name__!=''` filter, since all the time series should
// already have non-empty name.
@ -546,6 +550,9 @@ func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *common
for labelValue := range m {
labelValues = append(labelValues, labelValue)
}
if limit > 0 && len(labelValues) > limit {
labelValues = labelValues[:limit]
}
sort.Strings(labelValues)
qt.Printf("sort %d label values", len(labelValues))
return labelValues, nil
@ -659,10 +666,14 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
if err != nil {
return err
}
limit, err := searchutils.GetInt(r, "limit")
if err != nil {
return err
}
var labels []string
if len(cp.filterss) == 0 {
if cp.IsDefaultTimeRange() {
labels, err = netstorage.GetLabels(qt, cp.deadline)
labels, err = netstorage.GetLabels(qt, limit, cp.deadline)
if err != nil {
return fmt.Errorf("cannot obtain labels: %w", err)
}
@ -674,7 +685,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
MinTimestamp: cp.start,
MaxTimestamp: cp.end,
}
labels, err = netstorage.GetLabelsOnTimeRange(qt, tr, cp.deadline)
labels, err = netstorage.GetLabelsOnTimeRange(qt, tr, limit, cp.deadline)
if err != nil {
return fmt.Errorf("cannot obtain labels on time range: %w", err)
}
@ -685,7 +696,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
if cp.start == 0 {
cp.start = cp.end - defaultStep
}
labels, err = labelsWithMatches(qt, cp)
labels, err = labelsWithMatches(qt, cp, limit)
if err != nil {
return fmt.Errorf("cannot obtain labels for timeRange=[%d..%d]: %w", cp.start, cp.end, err)
}
@ -701,7 +712,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
return nil
}
func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams) ([]string, error) {
func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams, limit int) ([]string, error) {
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxSeriesLimit)
m := make(map[string]struct{})
if cp.end-cp.start > 24*3600*1000 {
@ -741,6 +752,9 @@ func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams) ([]string, erro
for label := range m {
labels = append(labels, label)
}
if limit > 0 && limit < len(labels) {
labels = labels[:limit]
}
sort.Strings(labels)
qt.Printf("sort %d labels", len(labels))
return labels, nil

View file

@ -25,6 +25,19 @@ func roundToSeconds(ms int64) int64 {
return ms - ms%1000
}
// GetInt parses the argKey form value of r as a decimal integer.
//
// Zero is returned without an error if argKey is missing or empty in r.
func GetInt(r *http.Request, argKey string) (int, error) {
	raw := r.FormValue(argKey)
	if raw == "" {
		return 0, nil
	}
	parsed, err := strconv.Atoi(raw)
	if err == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("cannot parse integer %q=%q: %w", argKey, raw, err)
}
// GetTime returns time from the given argKey query arg.
//
// If argKey is missing in r, then defaultMs rounded to seconds is returned.

View file

@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: add support of `lowercase` and `uppercase` relabeling actions in the same way as [Prometheus 2.36.0 does](https://github.com/prometheus/prometheus/releases/tag/v2.36.0). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2664).
* FEATURE: add ability to change the `indexdb` rotation timezone offset via `-retentionTimezoneOffset` command-line flag. Previously it was performed at 4am UTC time. This could lead to performance degradation in the middle of the day when VictoriaMetrics runs in time zones located too far from UTC. Thanks to @cnych for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574).
* FEATURE: limit the number of background merge threads on systems with big number of CPU cores by default. This increases the max size of parts, which can be created during background merge when `-storageDataPath` directory has limited free disk space. This may improve on-disk data compression efficiency and query performance. The limits can be tuned if needed with `-smallMergeConcurrency` and `-bigMergeConcurrency` command-line flags. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2673).
* FEATURE: accept optional `limit` query arg at [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) and [/api/v1/label_values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) for limiting the number of sample entries returned from these endpoints. See [these docs](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support `limit` param per-group for limiting number of produced samples per each rule. Thanks to @Howie59 for [implementation](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2676).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): remove dependency on Internet access at [web API pages](https://docs.victoriametrics.com/vmalert.html#web). Previously the functionality and the layout of these pages were broken without Internet access. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2594).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): implement the `http://vmagent:8429/service-discovery` page in the same way as Prometheus does. This page shows the original labels for all the discovered targets alongside the resulting labels after the relabeling. This simplifies service discovery debugging.

View file

@ -607,6 +607,8 @@ For example, the following query would return data for the last 30 minutes: `/ap
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used.
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
Additionally, VictoriaMetrics provides the following handlers:

View file

@ -611,6 +611,8 @@ For example, the following query would return data for the last 30 minutes: `/ap
VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point.
VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used.
By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range.
Additionally, VictoriaMetrics provides the following handlers: