Merge branch 'public-single-node' into pmm-6401-read-prometheus-data-files

This commit is contained in:
Aliaksandr Valialkin 2022-08-18 01:31:49 +03:00
commit bb154f8829
No known key found for this signature in database
GPG key ID: A72BEC6CD3D0DED1
45 changed files with 1132 additions and 198 deletions

View file

@ -1139,7 +1139,11 @@ VictoriaMetrics also may scrape Prometheus targets - see [these docs](#how-to-sc
VictoriaMetrics supports Prometheus-compatible relabeling for all the ingested metrics if `-relabelConfig` command-line flag points
to a file containing a list of [relabel_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) entries.
The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`.
See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The following docs can be useful in understanding the relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html).
* [Relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values.

View file

@ -259,37 +259,37 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
up == 0
```
* `scrape_duration_seconds` - this metric exposes scrape duration. This allows monitoring slow scrapes. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns scrapes, which take more than 1.5 seconds to complete:
* `scrape_duration_seconds` - the duration of the scrape for the given target. This allows monitoring slow scrapes. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns scrapes, which take more than 1.5 seconds to complete:
```metricsql
scrape_duration_seconds > 1.5
```
* `scrape_timeout_seconds` - this metric exposes the configured timeout for the current scrape target (aka `scrape_timeout`). This allows detecting targets with scrape durations close to the configured scrape timeout. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets (identified by `instance` label), which take more than 80% of the configured `scrape_timeout` during scrapes:
* `scrape_timeout_seconds` - the configured timeout for the current scrape target (aka `scrape_timeout`). This allows detecting targets with scrape durations close to the configured scrape timeout. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets (identified by `instance` label), which take more than 80% of the configured `scrape_timeout` during scrapes:
```metricsql
scrape_duration_seconds / scrape_timeout_seconds > 0.8
```
* `scrape_samples_scraped` - this metric exposes the number of samples (aka metrics) parsed per each scrape. This allows detecting targets, which expose too many metrics. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which expose more than 10000 metrics:
* `scrape_samples_scraped` - the number of samples (aka metrics) parsed per each scrape. This allows detecting targets, which expose too many metrics. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which expose more than 10000 metrics:
```metricsql
scrape_samples_scraped > 10000
```
* `scrape_samples_limit` - this metric exposes the configured limit on the number of metrics the given target can expose. The limit can be set via `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This allows detecting targets, which expose too many metrics compared to the configured `sample_limit`. For example, the following query returns targets (identified by `instance` label), which expose more than 80% metrics compared to the configed `sample_limit`:
* `scrape_samples_limit` - the configured limit on the number of metrics the given target can expose. The limit can be set via `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This metric is exposed only if the `sample_limit` is set. This allows detecting targets, which expose too many metrics compared to the configured `sample_limit`. For example, the following query returns targets (identified by `instance` label), which expose more than 80% metrics compared to the configured `sample_limit`:
```metricsql
scrape_samples_scraped / scrape_samples_limit > 0.8
```
* `scrape_samples_post_metric_relabeling` - this metric exposes the number of samples (aka metrics) left after applying metric-level relabeling from `metric_relabel_configs` section (see [relabeling docs](#relabeling) for more details). This allows detecting targets with too many metrics after the relabeling. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets with more than 10000 metrics after the relabeling:
* `scrape_samples_post_metric_relabeling` - the number of samples (aka metrics) left after applying metric-level relabeling from `metric_relabel_configs` section (see [relabeling docs](#relabeling) for more details). This allows detecting targets with too many metrics after the relabeling. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets with more than 10000 metrics after the relabeling:
```metricsql
scrape_samples_post_metric_relabeling > 10000
```
* `scrape_series_added` - this metric exposes **an approximate** number of new series the given target generates during the current scrape. This metric allows detecting targets (identified by `instance` label), which lead to [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which generate more than 1000 new series during the last hour:
* `scrape_series_added` - **an approximate** number of new series the given target generates during the current scrape. This metric allows detecting targets (identified by `instance` label), which lead to [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which generate more than 1000 new series during the last hour:
```metricsql
sum_over_time(scrape_series_added[1h]) > 1000
@ -297,10 +297,24 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line option (e.g. when [staleness markers](#prometheus-staleness-markers) are disabled).
* `scrape_series_limit` - the limit on the number of unique time series the given target can expose according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric is exposed only if the series limit is set.
* `scrape_series_current` - the number of unique series the given target exposed so far. This metric is exposed only if the series limit is set according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric allows alerting when the number of exposed series by the given target reaches the limit. For example, the following query would alert when the target exposes more than 90% of unique series compared to the configured limit.
```metricsql
scrape_series_current / scrape_series_limit > 0.9
```
* `scrape_series_limit_samples_dropped` - exposes the number of dropped samples during the scrape because of the exceeded limit on the number of unique series. This metric is exposed only if the series limit is set according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric allows alerting when scraped samples are dropped because of the exceeded limit. For example, the following query alerts when at least a single sample is dropped because of the exceeded limit during the last hour:
```metricsql
sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0
```
## Relabeling
VictoriaMetrics components (including `vmagent`) support [Prometheus-compatible relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) with [additional enhancements](#relabeling-enhancements) at various stages of data processing. The relabeling can be defined in the following places processed by `vmagent`:
VictoriaMetrics components support [Prometheus-compatible relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) with [additional enhancements](#relabeling-enhancements) at various stages of data processing. The relabeling can be defined in the following places processed by `vmagent`:
* At the `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is used for modifying labels in discovered targets and for dropping unneeded targets. This relabeling can be debugged by passing `relabel_debug: true` option to the corresponding `scrape_config` section. In this case `vmagent` logs target labels before and after the relabeling and then drops the logged target.
* At the `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is used for modifying labels in scraped metrics and for dropping unneeded metrics. This relabeling can be debugged by passing `metric_relabel_debug: true` option to the corresponding `scrape_config` section. In this case `vmagent` logs metrics before and after the relabeling and then drops the logged metrics.
@ -311,6 +325,7 @@ All the files with relabeling configs can contain special placeholders in the fo
The following articles contain useful information about Prometheus relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html)
* [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
* [Life of a label](https://www.robustperception.io/life-of-a-label)
* [Discarding targets and timeseries with relabeling](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts)
@ -562,10 +577,24 @@ By default `vmagent` doesn't limit the number of time series each scrape target
* Via `series_limit` config option at `scrape_config` section. This limit is applied individually to all the scrape targets defined in the given `scrape_config`.
* Via `__series_limit__` label, which can be set with [relabeling](#relabeling) at `relabel_configs` section. This limit is applied to the corresponding scrape targets. Typical use case: to set the limit via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets, which may expose too high number of time series.
All the scraped metrics are dropped for time series exceeding the given limit. The exceeded limit can be [monitored](#monitoring) via `promscrape_series_limit_rows_dropped_total` metric.
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
Scraped metrics are dropped for time series exceeding the given limit.
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
- `scrape_series_limit` - the series limit for the given target.
- `scrape_series_current` - the current number of series for the given target.
These metrics are automatically sent to the configured `-remoteWrite.url` alongside the scraped per-target metrics.
These metrics allow building the following alerting rules:
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`. The limit can be enforced by setting the following command-line flags:
* `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour. Useful for limiting the number of active time series.
@ -573,10 +602,14 @@ By default `vmagent` doesn't limit the number of time series written to remote s
Both limits can be set simultaneously. If any of these limits is reached, then samples for new time series are dropped instead of sending them to remote storage systems. A sample of dropped series is put in the log with `WARNING` level.
The exceeded limits can be [monitored](#monitoring) with the following metrics:
`vmagent` exposes the following metrics at `http://vmagent:8429/metrics` page (see [monitoring docs](#monitoring) for details):
* `vmagent_hourly_series_limit_rows_dropped_total` - the number of metrics dropped due to exceeded hourly limit on the number of unique time series.
* `vmagent_hourly_series_limit_max_series` - the hourly series limit set via `-remoteWrite.maxHourlySeries`.
* `vmagent_hourly_series_limit_current_series` - the current number of unique series registered during the last hour.
* `vmagent_daily_series_limit_rows_dropped_total` - the number of metrics dropped due to exceeded daily limit on the number of unique time series.
* `vmagent_daily_series_limit_max_series` - the daily series limit set via `-remoteWrite.maxDailySeries`.
* `vmagent_daily_series_limit_current_series` - the current number of unique series registered during the last day.
These limits are approximate, so `vmagent` can underflow/overflow the limit by a small percentage (usually less than 1%).

View file

@ -193,9 +193,25 @@ annotations:
[ <labelname>: <tmpl_string> ]
```
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over it or execute expressions.
#### Templating
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over it
or execute expressions.
The following variables are available in templating:
| Variable | Description | Example |
|------------------------------------|-----------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------|
| $value or .Value | The current alert's value. Avoid using value in labels, it may cause unexpected issues. | {% raw %}"Number of connections is {{ $value }}"{% endraw %} |
| $labels or .Labels | The list of labels of the current alert. Use as ".Labels.<label_name>". | {% raw %}"Too high number of connections for {{ .Labels.instance }}"{% endraw %} |
| $alertID or .AlertID | The current alert's ID generated by vmalert. | {% raw %}"Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}"{% endraw %} |
| $groupID or .GroupID | The current alert's group ID generated by vmalert. | {% raw %}"Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}"{% endraw %} |
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}"/api/v1/query?query={{ $expr&vert;quotesEscape&vert;queryEscape }}"{% endraw %} |
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | {% raw %}"Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }})"{% endraw %} |
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | {% raw %}"Visit {{ $externalURL }} for more details"{% endraw %} |
Additionally, `vmalert` provides some extra templating functions
listed [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/template_func.go) and [reusable templates](#reusable-templates).
listed [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/template_func.go)
and [reusable templates](#reusable-templates).
#### Reusable templates
@ -484,8 +500,9 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
* `http://<vmalert-addr>` - UI;
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules;
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>"` - get alert status by ID.
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
Used as alert source in AlertManager.
* `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in web UI.
* `http://<vmalert-addr>/metrics` - application metrics.
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
@ -620,7 +637,7 @@ Pass `-help` to `vmalert` in order to see the full list of supported
command-line flags with their descriptions.
The shortlist of configuration flags is the following:
{% raw %}
```
-clusterMode
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy
@ -695,8 +712,8 @@ The shortlist of configuration flags is the following:
-evaluationInterval duration
How often to evaluate the rules (default 1m0s)
-external.alert.source string
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/vmalert/api/v1/alert?group_id=&alert_id=' is used
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
Supports templating. For example, link to Grafana: 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'. (default "{{.ExternalURL}}/vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}")
-external.label array
Optional label in the form 'Name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
Supports an array of values separated by comma or specified via multiple flags.
@ -956,6 +973,7 @@ The shortlist of configuration flags is the following:
-version
Show VictoriaMetrics version
```
{% endraw %}
### Hot config reload

View file

@ -60,7 +60,8 @@ absolute path to all .tpl files in root.`)
externalURL = flag.String("external.url", "", "External URL is used as alert's source for sent alerts to the notifier")
externalAlertSource = flag.String("external.alert.source", "", `External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/vmalert/api/v1/alert?group_id=&alert_id=' is used`)
Supports templating. For example, link to Grafana: 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.
If empty 'vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}' is used.`)
externalLabels = flagutil.NewArray("external.label", "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. "+
"Pass multiple -label flags in order to add multiple label sets.")
@ -249,7 +250,7 @@ func getAlertURLGenerator(externalURL *url.URL, externalAlertSource string, vali
if externalAlertSource == "" {
return func(a notifier.Alert) string {
gID, aID := strconv.FormatUint(a.GroupID, 10), strconv.FormatUint(a.ID, 10)
return fmt.Sprintf("%s/vmalert/api/v1/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
return fmt.Sprintf("%s/vmalert/alert?%s=%s&%s=%s", externalURL, paramGroupID, gID, paramAlertID, aID)
}, nil
}
if validateTemplate {

View file

@ -41,7 +41,7 @@ func TestGetAlertURLGenerator(t *testing.T) {
if err != nil {
t.Errorf("unexpected error %s", err)
}
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/api/v1/alert?%s=42&%s=2", paramGroupID, paramAlertID)
exp := fmt.Sprintf("https://victoriametrics.com/path/vmalert/alert?%s=42&%s=2", paramGroupID, paramAlertID)
if exp != fn(testAlert) {
t.Errorf("unexpected url want %s, got %s", exp, fn(testAlert))
}

View file

@ -74,9 +74,11 @@ func (as AlertState) String() string {
// AlertTplData is used to execute templating
type AlertTplData struct {
Labels map[string]string
Value float64
Expr string
Labels map[string]string
Value float64
Expr string
AlertID uint64
GroupID uint64
}
var tplHeaders = []string{
@ -85,6 +87,8 @@ var tplHeaders = []string{
"{{ $expr := .Expr }}",
"{{ $externalLabels := .ExternalLabels }}",
"{{ $externalURL := .ExternalURL }}",
"{{ $alertID := .AlertID }}",
"{{ $groupID := .GroupID }}",
}
// ExecTemplate executes the Alert template for given
@ -92,7 +96,7 @@ var tplHeaders = []string{
// Every alert could have a different datasource, so function
// requires a queryFunction as an argument.
func (a *Alert) ExecTemplate(q templates.QueryFn, labels, annotations map[string]string) (map[string]string, error) {
tplData := AlertTplData{Value: a.Value, Labels: labels, Expr: a.Expr}
tplData := AlertTplData{Value: a.Value, Labels: labels, Expr: a.Expr, AlertID: a.ID, GroupID: a.GroupID}
tmpl, err := templates.GetWithFuncs(templates.FuncsWithQuery(q))
if err != nil {
return nil, fmt.Errorf("error getting a template: %w", err)

View file

@ -109,6 +109,19 @@ func TestAlert_ExecTemplate(t *testing.T) {
"description": fmt.Sprintf("It is 10000 connections for localhost (cluster-%s)", extCluster),
},
},
{
name: "alert and group IDs",
alert: &Alert{
ID: 42,
GroupID: 24,
},
annotations: map[string]string{
"url": "/api/v1/alert?alertID={{$alertID}}&groupID={{$groupID}}",
},
expTpl: map[string]string{
"url": "/api/v1/alert?alertID=42&groupID=24",
},
},
}
qFn := func(q string) ([]datasource.Metric, error) {

View file

@ -500,6 +500,12 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
statusCode = esc.StatusCode
}
w.WriteHeader(statusCode)
var ure *promql.UserReadableError
if errors.As(err, &ure) {
prometheus.WriteErrorResponse(w, statusCode, ure)
return
}
prometheus.WriteErrorResponse(w, statusCode, err)
}

View file

@ -839,7 +839,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo
}
result, err := promql.Exec(qt, &ec, query, false)
if err != nil {
return fmt.Errorf("cannot execute query: %w", err)
return err
}
if step < maxStepForPointsAdjustment.Milliseconds() {
queryOffset := getLatencyOffsetMilliseconds()

View file

@ -294,7 +294,9 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr)
func evalTransformFunc(qt *querytracer.Tracer, ec *EvalConfig, fe *metricsql.FuncExpr) ([]*timeseries, error) {
tf := getTransformFunc(fe.Name)
if tf == nil {
return nil, fmt.Errorf(`unknown func %q`, fe.Name)
return nil, &UserReadableError{
Err: fmt.Errorf(`unknown func %q`, fe.Name),
}
}
args, err := evalExprs(qt, ec, fe.Args)
if err != nil {
@ -336,7 +338,9 @@ func evalAggrFunc(qt *querytracer.Tracer, ec *EvalConfig, ae *metricsql.AggrFunc
}
af := getAggrFunc(ae.Name)
if af == nil {
return nil, fmt.Errorf(`unknown func %q`, ae.Name)
return nil, &UserReadableError{
Err: fmt.Errorf(`unknown func %q`, ae.Name),
}
}
afa := &aggrFuncArg{
ae: ae,
@ -679,10 +683,14 @@ func evalRollupFunc(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf
}
tssAt, err := evalExpr(qt, ec, re.At)
if err != nil {
return nil, fmt.Errorf("cannot evaluate `@` modifier: %w", err)
return nil, &UserReadableError{
Err: fmt.Errorf("cannot evaluate `@` modifier: %w", err),
}
}
if len(tssAt) != 1 {
return nil, fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt))
return nil, &UserReadableError{
Err: fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt)),
}
}
atTimestamp := int64(tssAt[0].Values[0] * 1000)
ecNew := copyEvalConfig(ec)
@ -742,7 +750,9 @@ func evalRollupFuncWithoutAt(qt *querytracer.Tracer, ec *EvalConfig, funcName st
rvs, err = evalRollupFuncWithSubquery(qt, ecNew, funcName, rf, expr, re)
}
if err != nil {
return nil, err
return nil, &UserReadableError{
Err: err,
}
}
if funcName == "absent_over_time" {
rvs = aggregateAbsentOverTime(ec, re.Expr, rvs)
@ -964,7 +974,9 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
sq := storage.NewSearchQuery(minTimestamp, ec.End, tfss, ec.MaxSeries)
rss, err := netstorage.ProcessSearchQuery(qt, sq, ec.Deadline)
if err != nil {
return nil, err
return nil, &UserReadableError{
Err: err,
}
}
rssLen := rss.Len()
if rssLen == 0 {
@ -1000,12 +1012,14 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
rml := getRollupMemoryLimiter()
if !rml.Get(uint64(rollupMemorySize)) {
rss.Cancel()
return nil, fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
"total available memory for concurrent requests: %d bytes; "+
"requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)
return nil, &UserReadableError{
Err: fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
"total available memory for concurrent requests: %d bytes; "+
"requested memory: %d bytes; "+
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3),
}
}
defer rml.Put(uint64(rollupMemorySize))
@ -1018,7 +1032,9 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
}
if err != nil {
return nil, err
return nil, &UserReadableError{
Err: err,
}
}
tss = mergeTimeseries(tssCached, tss, start, ec)
rollupResultCacheV.Put(qt, ec, expr, window, tss)

View file

@ -26,6 +26,24 @@ var (
`This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter`)
)
// UserReadableError wraps an error whose message should be returned to the user
// as is, without additional context.
type UserReadableError struct {
	// Err is the underlying error to report to the user.
	Err error
}

// Error implements the error interface by returning the message of the wrapped error.
func (e *UserReadableError) Error() string {
	return e.Err.Error()
}

// Unwrap returns the wrapped error.
//
// It is used by the standard errors package. See https://golang.org/pkg/errors
func (e *UserReadableError) Unwrap() error {
	return e.Err
}
// Exec executes q for the given ec.
func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly bool) ([]netstorage.Result, error) {
if querystats.Enabled() {
@ -73,7 +91,7 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo
}
qt.Printf("round series values to %d decimal digits after the point", n)
}
return result, err
return result, nil
}
func maySortResults(e metricsql.Expr, tss []*timeseries) bool {

View file

@ -1,12 +1,12 @@
{
"files": {
"main.css": "./static/css/main.7e6d0c89.css",
"main.js": "./static/js/main.e97de856.js",
"main.css": "./static/css/main.9b22c3e0.css",
"main.js": "./static/js/main.b8df40e9.js",
"static/js/27.939f971b.chunk.js": "./static/js/27.939f971b.chunk.js",
"index.html": "./index.html"
},
"entrypoints": [
"static/css/main.7e6d0c89.css",
"static/js/main.e97de856.js"
"static/css/main.9b22c3e0.css",
"static/js/main.b8df40e9.js"
]
}

View file

@ -1 +1 @@
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="VM-UI is a metric explorer for Victoria Metrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap"/><script src="./dashboards/index.js" type="module"></script><script defer="defer" src="./static/js/main.e97de856.js"></script><link href="./static/css/main.7e6d0c89.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="VM-UI is a metric explorer for Victoria Metrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap"/><script src="./dashboards/index.js" type="module"></script><script defer="defer" src="./static/js/main.b8df40e9.js"></script><link href="./static/css/main.9b22c3e0.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>

View file

@ -1 +0,0 @@
body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.MuiAccordionSummary-content{margin:0!important}.uplot,.uplot *,.uplot :after,.uplot :before{box-sizing:border-box}.uplot{font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;line-height:1.5;width:-webkit-min-content;width:min-content}.u-title{font-size:18px;font-weight:700;text-align:center}.u-wrap{position:relative;-webkit-user-select:none;-ms-user-select:none;user-select:none}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{display:block;height:100%;position:relative;width:100%}.u-axis{position:absolute}.u-legend{font-size:14px;margin:auto;text-align:center}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{display:inline-block;vertical-align:middle}.u-legend .u-marker{background-clip:padding-box!important;height:1em;margin-right:4px;width:1em}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{background:rgba(0,0,0,.07)}.u-cursor-x,.u-cursor-y,.u-select{pointer-events:none;position:absolute}.u-cursor-x,.u-cursor-y{left:0;top:0;will-change:transform;z-index:100}.u-hz .u-cursor-x,.u-vt .u-cursor-y{border-right:1px dashed #607d8b;height:100%}.u-hz .u-cursor-y,.u-vt .u-cursor-x{border-bottom:1px dashed #607d8b;width:100%}.u-cursor-pt{background-clip:padding-box!important;border:0 
solid;border-radius:50%;left:0;pointer-events:none;position:absolute;top:0;will-change:transform;z-index:100}.u-axis.u-off,.u-cursor-pt.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-select.u-off,.u-tooltip{display:none}.u-tooltip{grid-gap:12px;word-wrap:break-word;background:rgba(57,57,57,.9);border-radius:4px;color:#fff;font-family:monospace;font-size:10px;font-weight:500;line-height:1.4em;max-width:300px;padding:8px;pointer-events:none;position:absolute;z-index:100}.u-tooltip-data{align-items:center;display:flex;flex-wrap:wrap;font-size:11px;line-height:150%}.u-tooltip-data__value{font-weight:700;padding:4px}.u-tooltip__info{grid-gap:4px;display:grid}.u-tooltip__marker{height:12px;margin-right:4px;width:12px}.legendWrapper{cursor:default;display:flex;flex-wrap:wrap;margin-top:20px;position:relative}.legendGroup{margin:0 12px 24px 0;padding:10px 6px}.legendGroupTitle{align-items:center;border-bottom:1px solid #ecebe6;display:flex;font-size:11px;margin-bottom:5px;padding:0 10px 5px}.legendGroupQuery{font-weight:700;margin-right:4px}.legendGroupLine{margin-right:10px}.legendItem{grid-gap:6px;align-items:start;background-color:#fff;cursor:pointer;display:grid;grid-template-columns:auto auto;justify-content:start;padding:7px 50px 7px 10px;transition:.2s ease}.legendItemHide{opacity:.5;text-decoration:line-through}.legendItem:hover{background-color:rgba(0,0,0,.1)}.legendMarker{border-style:solid;border-width:2px;box-sizing:border-box;height:12px;transition:.2s ease;width:12px}.legendLabel{font-size:11px;font-weight:400;line-height:12px}.legendFreeFields{cursor:pointer;padding:3px}.legendFreeFields:hover{text-decoration:underline}.legendFreeFields:not(:last-child):after{content:","}.legendWrapperHotkey{align-items:center;display:flex;font-size:11px}.legendWrapperHotkey p{margin-right:20px}.legendWrapperHotkey code{word-wrap:break-word;background-color:#f2f2f2;border:1px solid 
#dedede;border-radius:2px;color:#0a0a0a;display:inline;font-size:10px;font-weight:400;max-width:100%;padding:4px 6px}.panelDescription ul{line-height:2.2}.panelDescription a{color:#fff}.panelDescription code{background-color:rgba(0,0,0,.3);border-radius:2px;color:#fff;display:inline;font-size:inherit;font-weight:400;max-width:100%;padding:4px 6px}

View file

@ -0,0 +1 @@
body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.MuiAccordionSummary-content{margin:0!important}.shortcut-key{align-items:center;border:1px solid #dedede;border-radius:4px;cursor:default;display:inline-flex;font-size:10px;justify-content:center;line-height:22px;padding:2px 6px 0;text-align:center;white-space:nowrap}.uplot,.uplot *,.uplot :after,.uplot :before{box-sizing:border-box}.uplot{font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;line-height:1.5;width:-webkit-min-content;width:min-content}.u-title{font-size:18px;font-weight:700;text-align:center}.u-wrap{position:relative;-webkit-user-select:none;-ms-user-select:none;user-select:none}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{display:block;height:100%;position:relative;width:100%}.u-axis{position:absolute}.u-legend{font-size:14px;margin:auto;text-align:center}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{display:inline-block;vertical-align:middle}.u-legend .u-marker{background-clip:padding-box!important;height:1em;margin-right:4px;width:1em}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{background:rgba(0,0,0,.07)}.u-cursor-x,.u-cursor-y,.u-select{pointer-events:none;position:absolute}.u-cursor-x,.u-cursor-y{left:0;top:0;will-change:transform;z-index:100}.u-hz .u-cursor-x,.u-vt .u-cursor-y{border-right:1px dashed #607d8b;height:100%}.u-hz .u-cursor-y,.u-vt .u-cursor-x{border-bottom:1px dashed 
#607d8b;width:100%}.u-cursor-pt{background-clip:padding-box!important;border:0 solid;border-radius:50%;left:0;pointer-events:none;position:absolute;top:0;will-change:transform;z-index:100}.u-axis.u-off,.u-cursor-pt.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-select.u-off,.u-tooltip{display:none}.u-tooltip{grid-gap:12px;word-wrap:break-word;background:rgba(57,57,57,.9);border-radius:4px;color:#fff;font-family:monospace;font-size:10px;font-weight:500;line-height:1.4em;max-width:300px;padding:8px;pointer-events:none;position:absolute;z-index:100}.u-tooltip-data{align-items:center;display:flex;flex-wrap:wrap;font-size:11px;line-height:150%}.u-tooltip-data__value{font-weight:700;padding:4px}.u-tooltip__info{grid-gap:4px;display:grid}.u-tooltip__marker{height:12px;margin-right:4px;width:12px}.legendWrapper{cursor:default;display:flex;flex-wrap:wrap;margin-top:20px;position:relative}.legendGroup{margin:0 12px 0 0;padding:10px 6px}.legendGroupTitle{align-items:center;border-bottom:1px solid #ecebe6;display:flex;font-size:11px;margin-bottom:5px;padding:0 10px 5px}.legendGroupQuery{font-weight:700;margin-right:4px}.legendGroupLine{margin-right:10px}.legendItem{grid-gap:6px;align-items:start;background-color:#fff;cursor:pointer;display:grid;grid-template-columns:auto auto;justify-content:start;padding:7px 50px 7px 10px;transition:.2s ease}.legendItemHide{opacity:.5;text-decoration:line-through}.legendItem:hover{background-color:rgba(0,0,0,.1)}.legendMarker{border-style:solid;border-width:2px;box-sizing:border-box;height:12px;transition:.2s ease;width:12px}.legendLabel{font-size:11px;font-weight:400;line-height:12px}.legendFreeFields{cursor:pointer;padding:3px}.legendFreeFields:hover{text-decoration:underline}.legendFreeFields:not(:last-child):after{content:","}.panelDescription ul{line-height:2.2}.panelDescription a{color:#fff}.panelDescription 
code{background-color:rgba(0,0,0,.3);border-radius:2px;color:#fff;display:inline;font-size:inherit;font-weight:400;max-width:100%;padding:4px 6px}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -16,6 +16,7 @@ import router, {RouterOptions, routerOptions} from "../../router/index";
import DatePicker from "../Main/DatePicker/DatePicker";
import {useCardinalityState, useCardinalityDispatch} from "../../state/cardinality/CardinalityStateContext";
import {useEffect} from "react";
import ShortcutKeys from "../ShortcutKeys/ShortcutKeys";
const classes = {
logo: {
@ -110,7 +111,7 @@ const Header: FC = () => {
to={`${router.cardinality}${search}`}/>
</Tabs>
</Box>
<Box display="grid" gridTemplateColumns="repeat(3, auto)" gap={1} alignItems="center" ml="auto" mr={0}>
<Box display="flex" gap={1} alignItems="center" ml="auto" mr={0}>
{headerSetup?.timeSelector && <TimeSelector/>}
{headerSetup?.datePicker && (
<DatePicker
@ -120,6 +121,7 @@ const Header: FC = () => {
)}
{headerSetup?.executionControls && <ExecutionControls/>}
{headerSetup?.globalSettings && <GlobalSettings/>}
<ShortcutKeys/>
</Box>
</Toolbar>
</AppBar>;

View file

@ -70,11 +70,6 @@ const Legend: FC<LegendProps> = ({labels, query, onChange}) => {
</div>
</div>)}
</div>
<div className="legendWrapperHotkey">
<p><code>Left click</code> - select series</p>
<p><code>Ctrl</code> + <code>Left click</code> - toggle multiple series</p>
</div>
</>;
};

View file

@ -7,7 +7,7 @@
}
.legendGroup {
margin: 0 12px 24px 0;
margin: 0 12px 0 0;
padding: 10px 6px;
}
@ -77,26 +77,3 @@
.legendFreeFields:not(:last-child):after {
content: ",";
}
.legendWrapperHotkey {
display: flex;
align-items: center;
font-size: 11px;
}
.legendWrapperHotkey p {
margin-right: 20px;
}
.legendWrapperHotkey code {
display: inline;
max-width: 100%;
padding: 4px 6px;
border: 1px solid #dedede;
background-color: #f2f2f2;
border-radius: 2px;
font-weight: 400;
font-size: 10px;
color: #0a0a0a;
word-wrap: break-word;
}

View file

@ -0,0 +1,145 @@
import React, {FC, useState} from "preact/compat";
import Tooltip from "@mui/material/Tooltip";
import Button from "@mui/material/Button";
import Modal from "@mui/material/Modal";
import Box from "@mui/material/Box";
import Typography from "@mui/material/Typography";
import IconButton from "@mui/material/IconButton";
import KeyboardIcon from "@mui/icons-material/Keyboard";
import CloseIcon from "@mui/icons-material/Close";
import Divider from "@mui/material/Divider";
import {isMacOs} from "../../utils/detect-os";
// `sx` styles for the modal content box: centered in the viewport,
// with a bounded width and the theme's paper background.
const modalStyle = {
  // Centering: anchor the top-left corner at the viewport center,
  // then shift the box back by half of its own size.
  position: "absolute" as const,
  top: "50%",
  left: "50%",
  transform: "translate(-50%, -50%)",
  // Sizing and spacing.
  minWidth: "300px",
  maxWidth: "800px",
  p: 3,
  // Appearance.
  bgcolor: "background.paper",
  borderRadius: "4px",
};
// Label of the platform modifier key: "Cmd" when isMacOs() is true, "Ctrl" otherwise.
const ctrlMeta = isMacOs() ? "Cmd" : "Ctrl";

// Shortcut reference shown in the modal, grouped by section.
// Each entry lists the keys/gestures pressed together and what the combination does.
const keyList = [
  {
    title: "Query",
    list: [
      {keys: ["Enter"], description: "Run"},
      {keys: ["Shift", "Enter"], description: "Multi-line queries"},
      {keys: [ctrlMeta, "Arrow Up"], description: "Previous command from the Query history"},
      {keys: [ctrlMeta, "Arrow Down"], description: "Next command from the Query history"},
    ],
  },
  {
    title: "Graph",
    list: [
      {keys: [ctrlMeta, "Scroll Up"], description: "Zoom in"},
      {keys: [ctrlMeta, "Scroll Down"], description: "Zoom out"},
      {keys: [ctrlMeta, "Click and Drag"], description: "Move the graph left/right"},
    ],
  },
  {
    title: "Legend",
    list: [
      {keys: ["Mouse Click"], description: "Select series"},
      {keys: [ctrlMeta, "Mouse Click"], description: "Toggle multiple series"},
    ],
  },
];
/**
 * Button with a keyboard icon that opens a modal listing the shortcut keys.
 *
 * Clicking the button toggles the modal; the modal is also closed by its
 * close icon and by the Modal `onClose` callback.
 * The displayed shortcuts are taken from `keyList` and rendered per section.
 */
const ShortcutKeys: FC = () => {
  const [openList, setOpenList] = useState(false);

  const handleToggle = () => setOpenList(prev => !prev);
  const handleClose = () => setOpenList(false);

  return <>
    <Tooltip title={"Shortcut keys"}>
      <Button variant="contained" color="primary"
        sx={{
          color: "white",
          border: "1px solid rgba(0, 0, 0, 0.2)",
          minWidth: "34px",
          padding: "6px 8px",
          boxShadow: "none",
        }}
        startIcon={<KeyboardIcon style={{marginRight: "-8px", marginLeft: "4px"}}/>}
        onClick={handleToggle}>
      </Button>
    </Tooltip>
    {/* aria-labelledby ties the dialog to its visible title for assistive technologies */}
    <Modal open={openList} onClose={handleClose} aria-labelledby="modal-modal-title">
      <Box sx={modalStyle}>
        <Box display="grid" gridTemplateColumns="1fr auto" alignItems="center" mb={2}>
          <Typography id="modal-modal-title" variant="h6" component="h2">
            Shortcut keys
          </Typography>
          <IconButton size="small" onClick={handleClose}>
            <CloseIcon/>
          </IconButton>
        </Box>
        <Box>
          {keyList.map(section => (
            <Box key={section.title} mb={3}>
              <Typography variant="body1" component="h3" fontWeight="bold" mb={0.5}>
                {section.title}
              </Typography>
              <Divider sx={{mb: 1}}/>
              <Box>
                {section.list.map(l => (
                  <Box
                    key={l.keys.join("+")}
                    display="grid"
                    gridTemplateColumns="160px 1fr"
                    alignItems="center"
                    mb={1}
                  >
                    <Box display="flex" alignItems="center" fontSize="10px" gap={"4px"}>
                      {l.keys.map((k, i) => (
                        // The list key must be set on the outermost element returned from map(),
                        // so an explicit Fragment is used instead of the `<>` shorthand,
                        // which cannot carry a key.
                        <React.Fragment key={k}>
                          <code className="shortcut-key">{k}</code> {i !== l.keys.length - 1 ? "+" : ""}
                        </React.Fragment>
                      ))}
                    </Box>
                    <Typography variant="body2" component="p">
                      {l.description}
                    </Typography>
                  </Box>
                ))}
              </Box>
            </Box>
          ))}
        </Box>
      </Box>
    </Modal>
  </>;
};
export default ShortcutKeys;

View file

@ -14,4 +14,18 @@ code {
.MuiAccordionSummary-content {
margin: 0 !important;
}
}
/* Bordered "key cap" used to render a single key of a keyboard shortcut. */
.shortcut-key {
  /* Layout: inline box with centered content. */
  display: inline-flex;
  justify-content: center;
  align-items: center;
  padding: 2px 6px 0;
  /* Text. */
  font-size: 10px;
  line-height: 22px;
  text-align: center;
  white-space: nowrap;
  /* Frame and pointer. */
  border: 1px solid #dedede;
  border-radius: 4px;
  cursor: default;
}

View file

@ -53,6 +53,7 @@ See also [case studies](https://docs.victoriametrics.com/CaseStudies.html).
* [Superset BI with Victoria Metrics](https://cer6erus.medium.com/superset-bi-with-victoria-metrics-a109d3e91bc6)
* [VictoriaMetrics Source Code Analysis - Bloom filter](https://www.sobyte.net/post/2022-05/victoriametrics-bloomfilter/)
* [How we tried using VictoriaMetrics and Thanos at the same time](https://habr.com/ru/company/sravni/blog/672908/)
* [The (Almost) Infinitely Scalable Open Source Monitoring Dream](https://www.forbes.com/sites/adrianbridgwater/2022/08/16/the-almost-infinitely-scalable-open-source-monitoring-dream/)
## Our articles

View file

@ -15,15 +15,25 @@ The following tip changes can be tested by building VictoriaMetrics components f
## tip
**Update note 1:** [vmalert](https://docs.victoriametrics.com/vmalert.html) by default hides values of `-remoteWrite.url`, `-remoteRead.url` and `-datasource.url` in logs and at `http://vmalert:8880/flags` for security reasons. See the corresponding SECURITY change in the Changelog below for additional info.
**Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) by default points alert source url to `/vmalert/alert?...` aka [web UI](https://docs.victoriametrics.com/vmalert.html#web) instead of `/vmalert/api/v1/alert?...` aka JSON handler. The old behavior can be returned back by setting `-external.alert.source=vmalert/api/v1/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}` command-line flag.
* SECURITY: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not expose `-remoteWrite.url`, `-remoteRead.url` and `-datasource.url` command-line flag values in logs and at `http://vmalert:8880/flags` page by default, since they may contain sensitive data such as auth keys. This aligns `vmalert` behaviour with [vmagent](https://docs.victoriametrics.com/vmagent.html), which doesn't expose `-remoteWrite.url` command-line flag value in logs and at `http://vmagent:8429/flags` page by default. Specify `-remoteWrite.showURL`, `-remoteRead.showURL` and `-datasource.showURL` command-line flags for showing values for the corresponding `-*.url` flags in logs. Thanks to @mble for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2965).
* FEATURE: return shorter error messages to Grafana and to other clients requesting [/api/v1/query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries) and [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) endpoints. This should simplify reading these errors by humans. The long error message with full context is still written to logs.
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance for heavy queries on systems with many CPU cores.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): generate additional per-target metrics - `scrape_series_limit`, `scrape_series_current` and `scrape_series_limit_samples_dropped` if series limit is set according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This simplifies alerting on targets with the exceeded series limit. See [these docs](https://docs.victoriametrics.com/vmagent.html#automatically-generated-metrics) for details on these metrics.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for MX record types in [dns_sd_configs](https://docs.victoriametrics.com/sd_configs.html#dns_sd_configs) in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/10099).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_service_port_number` meta-label for `role: service` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs) in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/11002).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_pod_container_image` meta-label for `role: pod` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs) in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/11034).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): retry HTTP requests after some wait time during service discovery and during target scrapes if the server returns 429 HTTP status code (aka `Too many requests`). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2940).
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add a legend in the top right corner for shortcut keys. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2813).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `toTime()` template function in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/10993). See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/template_reference/#numbers).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `$alertID` and `$groupID` template variables. These variables may be used for templating annotations or `-external.alert.source` command-line flag. See the full list of supported variables [here](https://docs.victoriametrics.com/vmalert.html#templating).
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): point alert source to [vmalert's UI](https://docs.victoriametrics.com/vmalert.html#web) at `/vmalert/alert?...` instead of JSON handler at `/vmalert/api/v1/alert?...`. This improves user experience. The old behavior can be returned back by setting `-external.alert.source=vmalert/api/v1/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}` command-line flag.
* BUGFIX: prevent excess CPU usage when the storage enters [read-only mode](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode).
* BUGFIX: improve performance for requests to [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the filter in the `match[]` query arg matches small number of time series. The performance for this case has been reduced in [v1.78.0](https://docs.victoriametrics.com/CHANGELOG.html#v1780). See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1533) issues.
## [v1.80.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.80.0)

View file

@ -325,3 +325,15 @@ The query engine may behave differently for some functions. Please see [this art
## If downsampling and deduplication are enabled how will this work?
[Deduplication](https://docs.victoriametrics.com/#deduplication) is a special case of zero-offset [downsampling](https://docs.victoriametrics.com/#downsampling). So, if both downsampling and deduplication are enabled, then deduplication is replaced by zero-offset downsampling.
## How to upgrade or downgrade VictoriaMetrics without downtime?
Single-node VictoriaMetrics cannot be restarted / upgraded or downgraded without downtime, since it needs to be gracefully shut down and then started again. See [how to upgrade VictoriaMetrics](https://docs.victoriametrics.com/#how-to-upgrade-victoriametrics).
[Cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) can be restarted / upgraded / downgraded without downtime if the following conditions are met:
* If every component of the cluster - `vminsert`, `vmselect` and `vmstorage` - has at least 2 instances.
* If the cluster has enough compute resources (CPU, RAM, disk IO, network bandwidth) to perform rolling restart of all its components.
* If the version used for upgrade / downgrade is compatible with the currently running version. The [CHANGELOG](https://docs.victoriametrics.com/CHANGELOG.html) contains compatibility notes for the published releases.
See [updating / reconfiguring cluster nodes](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#updating--reconfiguring-cluster-nodes) for details on cluster upgrade.

View file

@ -80,6 +80,8 @@ MetricsQL provides the following functions:
See also [implicit query conversions](#implicit-query-conversions).
The list of supported rollup functions:
#### absent_over_time
`absent_over_time(series_selector[d])` returns 1 if the given lookbehind window `d` doesn't contain raw samples. Otherwise it returns an empty result. This function is supported by PromQL. See also [present_over_time](#present_over_time).
@ -377,6 +379,8 @@ See also [implicit query conversions](#implicit-query-conversions).
See also [implicit query conversions](#implicit-query-conversions).
The list of supported transform functions:
#### abs
`abs(q)` calculates the absolute value for every point of every time series returned by `q`. This function is supported by PromQL.
@ -717,6 +721,8 @@ See also [implicit query conversions](#implicit-query-conversions).
See also [implicit query conversions](#implicit-query-conversions).
The list of supported label manipulation functions:
#### alias
`alias(q, "name")` sets the given `name` to all the time series returned by `q`. For example, `alias(up, "foobar")` would rename `up` series to `foobar` series.
@ -803,6 +809,8 @@ sum by (__name__) (
See also [implicit query conversions](#implicit-query-conversions).
The list of supported aggregate functions:
#### any
`any(q) by (group_labels)` returns a single series per `group_labels` out of time series returned by `q`. See also [group](#group).

View file

@ -1139,7 +1139,11 @@ VictoriaMetrics also may scrape Prometheus targets - see [these docs](#how-to-sc
VictoriaMetrics supports Prometheus-compatible relabeling for all the ingested metrics if `-relabelConfig` command-line flag points
to a file containing a list of [relabel_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) entries.
The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`.
See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The following docs can be useful in understanding the relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html).
* [Relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values.

View file

@ -1143,7 +1143,11 @@ VictoriaMetrics also may scrape Prometheus targets - see [these docs](#how-to-sc
VictoriaMetrics supports Prometheus-compatible relabeling for all the ingested metrics if `-relabelConfig` command-line flag points
to a file containing a list of [relabel_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) entries.
The `-relabelConfig` also can point to http or https url. For example, `-relabelConfig=https://config-server/relabel_config.yml`.
See [this article with relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The following docs can be useful in understanding the relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html).
* [Relabeling tips and tricks](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2).
The `-relabelConfig` files can contain special placeholders in the form `%{ENV_VAR}`, which are replaced by the corresponding environment variable values.

View file

@ -1,5 +1,5 @@
---
sort: 25
sort: 26
---
# Guides

View file

@ -239,7 +239,7 @@ But migration from InfluxDB might get easier when using [vmctl](https://docs.vic
VictoriaMetrics command-line tool. See more about
migrating [from InfluxDB v1.x versions](https://docs.victoriametrics.com/vmctl.html#migrating-data-from-influxdb-1x).
Migrating data from InfluxDB v2.x is not supported yet. But there is
useful [3rd party solution]((https://docs.victoriametrics.com/vmctl.html#migrating-data-from-influxdb-2x)) for this.
useful [3rd party solution](https://docs.victoriametrics.com/vmctl.html#migrating-data-from-influxdb-2x) for this.
Please note, that data migration is a backfilling process. So, please
consider [backfilling tips](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#backfilling).

View file

@ -1,5 +1,5 @@
---
sort: 27
sort: 28
---
# Managed VictoriaMetrics

View file

@ -1,5 +1,5 @@
---
sort: 26
sort: 27
---
# VictoriaMetrics Operator

466
docs/relabeling.md Normal file
View file

@ -0,0 +1,466 @@
---
sort: 25
---
# Relabeling cookbook
VictoriaMetrics and [vmagent](https://docs.victoriametrics.com/vmagent.html) support
[Prometheus-compatible relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling)
with [additional enhancements](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements).
The relabeling is mostly used for the following tasks:
* Dropping unneeded scrape targets during [service discovery](https://docs.victoriametrics.com/sd_configs.html#prometheus-service-discovery).
See [how to drop unneeded targets with relabeling](#how-to-drop-discovered-targets).
* Adding or updating static labels at scrape targets. See [how to add labels to scrape targets](#how-to-add-labels-to-scrape-targets).
* Copying target labels from other labels. See [how to copy labels in scrape targets](#how-to-copy-labels-in-scrape-targets).
* Modifying scrape urls for discovered targets. See [how to modify scrape urls in targets](#how-to-modify-scrape-urls-in-targets).
* Modifying `instance` and `job` labels. See [how to modify instance and job](#how-to-modify-instance-and-job).
* Extracting label parts into other labels. See [how to extract label parts](#how-to-extract-label-parts).
* Removing prefixes from target label names. See [how to remove prefixes from target label names](#how-to-remove-prefixes-from-target-label-names).
* Removing some labels from discovered targets. See [how to remove labels from targets](#how-to-remove-labels-from-targets).
* Dropping some metrics during scrape. See [how to drop metrics during scrape](#how-to-drop-metrics-during-scrape).
* Removing some labels from scraped metrics. See [how to remove labels from scraped metrics](#how-to-remove-labels-from-scraped-metrics).
* Removing some labels from metrics matching some [series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors).
See [how to remove labels from metrics subset](#how-to-remove-labels-from-metrics-subset).
See also [relabeling docs at vmagent](https://docs.victoriametrics.com/vmagent.html#relabeling).
## How to remove labels from metrics subset
Sometimes it may be needed to remove labels from a subset of scraped metrics, while leaving these labels in the rest of scraped metrics.
In this case the `if` [series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
can be applied to `action: labeldrop` or `action: labelkeep`.
For example, the following config drops labels with names starting with `foo_` prefix from metrics matching `a{b="c"}` series selector:
```yaml
scrape_configs:
- job_name: test
static_configs:
- targets: [host123]
metric_relabel_configs:
- action: labeldrop
if: 'a{b="c"}'
regex: "foo_.*"
```
See also [how to remove labels from scraped metrics](#how-to-remove-labels-from-scraped-metrics).
## How to remove labels from scraped metrics
Sometimes it may be needed to remove labels from scraped metrics. For example, if some labels
lead to [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)
or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues,
then it may be a good idea to drop these labels during scrapes.
This can be done with `action: labeldrop` or `action: labelkeep` relabeling rules at `metric_relabel_configs` section:
* `action: labeldrop` drops labels with names matching the given `regex` option
* `action: labelkeep` drops labels with names not matching the given `regex` option
For example, the following config drops labels with names starting with `foo_` prefix from all the metrics scraped from the `http://host123/metrics`:
```yaml
scrape_configs:
- job_name: test
static_configs:
- targets: [host123]
metric_relabel_configs:
- action: labeldrop
regex: "foo_.*"
```
The `regex` option can contain arbitrary regular expression in [RE2 format](https://github.com/google/re2/wiki/Syntax).
The `regex` option is applied to every label name in the target. It is automatically anchored, so it must match the whole label name.
The label name is left as is if the `regex` doesn't match it.
Important notes:
* Labels with `__` prefix are automatically removed after the relabeling, so there is no need to remove them with relabeling rules.
* Make sure that metrics exposed by the target can be uniquely identified by their names
and the remaining labels after label removal. Otherwise duplicate metrics with duplicate timestamps
and different values will be pushed to the storage. This is an undesired issue in most cases.
See also [useful tips for metric relabeling](#useful-tips-for-metric-relabeling).
## How to drop metrics during scrape
Sometimes it is needed to drop some metrics during scrapes. For example, if some metrics result
in [high cardinality](https://docs.victoriametrics.com/FAQ.html#what-is-high-cardinality)
or [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate) issues,
then it may be a good idea to drop these metrics during scrapes. This can be done with the `action: drop` or `action: keep`
relabeling rules at `metric_relabel_configs` section:
* `action: drop` drops all the metrics, which match the `if` [series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
* `action: keep` drops all the metrics, which don't match the `if` [series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
For example, the following config drops all the metrics obtained from `http://host123/metrics`, which start with `foo_` prefix:
```yaml
scrape_configs:
- job_name: test
static_configs:
- targets: [host123]
metric_relabel_configs:
- if: '{__name__=~"foo_.*"}'
action: drop
```
Note that the relabeling config is specified under `metric_relabel_configs` section instead of `relabel_configs` section:
* The `relabel_configs` is applied to the configured/discovered targets.
* The `metric_relabel_configs` is applied to metrics scraped from the configured/discovered targets.
See also [useful tips for metric relabeling](#useful-tips-for-metric-relabeling).
## How to remove labels from a subset of targets
Sometimes it is needed to remove some labels from a subset of [discovered targets](https://docs.victoriametrics.com/sd_configs.html),
while leaving these labels in the rest of discovered targets.
In this case the `if` [selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
can be added to `action: labeldrop` or `action: labelkeep` relabeling rule.
For example, the following config discovers pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs),
[extracts pod-level labels](#how-to-remove-prefixes-from-target-label-names) into labels with `foo_` prefix and then drops all the labels
with `foo_bar_` prefix in their names for targets matching `{__address__=~"pod123.+"}` selector:
```yaml
scrape_configs:
- job_name: k8s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- action: labelmap
regex: "__meta_kubernetes_pod_label_(.+)"
replacement: "foo_$1"
- action: labeldrop
if: '{__address__=~"pod123.+"}'
regex: "foo_bar_.*"
```
See also [how to remove labels from targets](#how-to-remove-labels-from-targets).
## How to remove labels from targets
Sometimes it is needed to remove some labels from [discovered targets](https://docs.victoriametrics.com/sd_configs.html).
In this case the `action: labeldrop` and `action: labelkeep` relabeling options can be used:
* `action: labeldrop` drops all the labels with names matching the `regex` option
* `action: labelkeep` drops all the labels with names not matching the `regex` option
For example, the following config discovers pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs),
[extracts pod-level labels](#how-to-remove-prefixes-from-target-label-names) into labels with `foo_` prefix and then drops all the labels
with `foo_bar_` prefix in their names:
```yaml
scrape_configs:
- job_name: k8s
kubernetes_sd_configs:
- role: pod
relabel_configs:
- action: labelmap
regex: "__meta_kubernetes_pod_label_(.+)"
replacement: "foo_$1"
- action: labeldrop
regex: "foo_bar_.*"
```
The `regex` option can contain arbitrary regular expression in [RE2 format](https://github.com/google/re2/wiki/Syntax).
The `regex` option is applied to every label name in the target. It is automatically anchored, so it must match the whole label name.
The label name is left as is if the `regex` doesn't match it.
Important notes:
* Labels with `__` prefix are automatically removed after the relabeling, so there is no need to remove them with relabeling rules.
* Do not remove `instance` and `job` labels, since this may result in duplicate scrape targets with identical sets of labels.
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to remove prefixes from target label names
Sometimes it is needed to remove `__meta_*` prefixes from meta-labels of the [discovered targets](https://docs.victoriametrics.com/sd_configs.html).
For example, [Kubernetes service discovery](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs) adds `__meta_kubernetes_pod_label_<labelname>`
labels per each pod-level label. In this case it may be needed to leave only the `<labelname>` part of such label names,
while removing the `__meta_kubernetes_pod_label_` prefix. This can be done with `action: labelmap` relabeling option:
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - action: labelmap
    regex: "__meta_kubernetes_pod_label_(.+)"
    replacement: "$1"
```
The `regex` option can contain arbitrary regular expression in [RE2 format](https://github.com/google/re2/wiki/Syntax).
The `regex` option is applied to every label name in the target. It is automatically anchored, so it must match the whole label name.
It can contain capture groups such as `(.+)` in the config above. These capture groups can be referenced then inside `replacement` option
with the `$N` syntax, where `N` is the number of the capture group in `regex`. The first capture group has the `$1` reference.
The label name is left as is if the `regex` doesn't match it.
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to extract label parts
Relabeling allows extracting parts from label values and storing them into arbitrary labels.
This is performed with `regex` and `replacement` options in relabeling rules.
For example, the following config discovers pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs),
extracts `bar` part from `foo/bar` container name and stores it into the `xyz` label with `abc_` prefix:
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_container_name]
    regex: "[^/]+/(.+)"
    replacement: "abc_$1"
    target_label: xyz
```
The `regex` option can contain arbitrary regular expression in [RE2 format](https://github.com/google/re2/wiki/Syntax).
The `regex` option is automatically anchored, so it must match the whole value from `source_labels`.
It can contain capture groups such as `(.+)` in the config above. These capture groups can be referenced then inside `replacement` option
with the `$N` syntax, where `N` is the number of the capture group in `regex`. The first capture group has the `$1` reference.
It is possible to construct a label from multiple parts of different labels. In this case just specify the needed source labels inside `source_labels` list.
The values of labels specified in `source_labels` list are joined with `;` separator by default before being matched against the `regex`.
The separator can be overridden via `separator` option.
If the `regex` doesn't match the value constructed from `source_labels`, then the relabeling rule is skipped and the remaining relabeling rules are executed.
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to modify instance and job
Single-node VictoriaMetrics and [vmagent](https://docs.victoriametrics.com/vmagent.html) automatically add `instance` and `job` labels per each discovered target:
* The `job` label is set to `job_name` value specified in the corresponding [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
* The `instance` label is set to the final `__address__` label value after target-level relabeling.
The `__address__` label value is automatically set to the most suitable value depending
on the used [service discovery type](https://docs.victoriametrics.com/sd_configs.html#supported-service-discovery-configs).
The `__address__` label can be overridden during relabeling - see [these docs](#how-to-modify-scrape-urls-in-targets).
Both `instance` and `job` labels can be overridden during relabeling. For example, the following config discovers pod targets
in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs) and overrides `job` label from `k8s` to `foo`:
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - target_label: job
    replacement: foo
```
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to modify scrape urls in targets
URLs for scrape targets are composed of the following parts:
* Scheme (e.g. `http` or `https`). The scheme is available during target relabeling in a special label - `__scheme__`.
By default the scheme is set to `http`. It can be overridden either by specifying the `scheme` option
at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) level
or by updating the `__scheme__` label during relabeling.
* Host and port (e.g. `host12:3456`). This information is available during target relabeling in a special label - `__address__`.
Every [supported service discovery type](https://docs.victoriametrics.com/sd_configs.html#supported-service-discovery-configs)
sets the `__address__` label to the most suitable value. Sometimes this value needs to be modified. In this case
just update the `__address__` label during relabeling to the needed value.
The port part is optional. If it is missing, then it is automatically set either to `80` or `443` depending
on the used scheme (`http` or `https`).
The final `__address__` label is automatically converted into `instance` label per each target unless the `instance`
label is explicitly set during relabeling.
* URL path (e.g. `/metrics`). This information is available during target relabeling in a special label - `__metrics_path__`.
By default the `__metrics_path__` is set to `/metrics`. It can be overridden either by specifying the `metrics_path`
option at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
or by updating the `__metrics_path__` label during relabeling.
* Query args (e.g. `?foo=bar&baz=xyz`). This information is available during target relabeling in special labels
with `__param_` prefix. For example, `__param_foo` would have the `bar` value, while `__param_baz` would have the `xyz` value
for `?foo=bar&baz=xyz` query string. The query args can be specified either via `params` section
at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs)
or by updating/setting the corresponding `__param_*` labels during relabeling.
The resulting scrape url looks like the following:
```
<__scheme__> + "://" + <__address__> + <__metrics_path__> + <"?" + query_args_from_param_labels>
```
It is expected that the target exposes metrics
in [Prometheus text exposition format](https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format)
at the resulting scrape url.
Given the scrape url construction rules above, the following config discovers pod targets
in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs)
and constructs per-target scrape url as `https://<pod_name>/foo/bar?baz=<container_name>`:
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  metrics_path: /foo/bar
  relabel_configs:
  - target_label: __scheme__
    replacement: https
  - source_labels: [__meta_kubernetes_pod_name]
    target_label: __address__
  - source_labels: [__meta_kubernetes_pod_container_name]
    target_label: __param_baz
```
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to copy labels in scrape targets
Labels can be copied by specifying the source labels via `source_labels` relabeling option
and specifying the target label via `target_label` relabeling option.
For example, the following config copies `__meta_kubernetes_pod_name` label to `pod` label
for all the discovered pods in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs):
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_name]
    target_label: pod
```
Note that the `source_labels` option accepts a list of labels in square brackets. If multiple labels are specified
in the `source_labels` list, then the specified label values are joined into a single string with `;` delimiter by default.
The delimiter can be modified by specifying it via `separator` option.
For example, the following config sets the `pod_name:container_port` value to the `host_port` label
for all the discovered pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs):
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_name, __meta_kubernetes_pod_container_port_number]
    separator: ":"
    target_label: host_port
```
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to add labels to scrape targets
Additional labels can be added to scrape targets by specifying the label name in `target_label` relabeling option
and by specifying the label value in `replacement` relabeling option.
The same approach can be used for updating already existing label values at target level.
For example, the following config adds `{foo="bar"}` label to all the discovered pods in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs):
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - target_label: "foo"
    replacement: "bar"
```
The labels, which are added to the target, are automatically added to all the metrics scraped from the target.
For example, if the target exposes the metric `metric{label="value"}`, then the metric is transformed into `metric{label="value",foo="bar"}`
before being sent to the storage.
If the metric exported by the target contains the same label as the target itself, then the `exported_` prefix is added to the exported label name.
For example, if the target exposes the metric `metric{foo="baz"}`, then the metric is transformed into `metric{exported_foo="baz",foo="bar"}`.
This behaviour can be changed by specifying `honor_labels: true` option at the given scrape config. In this case the exported label overrides
the target's label. In this case the `metric{foo="baz"}` stays the same. Example config with `honor_labels: true`:
```yaml
scrape_configs:
- job_name: k8s
  kubernetes_sd_configs:
  - role: pod
  honor_labels: true
  relabel_configs:
  - target_label: "foo"
    replacement: "bar"
```
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## How to drop discovered targets
If a particular discovered target shouldn't be scraped, then `action: keep` or `action: drop` relabeling rules
must be used inside `relabel_configs` section.
The `action: keep` keeps only scrape targets with labels matching the `if` [selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors),
while dropping the rest of targets. For example, the following config discovers pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs)
and scrapes only pods with names starting with `foo` prefix:
```yaml
scrape_configs:
- job_name: foo_pods
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - if: '{__meta_kubernetes_pod_name=~"foo.*"}'
    action: keep
```
The `action: drop` drops all the scrape targets with labels matching the `if` [selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors),
while keeping the rest of targets. For example, the following config discovers pod targets in [Kubernetes](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs)
and scrapes only pods with names starting with prefixes other than `foo`:
```yaml
scrape_configs:
- job_name: not_foo_pods
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - if: '{__meta_kubernetes_pod_name=~"foo.*"}'
    action: drop
```
See also [useful tips for target relabeling](#useful-tips-for-target-relabeling).
## Useful tips for target relabeling
* Every discovered target contains a set of meta-labels, which start with `__meta_` prefix.
The specific sets of labels per each supported service discovery option are listed
[here](https://docs.victoriametrics.com/sd_configs.html#prometheus-service-discovery).
* Every discovered target contains additional labels with `__` prefix other than `__meta_` labels.
See [these docs](#how-to-modify-scrape-urls-in-targets) for more details.
* All the labels, which start with `__` prefix, are automatically removed from targets after the relabeling.
So it is common practice to store temporary labels with names starting with `__` during target relabeling.
* All the target-level labels are automatically added to all the metrics scraped from targets.
* The list of discovered scrape targets with all the discovered meta-labels is available at `http://vmagent:8429/service-discovery` page for `vmagent`
and at `http://victoriametrics:8428/service-discovery` page for single-node VictoriaMetrics.
* The list of active targets with the final set of labels left after relabeling is available at `http://vmagent:8429/targets` page for `vmagent`
and at `http://victoriametrics:8428/targets` page for single-node VictoriaMetrics.
## Useful tips for metric relabeling
* All the labels, which start with `__` prefix, are automatically removed from metrics after the relabeling.
So it is common practice to store temporary labels with names starting with `__` during metrics relabeling.
* All the target-level labels are automatically added to all the metrics scraped from targets,
so target-level labels are available during metrics relabeling.

View file

@ -1106,7 +1106,7 @@ scrape_configs:
# honor_labels controls how to handle conflicts between labels that are
# already present in scraped data and labels that would be attached
# server-side ("job" and "instance" labels, manually configured target
# labels, and labels generated by service discovery implementations).
# labels, labels generated by service discovery, etc.
#
# If honor_labels is set to "true", label conflicts are resolved by keeping label
# values from the scraped data and ignoring the conflicting server-side labels.

View file

@ -263,37 +263,37 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
up == 0
```
* `scrape_duration_seconds` - this metric exposes scrape duration. This allows monitoring slow scrapes. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns scrapes, which take more than 1.5 seconds to complete:
* `scrape_duration_seconds` - the duration of the scrape for the given target. This allows monitoring slow scrapes. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns scrapes, which take more than 1.5 seconds to complete:
```metricsql
scrape_duration_seconds > 1.5
```
* `scrape_timeout_seconds` - this metric exposes the configured timeout for the current scrape target (aka `scrape_timeout`). This allows detecting targets with scrape durations close to the configured scrape timeout. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets (identified by `instance` label), which take more than 80% of the configured `scrape_timeout` during scrapes:
* `scrape_timeout_seconds` - the configured timeout for the current scrape target (aka `scrape_timeout`). This allows detecting targets with scrape durations close to the configured scrape timeout. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets (identified by `instance` label), which take more than 80% of the configured `scrape_timeout` during scrapes:
```metricsql
scrape_duration_seconds / scrape_timeout_seconds > 0.8
```
* `scrape_samples_scraped` - this metric exposes the number of samples (aka metrics) parsed per each scrape. This allows detecting targets, which expose too many metrics. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which expose more than 10000 metrics:
* `scrape_samples_scraped` - the number of samples (aka metrics) parsed per each scrape. This allows detecting targets, which expose too many metrics. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which expose more than 10000 metrics:
```metricsql
scrape_samples_scraped > 10000
```
* `scrape_samples_limit` - this metric exposes the configured limit on the number of metrics the given target can expose. The limit can be set via `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This allows detecting targets, which expose too many metrics compared to the configured `sample_limit`. For example, the following query returns targets (identified by `instance` label), which expose more than 80% metrics compared to the configed `sample_limit`:
* `scrape_samples_limit` - the configured limit on the number of metrics the given target can expose. The limit can be set via `sample_limit` option at [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This metric is exposed only if the `sample_limit` is set. This allows detecting targets, which expose too many metrics compared to the configured `sample_limit`. For example, the following query returns targets (identified by `instance` label), which expose more than 80% metrics compared to the configured `sample_limit`:
```metricsql
scrape_samples_scraped / scrape_samples_limit > 0.8
```
* `scrape_samples_post_metric_relabeling` - this metric exposes the number of samples (aka metrics) left after applying metric-level relabeling from `metric_relabel_configs` section (see [relabeling docs](#relabeling) for more details). This allows detecting targets with too many metrics after the relabeling. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets with more than 10000 metrics after the relabeling:
* `scrape_samples_post_metric_relabeling` - the number of samples (aka metrics) left after applying metric-level relabeling from `metric_relabel_configs` section (see [relabeling docs](#relabeling) for more details). This allows detecting targets with too many metrics after the relabeling. For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets with more than 10000 metrics after the relabeling:
```metricsql
scrape_samples_post_metric_relabeling > 10000
```
* `scrape_series_added` - this metric exposes **an approximate** number of new series the given target generates during the current scrape. This metric allows detecting targets (identified by `instance` label), which lead to [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which generate more than 1000 new series during the last hour:
* `scrape_series_added` - **an approximate** number of new series the given target generates during the current scrape. This metric allows detecting targets (identified by `instance` label), which lead to [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). For example, the following [MetricsQL query](https://docs.victoriametrics.com/MetricsQL.html) returns targets, which generate more than 1000 new series during the last hour:
```metricsql
sum_over_time(scrape_series_added[1h]) > 1000
@ -301,10 +301,24 @@ Extra labels can be added to metrics collected by `vmagent` via the following me
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line option (e.g. when [staleness markers](#prometheus-staleness-markers) are disabled).
* `scrape_series_limit` - the limit on the number of unique time series the given target can expose according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric is exposed only if the series limit is set.
* `scrape_series_current` - the number of unique series the given target exposed so far. This metric is exposed only if the series limit is set according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric allows alerting when the number of exposed series by the given target reaches the limit. For example, the following query would alert when the target exposes more than 90% of unique series compared to the configured limit:
```metricsql
scrape_series_current / scrape_series_limit > 0.9
```
* `scrape_series_limit_samples_dropped` - exposes the number of dropped samples during the scrape because of the exceeded limit on the number of unique series. This metric is exposed only if the series limit is set according to [these docs](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). This metric allows alerting when scraped samples are dropped because of the exceeded limit. For example, the following query alerts when at least a single sample is dropped because of the exceeded limit during the last hour:
```metricsql
sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0
```
## Relabeling
VictoriaMetrics components (including `vmagent`) support [Prometheus-compatible relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) with [additional enhancements](#relabeling-enhancements) at various stages of data processing. The relabeling can be defined in the following places processed by `vmagent`:
VictoriaMetrics components support [Prometheus-compatible relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) with [additional enhancements](#relabeling-enhancements) at various stages of data processing. The relabeling can be defined in the following places processed by `vmagent`:
* At the `scrape_config -> relabel_configs` section in `-promscrape.config` file. This relabeling is used for modifying labels in discovered targets and for dropping unneeded targets. This relabeling can be debugged by passing `relabel_debug: true` option to the corresponding `scrape_config` section. In this case `vmagent` logs target labels before and after the relabeling and then drops the logged target.
* At the `scrape_config -> metric_relabel_configs` section in `-promscrape.config` file. This relabeling is used for modifying labels in scraped metrics and for dropping unneeded metrics. This relabeling can be debugged by passing `metric_relabel_debug: true` option to the corresponding `scrape_config` section. In this case `vmagent` logs metrics before and after the relabeling and then drops the logged metrics.
@ -315,6 +329,7 @@ All the files with relabeling configs can contain special placeholders in the fo
The following articles contain useful information about Prometheus relabeling:
* [Cookbook for common relabeling tasks](https://docs.victoriametrics.com/relabeling.html)
* [How to use Relabeling in Prometheus and VictoriaMetrics](https://valyala.medium.com/how-to-use-relabeling-in-prometheus-and-victoriametrics-8b90fc22c4b2)
* [Life of a label](https://www.robustperception.io/life-of-a-label)
* [Discarding targets and timeseries with relabeling](https://www.robustperception.io/relabelling-can-discard-targets-timeseries-and-alerts)
@ -566,10 +581,24 @@ By default `vmagent` doesn't limit the number of time series each scrape target
* Via `series_limit` config option at `scrape_config` section. This limit is applied individually to all the scrape targets defined in the given `scrape_config`.
* Via `__series_limit__` label, which can be set with [relabeling](#relabeling) at `relabel_configs` section. This limit is applied to the corresponding scrape targets. Typical use case: to set the limit via [Kubernetes annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) for targets, which may expose too high number of time series.
All the scraped metrics are dropped for time series exceeding the given limit. The exceeded limit can be [monitored](#monitoring) via `promscrape_series_limit_rows_dropped_total` metric.
See also `sample_limit` option at [scrape_config section](https://docs.victoriametrics.com/sd_configs.html#scrape_configs).
Scraped metrics are dropped for time series exceeding the given limit.
`vmagent` creates the following additional per-target metrics for targets with non-zero series limit:
- `scrape_series_limit_samples_dropped` - the number of dropped samples during the scrape when the unique series limit is exceeded.
- `scrape_series_limit` - the series limit for the given target.
- `scrape_series_current` - the current number of series for the given target.
These metrics are automatically sent to the configured `-remoteWrite.url` alongside with the scraped per-target metrics.
These metrics allow building the following alerting rules:
- `scrape_series_current / scrape_series_limit > 0.9` - alerts when the number of series exposed by the target reaches 90% of the limit.
- `sum_over_time(scrape_series_limit_samples_dropped[1h]) > 0` - alerts when some samples are dropped because the series limit on a particular target is reached.
By default `vmagent` doesn't limit the number of time series written to remote storage systems specified at `-remoteWrite.url`. The limit can be enforced by setting the following command-line flags:
* `-remoteWrite.maxHourlySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last hour. Useful for limiting the number of active time series.
@ -577,10 +606,14 @@ By default `vmagent` doesn't limit the number of time series written to remote s
Both limits can be set simultaneously. If any of these limits is reached, then samples for new time series are dropped instead of sending them to remote storage systems. A sample of dropped series is put in the log with `WARNING` level.
The exceeded limits can be [monitored](#monitoring) with the following metrics:
`vmagent` exposes the following metrics at `http://vmagent:8429/metrics` page (see [monitoring docs](#monitoring) for details):
* `vmagent_hourly_series_limit_rows_dropped_total` - the number of metrics dropped due to exceeded hourly limit on the number of unique time series.
* `vmagent_hourly_series_limit_max_series` - the hourly series limit set via `-remoteWrite.maxHourlySeries`.
* `vmagent_hourly_series_limit_current_series` - the current number of unique series registered during the last hour.
* `vmagent_daily_series_limit_rows_dropped_total` - the number of metrics dropped due to exceeded daily limit on the number of unique time series.
* `vmagent_daily_series_limit_max_series` - the daily series limit set via `-remoteWrite.maxDailySeries`.
* `vmagent_daily_series_limit_current_series` - the current number of unique series registered during the last day.
These limits are approximate, so `vmagent` can underflow/overflow the limit by a small percentage (usually less than 1%).

View file

@ -197,9 +197,25 @@ annotations:
[ <labelname>: <tmpl_string> ]
```
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over it or execute expressions.
#### Templating
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations to format data, iterate over it
or execute expressions.
The following variables are available in templating:
| Variable | Description | Example |
|------------------------------------|-----------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------|
| $value or .Value | The current alert's value. Avoid using value in labels, it may cause unexpected issues. | {% raw %}"Number of connections is {{ $value }}"{% endraw %} |
| $labels or .Labels | The list of labels of the current alert. Use as ".Labels.<label_name>". | {% raw %}"Too high number of connections for {{ .Labels.instance }}"{% endraw %} |
| $alertID or .AlertID | The current alert's ID generated by vmalert. | {% raw %}"Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}"{% endraw %} |
| $groupID or .GroupID | The current alert's group ID generated by vmalert. | {% raw %}"Link: vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}"{% endraw %} |
| $expr or .Expr | Alert's expression. Can be used for generating links to Grafana or other systems. | {% raw %}"/api/v1/query?query={{ $expr&vert;quotesEscape&vert;queryEscape }}"{% endraw %} |
| $externalLabels or .ExternalLabels | List of labels configured via `-external.label` command-line flag. | {% raw %}"Issues with {{ $labels.instance }} (datacenter-{{ $externalLabels.dc }})"{% endraw %} |
| $externalURL or .ExternalURL | URL configured via `-external.url` command-line flag. Used for cases when vmalert is hidden behind proxy. | {% raw %}"Visit {{ $externalURL }} for more details"{% endraw %} |
Additionally, `vmalert` provides some extra templating functions
listed [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/template_func.go) and [reusable templates](#reusable-templates).
listed [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/notifier/template_func.go)
and [reusable templates](#reusable-templates).
#### Reusable templates
@ -488,8 +504,9 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
* `http://<vmalert-addr>` - UI;
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules;
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>"` - get alert status by ID.
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
Used as alert source in AlertManager.
* `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in web UI.
* `http://<vmalert-addr>/metrics` - application metrics.
* `http://<vmalert-addr>/-/reload` - hot configuration reload.
@ -624,7 +641,7 @@ Pass `-help` to `vmalert` in order to see the full list of supported
command-line flags with their descriptions.
The shortlist of configuration flags is the following:
{% raw %}
```
-clusterMode
If clusterMode is enabled, then vmalert automatically adds the tenant specified in config groups to -datasource.url, -remoteWrite.url and -remoteRead.url. See https://docs.victoriametrics.com/vmalert.html#multitenancy
@ -699,8 +716,8 @@ The shortlist of configuration flags is the following:
-evaluationInterval duration
How often to evaluate the rules (default 1m0s)
-external.alert.source string
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
eg. 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'.If empty '/vmalert/api/v1/alert?group_id=&alert_id=' is used
External Alert Source allows to override the Source link for alerts sent to AlertManager for cases where you want to build a custom link to Grafana, Prometheus or any other service.
Supports templating. For example, link to Grafana: 'explore?orgId=1&left=[\"now-1h\",\"now\",\"VictoriaMetrics\",{\"expr\": \"{{$expr|quotesEscape|crlfEscape|queryEscape}}\"},{\"mode\":\"Metrics\"},{\"ui\":[true,true,true,\"none\"]}]'. (default "{{.ExternalURL}}/vmalert/alert?group_id={{.GroupID}}&alert_id={{.AlertID}}")
-external.label array
Optional label in the form 'Name=value' to add to all generated recording rules and alerts. Pass multiple -label flags in order to add multiple label sets.
Supports an array of values separated by comma or specified via multiple flags.
@ -960,6 +977,7 @@ The shortlist of configuration flags is the following:
-version
Show VictoriaMetrics version
```
{% endraw %}
### Hot config reload

4
go.mod
View file

@ -1,6 +1,6 @@
module github.com/VictoriaMetrics/VictoriaMetrics
go 1.17
go 1.18
require (
cloud.google.com/go/storage v1.25.0
@ -9,7 +9,7 @@ require (
// Do not use the original github.com/valyala/fasthttp because of issues
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.22.1
github.com/VictoriaMetrics/metrics v1.22.2
github.com/VictoriaMetrics/metricsql v0.44.1
github.com/aws/aws-sdk-go v1.44.76
github.com/cespare/xxhash/v2 v2.1.2

4
go.sum
View file

@ -110,8 +110,8 @@ github.com/VictoriaMetrics/fastcache v1.10.0/go.mod h1:tjiYeEfYXCqacuvYw/7UoDIeJ
github.com/VictoriaMetrics/fasthttp v1.1.0 h1:3crd4YWHsMwu60GUXRH6OstowiFvqrwS4a/ueoLdLL0=
github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR2uydjiWvoLp5ZTqQ=
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metrics v1.22.1 h1:ExNLLZ0HLI41imYDaWbeVXfMB2+0W4ovBSk3It+Y9+c=
github.com/VictoriaMetrics/metrics v1.22.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metrics v1.22.2 h1:A6LsNidYwkAHetxsvNFaUWjtzu5ltdgNEoS6i7Bn+6I=
github.com/VictoriaMetrics/metrics v1.22.2/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
github.com/VictoriaMetrics/metricsql v0.44.1 h1:qGoRt0g84uMUscVjS7P3uDZKmjJubWKaIx9v0iHKgck=
github.com/VictoriaMetrics/metricsql v0.44.1/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=

View file

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy"
"github.com/VictoriaMetrics/fasthttp"
"github.com/VictoriaMetrics/metrics"
@ -189,6 +190,7 @@ func (c *client) GetStreamReader() (*streamReader, error) {
req.Header.Set("X-Prometheus-Scrape-Timeout-Seconds", c.scrapeTimeoutSecondsStr)
c.setHeaders(req)
c.setProxyHeaders(req)
scrapeRequests.Inc()
resp, err := c.sc.Do(req)
if err != nil {
cancel()
@ -327,33 +329,12 @@ var (
scrapesOK = metrics.NewCounter(`vm_promscrape_scrapes_total{status_code="200"}`)
scrapesGunzipped = metrics.NewCounter(`vm_promscrape_scrapes_gunziped_total`)
scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`)
scrapeRequests = metrics.NewCounter(`vm_promscrape_scrape_requests_total`)
scrapeRetries = metrics.NewCounter(`vm_promscrape_scrape_retries_total`)
)
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
sleepTime := time.Second
for {
// Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline
// across multiple retries.
err := hc.DoDeadline(req, resp, deadline)
if err == nil {
return nil
}
if err != fasthttp.ErrConnectionClosed && !strings.Contains(err.Error(), "broken pipe") {
return err
}
// Retry request if the server closes the keep-alive connection unless deadline exceeds.
maxSleepTime := time.Until(deadline)
if sleepTime > maxSleepTime {
return fmt.Errorf("the server closes all the connection attempts: %w", err)
}
sleepTime += sleepTime
if sleepTime > maxSleepTime {
sleepTime = maxSleepTime
}
time.Sleep(sleepTime)
scrapeRetries.Inc()
}
return discoveryutils.DoRequestWithPossibleRetry(hc, req, resp, deadline, scrapeRequests, scrapeRetries)
}
type streamReader struct {

View file

@ -240,20 +240,23 @@ func (c *Client) getAPIResponseWithParamsAndClient(client *fasthttp.HostClient,
return data, nil
}
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
// DoRequestWithPossibleRetry performs the given req at hc and stores the response at resp.
func DoRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time, requestCounter, retryCounter *metrics.Counter) error {
sleepTime := time.Second
discoveryRequests.Inc()
requestCounter.Inc()
for {
// Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline
// across multiple retries.
err := hc.DoDeadline(req, resp, deadline)
if err == nil {
return nil
}
if err != fasthttp.ErrConnectionClosed && !strings.Contains(err.Error(), "broken pipe") {
statusCode := resp.StatusCode()
if statusCode != fasthttp.StatusTooManyRequests {
return nil
}
} else if err != fasthttp.ErrConnectionClosed && !strings.Contains(err.Error(), "broken pipe") {
return err
}
// Retry request if the server closes the keep-alive connection unless deadline exceeds.
// Retry request after exponentially increased sleep.
maxSleepTime := time.Until(deadline)
if sleepTime > maxSleepTime {
return fmt.Errorf("the server closes all the connection attempts: %w", err)
@ -263,11 +266,15 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request,
sleepTime = maxSleepTime
}
time.Sleep(sleepTime)
discoveryRetries.Inc()
retryCounter.Inc()
}
}
// doRequestWithPossibleRetry performs req at hc and stores the response at resp
// by delegating to DoRequestWithPossibleRetry.
//
// The discoveryRequests and discoveryRetries counters are passed as the request and retry counters.
func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error {
	return DoRequestWithPossibleRetry(hc, req, resp, deadline, discoveryRequests, discoveryRetries)
}
var (
discoveryRetries = metrics.NewCounter(`vm_promscrape_discovery_retries_total`)
discoveryRequests = metrics.NewCounter(`vm_promscrape_discovery_requests_total`)
discoveryRetries = metrics.NewCounter(`vm_promscrape_discovery_retries_total`)
)

View file

@ -209,9 +209,6 @@ type scrapeWork struct {
// Optional limiter on the number of unique series per scrape target.
seriesLimiter *bloomfilter.Limiter
// Optional counter on the number of dropped samples if the limit on the number of unique series is set.
seriesLimiterRowsDroppedTotal *metrics.Counter
// prevBodyLen contains the previous response body length for the given scrape work.
// It is used as a hint in order to reduce memory usage for body buffers.
prevBodyLen int
@ -343,14 +340,8 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}, globalStopCh <-chan struct{})
sw.sendStaleSeries(lastScrape, "", t, true)
}
if sw.seriesLimiter != nil {
job := sw.Config.Job()
metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL))
metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_max_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL))
metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_current_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL))
sw.seriesLimiter.MustStop()
sw.seriesLimiter = nil
}
return
case tt := <-ticker.C:
@ -475,22 +466,22 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error
// This is a trade-off between performance and accuracy.
seriesAdded = sw.getSeriesAdded(lastScrape, bodyString)
}
samplesDropped := 0
if sw.seriesLimitExceeded || !areIdenticalSeries {
if sw.applySeriesLimit(wc) {
samplesDropped = sw.applySeriesLimit(wc)
if samplesDropped > 0 {
sw.seriesLimitExceeded = true
}
}
sw.addAutoTimeseries(wc, "up", float64(up), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_duration_seconds", duration, scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_samples_scraped", float64(samplesScraped), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", float64(samplesPostRelabeling), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_series_added", float64(seriesAdded), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_timeout_seconds", sw.Config.ScrapeTimeout.Seconds(), scrapeTimestamp)
if sw.Config.SampleLimit > 0 {
// Expose scrape_samples_limit metric if sample_limt config is set for the target.
// See https://github.com/VictoriaMetrics/operator/issues/497
sw.addAutoTimeseries(wc, "scrape_samples_limit", float64(sw.Config.SampleLimit), scrapeTimestamp)
am := &autoMetrics{
up: up,
scrapeDurationSeconds: duration,
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
seriesLimitSamplesDropped: samplesDropped,
}
sw.addAutoMetrics(am, wc, scrapeTimestamp)
sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
sw.prevLabelsLen = len(wc.labels)
sw.prevBodyLen = len(bodyString)
@ -601,12 +592,14 @@ func (sw *scrapeWork) scrapeStream(scrapeTimestamp, realTimestamp int64) error {
// This is a trade-off between performance and accuracy.
seriesAdded = sw.getSeriesAdded(lastScrape, bodyString)
}
sw.addAutoTimeseries(wc, "up", float64(up), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_duration_seconds", duration, scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_samples_scraped", float64(samplesScraped), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", float64(samplesPostRelabeling), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_series_added", float64(seriesAdded), scrapeTimestamp)
sw.addAutoTimeseries(wc, "scrape_timeout_seconds", sw.Config.ScrapeTimeout.Seconds(), scrapeTimestamp)
am := &autoMetrics{
up: up,
scrapeDurationSeconds: duration,
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
}
sw.addAutoMetrics(am, wc, scrapeTimestamp)
sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
sw.prevLabelsLen = len(wc.labels)
sw.prevBodyLen = sbr.bodyLen
@ -699,44 +692,30 @@ func (sw *scrapeWork) getSeriesAdded(lastScrape, currScrape string) int {
return strings.Count(bodyString, "\n")
}
func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) bool {
func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) int {
seriesLimit := *seriesLimitPerTarget
if sw.Config.SeriesLimit > 0 {
seriesLimit = sw.Config.SeriesLimit
}
if sw.seriesLimiter == nil && seriesLimit > 0 {
job := sw.Config.Job()
sw.seriesLimiter = bloomfilter.NewLimiter(seriesLimit, 24*time.Hour)
sw.seriesLimiterRowsDroppedTotal = metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL))
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`promscrape_series_limit_max_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL), func() float64 {
return float64(sw.seriesLimiter.MaxItems())
})
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`promscrape_series_limit_current_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`,
sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL), func() float64 {
return float64(sw.seriesLimiter.CurrentItems())
})
}
hsl := sw.seriesLimiter
if hsl == nil {
return false
sl := sw.seriesLimiter
if sl == nil {
return 0
}
dstSeries := wc.writeRequest.Timeseries[:0]
limitExceeded := false
samplesDropped := 0
for _, ts := range wc.writeRequest.Timeseries {
h := sw.getLabelsHash(ts.Labels)
if !hsl.Add(h) {
// The limit on the number of hourly unique series per scrape target has been exceeded.
// Drop the metric.
sw.seriesLimiterRowsDroppedTotal.Inc()
limitExceeded = true
if !sl.Add(h) {
samplesDropped++
continue
}
dstSeries = append(dstSeries, ts)
}
wc.writeRequest.Timeseries = dstSeries
return limitExceeded
return samplesDropped
}
func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
@ -756,11 +735,8 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp i
}
}
if addAutoSeries {
sw.addAutoTimeseries(wc, "up", 0, timestamp)
sw.addAutoTimeseries(wc, "scrape_duration_seconds", 0, timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_scraped", 0, timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", 0, timestamp)
sw.addAutoTimeseries(wc, "scrape_series_added", 0, timestamp)
am := &autoMetrics{}
sw.addAutoMetrics(am, wc, timestamp)
}
series := wc.writeRequest.Timeseries
if len(series) == 0 {
@ -791,6 +767,34 @@ func (sw *scrapeWork) getLabelsHash(labels []prompbmarshal.Label) uint64 {
return xxhash.Sum64(b)
}
// autoMetrics holds the values for the automatically generated time series
// which addAutoMetrics writes for a single scrape.
//
// The zero value results in zeros for every value-carrying series.
type autoMetrics struct {
	// up is the value for the "up" series.
	up int
	// scrapeDurationSeconds is the value for the "scrape_duration_seconds" series.
	scrapeDurationSeconds float64
	// samplesScraped is the value for the "scrape_samples_scraped" series.
	samplesScraped int
	// samplesPostRelabeling is the value for the "scrape_samples_post_metric_relabeling" series.
	samplesPostRelabeling int
	// seriesAdded is the value for the "scrape_series_added" series.
	seriesAdded int
	// seriesLimitSamplesDropped is the value for the "scrape_series_limit_samples_dropped" series.
	// addAutoMetrics emits that series only when the scrape work has a series limiter.
	seriesLimitSamplesDropped int
}
// addAutoMetrics appends the automatically generated per-scrape time series to wc.
//
// Values are taken from am and from the scrape work configuration.
// Every appended series gets the given timestamp.
func (sw *scrapeWork) addAutoMetrics(am *autoMetrics, wc *writeRequestCtx, timestamp int64) {
	// emit keeps the wc and timestamp arguments in a single place.
	emit := func(name string, value float64) {
		sw.addAutoTimeseries(wc, name, value, timestamp)
	}

	// Series exposed for every scrape.
	emit("up", float64(am.up))
	emit("scrape_duration_seconds", am.scrapeDurationSeconds)
	emit("scrape_samples_scraped", float64(am.samplesScraped))
	emit("scrape_samples_post_metric_relabeling", float64(am.samplesPostRelabeling))
	emit("scrape_series_added", float64(am.seriesAdded))
	emit("scrape_timeout_seconds", sw.Config.ScrapeTimeout.Seconds())

	// scrape_samples_limit is exposed only if the sample limit is configured for the target.
	// See https://github.com/VictoriaMetrics/operator/issues/497
	if sw.Config.SampleLimit > 0 {
		emit("scrape_samples_limit", float64(sw.Config.SampleLimit))
	}

	// The remaining series make sense only when the series limiter exists.
	limiter := sw.seriesLimiter
	if limiter == nil {
		return
	}
	emit("scrape_series_limit_samples_dropped", float64(am.seriesLimitSamplesDropped))
	emit("scrape_series_limit", float64(limiter.MaxItems()))
	emit("scrape_series_current", float64(limiter.CurrentItems()))
}
// addAutoTimeseries adds automatically generated time series with the given name, value and timestamp.
//
// See https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series

View file

@ -352,6 +352,25 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
scrape_series_added{job="xx",instance="foo.com"} 4 123
scrape_timeout_seconds{job="xx",instance="foo.com"} 42 123
`)
// Scrape success with the given SampleLimit.
f(`
foo{bar="baz"} 34.44
bar{a="b",c="d"} -3e4
`, &ScrapeWork{
ScrapeTimeout: time.Second * 42,
SampleLimit: 2,
}, `
foo{bar="baz"} 34.44 123
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_limit 2 123
scrape_samples_scraped 2 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
scrape_timeout_seconds 42 123
`)
// Scrape failure because of the exceeded SampleLimit
f(`
foo{bar="baz"} 34.44
bar{a="b",c="d"} -3e4
@ -367,6 +386,48 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
scrape_samples_post_metric_relabeling 2 123
scrape_samples_limit 1 123
scrape_series_added 0 123
scrape_series_current 0 123
scrape_series_limit 123 123
scrape_series_limit_samples_dropped 0 123
scrape_timeout_seconds 42 123
`)
// Scrape success with the given SeriesLimit.
f(`
foo{bar="baz"} 34.44
bar{a="b",c="d"} -3e4
`, &ScrapeWork{
ScrapeTimeout: time.Second * 42,
SeriesLimit: 123,
}, `
foo{bar="baz"} 34.44 123
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_scraped 2 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
scrape_series_current 2 123
scrape_series_limit 123 123
scrape_series_limit_samples_dropped 0 123
scrape_timeout_seconds 42 123
`)
// Exceed SeriesLimit.
f(`
foo{bar="baz"} 34.44
bar{a="b",c="d"} -3e4
`, &ScrapeWork{
ScrapeTimeout: time.Second * 42,
SeriesLimit: 1,
}, `
foo{bar="baz"} 34.44 123
up 1 123
scrape_samples_scraped 2 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
scrape_series_current 1 123
scrape_series_limit 1 123
scrape_series_limit_samples_dropped 1 123
scrape_timeout_seconds 42 123
`)
}

View file

@ -850,9 +850,13 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
if err != nil {
return err
}
if filter != nil && filter.Len() == 0 {
qt.Printf("found zero label names for filter=%s", tfss)
return nil
if filter != nil && filter.Len() <= 100e3 {
// It is faster to obtain label names by metricIDs from the filter
// instead of scanning the inverted index for the matching filters.
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames)
}
var prevLabelName []byte
ts := &is.ts
@ -912,6 +916,41 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
return nil
}
// getLabelNamesForMetricIDs adds label names for the series with the given metricIDs to lns.
//
// The metric name for every metricID is obtained via is.searchMetricNameWithCache,
// then the keys of its tags are inserted into lns.
// The "__name__" label is inserted only after at least one metric name has been resolved,
// so lns remains untouched when metricIDs is empty or when no metric names could be found.
//
// metricIDs for which the lookup returns io.EOF are skipped.
// The function returns nil early as soon as len(lns) reaches maxLabelNames after inserting a tag key.
// A non-nil error is returned if the metric name lookup fails with any other error
// or if the found metric name cannot be unmarshaled.
func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error {
	var mn MetricName
	foundLabelNames := 0
	nameSeen := false
	var buf []byte
	for _, metricID := range metricIDs {
		var err error
		buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
		if err != nil {
			if err == io.EOF {
				// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
				// Skip this metricID for now.
				continue
			}
			return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
		}
		if err := mn.Unmarshal(buf); err != nil {
			return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err)
		}
		if !nameSeen {
			// Report "__name__" only when at least one series has been actually found.
			// Otherwise a filter matching zero series would still return the "__name__" label.
			nameSeen = true
			lns["__name__"] = struct{}{}
		}
		for _, tag := range mn.Tags {
			if _, ok := lns[string(tag.Key)]; ok {
				continue
			}
			foundLabelNames++
			lns[string(tag.Key)] = struct{}{}
			if len(lns) >= maxLabelNames {
				qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames)
				return nil
			}
		}
	}
	qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs))
	return nil
}
// SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr.
func (db *indexDB) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters, tr TimeRange,
maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) {
@ -1007,9 +1046,13 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
if err != nil {
return err
}
if filter != nil && filter.Len() == 0 {
qt.Printf("found zero label values for filter=%s", tfss)
return nil
if filter != nil && filter.Len() < 100e3 {
// It is faster to obtain label values by metricIDs from the filter
// instead of scanning the inverted index for the matching filters.
// This would help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
metricIDs := filter.AppendTo(nil)
qt.Printf("sort %d metricIDs", len(metricIDs))
return is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues)
}
if labelName == "__name__" {
// __name__ label is encoded as empty string in indexdb.
@ -1068,6 +1111,42 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
return nil
}
// getLabelValuesForMetricIDs adds values of the label labelName for the series with the given metricIDs to lvs.
//
// An empty labelName is treated as "__name__".
// The metric name for every metricID is obtained via is.searchMetricNameWithCache,
// then the value returned by mn.GetTagValue(labelName) is inserted into lvs.
// Empty values are skipped, so series without the given label neither add an empty entry to lvs
// nor count towards maxLabelValues.
//
// metricIDs for which the lookup returns io.EOF are skipped.
// The function returns nil early as soon as len(lvs) reaches maxLabelValues after inserting a value.
// A non-nil error is returned if the metric name lookup fails with any other error
// or if the found metric name cannot be unmarshaled.
func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) error {
	if labelName == "" {
		labelName = "__name__"
	}
	var mn MetricName
	foundLabelValues := 0
	var buf []byte
	for _, metricID := range metricIDs {
		var err error
		buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
		if err != nil {
			if err == io.EOF {
				// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
				// Skip this metricID for now.
				continue
			}
			return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
		}
		if err := mn.Unmarshal(buf); err != nil {
			return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err)
		}
		tagValue := mn.GetTagValue(labelName)
		if len(tagValue) == 0 {
			// The series has no value for the given label.
			// Do not register the empty string as a distinct label value.
			continue
		}
		if _, ok := lvs[string(tagValue)]; ok {
			continue
		}
		foundLabelValues++
		lvs[string(tagValue)] = struct{}{}
		if len(lvs) >= maxLabelValues {
			qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues)
			return nil
		}
	}
	qt.Printf("get %d distinct values for label %q from %d metricIDs", foundLabelValues, labelName, len(metricIDs))
	return nil
}
// SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
//
// This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.

4
vendor/modules.txt vendored
View file

@ -25,8 +25,8 @@ github.com/VictoriaMetrics/fastcache
github.com/VictoriaMetrics/fasthttp
github.com/VictoriaMetrics/fasthttp/fasthttputil
github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.22.1
## explicit; go 1.12
# github.com/VictoriaMetrics/metrics v1.22.2
## explicit; go 1.15
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.44.1
## explicit; go 1.13