Merge branch 'public-single-node' into HEAD
.github/workflows/check-licenses.yml (26 changes)

@@ -14,13 +14,25 @@ jobs:
     name: Build
     runs-on: ubuntu-latest
    steps:
-      - name: Setup Go
-        uses: actions/setup-go@main
-        with:
-          go-version: 1.21.4
-        id: go
       - name: Code checkout
         uses: actions/checkout@master
+
+      - name: Setup Go
+        id: go
+        uses: actions/setup-go@v5
+        with:
+          go-version: stable
+          cache: false
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-check-licenses-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-check-licenses-
+
       - name: Check License
-        run: |
-          make check-licenses
+        run: make check-licenses
.github/workflows/codeql-analysis.yml (19 changes)

@@ -55,11 +55,22 @@ jobs:
         uses: actions/checkout@v4

       - name: Set up Go
-        uses: actions/setup-go@v4
+        id: go
+        uses: actions/setup-go@v5
         with:
-          go-version: 1.21.4
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false
         if: ${{ matrix.language == 'go' }}
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-
+        if: ${{ matrix.language == 'go' }}

       # Initializes the CodeQL tools for scanning.
.github/workflows/main.yml (100 changes)

@@ -7,6 +7,8 @@ on:
     paths-ignore:
       - "docs/**"
       - "**.md"
+      - "dashboards/**"
+      - "deployment/**.yml"
  pull_request:
    branches:
      - master
@@ -14,6 +16,8 @@ on:
     paths-ignore:
       - "docs/**"
       - "**.md"
+      - "dashboards/**"
+      - "deployment/**.yml"
 permissions:
   contents: read

@@ -30,18 +34,55 @@ jobs:
         uses: actions/checkout@v4

       - name: Setup Go
-        uses: actions/setup-go@v4
+        id: go
+        uses: actions/setup-go@v5
         with:
-          go-version: 1.21.4
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false

-      - name: Dependencies
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-check-all-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-check-all-
+
+      - name: Run check-all
         run: |
           make install-golangci-lint
           make check-all
           git diff --exit-code

+  build:
+    needs: lint
+    name: build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Code checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Go
+        id: go
+        uses: actions/setup-go@v5
+        with:
+          go-version: stable
+          cache: false
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-crossbuild-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-crossbuild-
+
+      - name: Build
+        run: make crossbuild
+
   test:
     needs: lint
     strategy:
@@ -54,43 +95,26 @@ jobs:
         uses: actions/checkout@v4

       - name: Setup Go
-        uses: actions/setup-go@v4
+        id: go
+        uses: actions/setup-go@v5
         with:
-          go-version: 1.21.4
-          check-latest: true
-          cache: true
+          go-version: stable
+          cache: false
+
+      - name: Cache Go artifacts
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+            ~/go/bin
+          key: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.sum', 'Makefile', 'app/**/Makefile') }}
+          restore-keys: go-artifacts-${{ runner.os }}-${{ matrix.scenario }}-

       - name: run tests
-        run: |
-          make ${{ matrix.scenario}}
+        run: make ${{ matrix.scenario}}

       - name: Publish coverage
         uses: codecov/codecov-action@v3
         with:
           file: ./coverage.txt
-
-  build:
-    needs: test
-    name: build
-    runs-on: ubuntu-latest
-    steps:
-      - name: Code checkout
-        uses: actions/checkout@v4
-
-      - name: Setup Go
-        id: go
-        uses: actions/setup-go@v4
-        with:
-          go-version: 1.21.4
-          check-latest: true
-          cache: true
-
-      - uses: actions/cache@v3
-        with:
-          path: gocache-for-docker
-          key: gocache-docker-${{ runner.os }}-${{ steps.go.outputs.go-version }}-${{ hashFiles('go.mod') }}
-
-      - name: Build
-        run: |
-          make victoria-metrics-crossbuild
-          make vmutils-crossbuild
.github/workflows/sync-docs.yml (2 changes)

@@ -7,7 +7,7 @@ on:
       - 'docs/**'
   workflow_dispatch: {}
 env:
-  PAGEFIND_VERSION: "1.0.3"
+  PAGEFIND_VERSION: "1.0.4"
   HUGO_VERSION: "latest"
 permissions:
   contents: read # This is required for actions/checkout and to commit back image update
Makefile (52 changes)

@@ -1,5 +1,7 @@
 PKG_PREFIX := github.com/VictoriaMetrics/VictoriaMetrics

+MAKE_CONCURRENCY ?= $(shell cat /proc/cpuinfo | grep -c processor)
+MAKE_PARALLEL := $(MAKE) -j $(MAKE_CONCURRENCY)
 DATEINFO_TAG ?= $(shell date -u +'%Y%m%d-%H%M%S')
 BUILDINFO_TAG ?= $(shell echo $$(git describe --long --all | tr '/' '-')$$( \
 	git diff-index --quiet HEAD -- || echo '-dirty-'$$(git diff-index -u HEAD | openssl sha1 | cut -d' ' -f2 | cut -c 1-8)))
@@ -15,6 +17,7 @@ GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TA
 .PHONY: $(MAKECMDGOALS)

 include app/*/Makefile
 include docs/Makefile
+include deployment/*/Makefile
 include dashboards/Makefile
 include snap/local/Makefile
@@ -163,12 +166,14 @@ vmutils-windows-amd64: \
 	vmrestore-windows-amd64 \
 	vmctl-windows-amd64

 crossbuild:
+	$(MAKE_PARALLEL) victoria-metrics-crossbuild vmutils-crossbuild
+
 victoria-metrics-crossbuild: \
-	victoria-metrics-linux-386 \
 	victoria-metrics-linux-amd64 \
 	victoria-metrics-linux-arm64 \
 	victoria-metrics-linux-arm \
+	victoria-metrics-linux-386 \
 	victoria-metrics-linux-ppc64le \
 	victoria-metrics-darwin-amd64 \
 	victoria-metrics-darwin-arm64 \
@@ -180,7 +185,6 @@ vmutils-crossbuild: \
 	vmutils-linux-amd64 \
 	vmutils-linux-arm64 \
 	vmutils-linux-arm \
-	vmutils-linux-386 \
 	vmutils-linux-ppc64le \
 	vmutils-darwin-amd64 \
 	vmutils-darwin-arm64 \
@@ -190,14 +194,15 @@ vmutils-crossbuild: \

 publish-release:
 	rm -rf bin/*
-	git checkout $(TAG) && LATEST_TAG=stable $(MAKE) release publish && \
-	git checkout $(TAG)-cluster && LATEST_TAG=cluster-stable $(MAKE) release publish && \
-	git checkout $(TAG)-enterprise && LATEST_TAG=enterprise-stable $(MAKE) release publish && \
-	git checkout $(TAG)-enterprise-cluster && LATEST_TAG=enterprise-cluster-stable $(MAKE) release publish
+	git checkout $(TAG) && $(MAKE) release && LATEST_TAG=stable $(MAKE) publish && \
+	git checkout $(TAG)-cluster && $(MAKE) release && LATEST_TAG=cluster-stable $(MAKE) publish && \
+	git checkout $(TAG)-enterprise && $(MAKE) release && LATEST_TAG=enterprise-stable $(MAKE) publish && \
+	git checkout $(TAG)-enterprise-cluster && $(MAKE) release && LATEST_TAG=enterprise-cluster-stable $(MAKE) publish

-release: \
-	release-victoria-metrics \
-	release-vmutils
+release:
+	$(MAKE_PARALLEL) \
+		release-victoria-metrics \
+		release-vmutils

 release-victoria-metrics: \
 	release-victoria-metrics-linux-386 \
@@ -256,16 +261,16 @@ release-victoria-metrics-windows-goarch: victoria-metrics-windows-$(GOARCH)-prod
 	cd bin && rm -rf \
 		victoria-metrics-windows-$(GOARCH)-prod.exe

-release-victoria-logs: \
-	release-victoria-logs-linux-386 \
-	release-victoria-logs-linux-amd64 \
-	release-victoria-logs-linux-arm \
-	release-victoria-logs-linux-arm64 \
-	release-victoria-logs-darwin-amd64 \
-	release-victoria-logs-darwin-arm64 \
-	release-victoria-logs-freebsd-amd64 \
-	release-victoria-logs-openbsd-amd64 \
-	release-victoria-logs-windows-amd64
+release-victoria-logs:
+	$(MAKE_PARALLEL) release-victoria-logs-linux-386 \
+		release-victoria-logs-linux-amd64 \
+		release-victoria-logs-linux-arm \
+		release-victoria-logs-linux-arm64 \
+		release-victoria-logs-darwin-amd64 \
+		release-victoria-logs-darwin-arm64 \
+		release-victoria-logs-freebsd-amd64 \
+		release-victoria-logs-openbsd-amd64 \
+		release-victoria-logs-windows-amd64

 release-victoria-logs-linux-386:
 	GOOS=linux GOARCH=386 $(MAKE) release-victoria-logs-goos-goarch
@@ -529,12 +534,3 @@ copy-docs:
 docs-sync:
 	SRC=README.md DST=docs/README.md OLD_URL='' ORDER=0 TITLE=VictoriaMetrics $(MAKE) copy-docs
 	SRC=README.md DST=docs/Single-server-VictoriaMetrics.md OLD_URL='/Single-server-VictoriaMetrics.html' TITLE=VictoriaMetrics ORDER=1 $(MAKE) copy-docs
-	SRC=app/vmagent/README.md DST=docs/vmagent.md OLD_URL='/vmagent.html' ORDER=3 TITLE=vmagent $(MAKE) copy-docs
-	SRC=app/vmalert/README.md DST=docs/vmalert.md OLD_URL='/vmalert.html' ORDER=4 TITLE=vmalert $(MAKE) copy-docs
-	SRC=app/vmauth/README.md DST=docs/vmauth.md OLD_URL='/vmauth.html' ORDER=5 TITLE=vmauth $(MAKE) copy-docs
-	SRC=app/vmbackup/README.md DST=docs/vmbackup.md OLD_URL='/vmbackup.html' ORDER=6 TITLE=vmbackup $(MAKE) copy-docs
-	SRC=app/vmrestore/README.md DST=docs/vmrestore.md OLD_URL='/vmrestore.html' ORDER=7 TITLE=vmrestore $(MAKE) copy-docs
-	SRC=app/vmctl/README.md DST=docs/vmctl.md OLD_URL='/vmctl.html' ORDER=8 TITLE=vmctl $(MAKE) copy-docs
-	SRC=app/vmgateway/README.md DST=docs/vmgateway.md OLD_URL='/vmgateway.html' ORDER=9 TITLE=vmgateway $(MAKE) copy-docs
-	SRC=app/vmbackupmanager/README.md DST=docs/vmbackupmanager.md OLD_URL='/vmbackupmanager.html' ORDER=10 TITLE=vmbackupmanager $(MAKE) copy-docs
-	SRC=app/vmalert-tool/README.md DST=docs/vmalert-tool.md OLD_URL='' ORDER=12 TITLE=vmalert-tool $(MAKE) copy-docs
README.md (78 changes)

@@ -9,7 +9,7 @@
 [](https://github.com/VictoriaMetrics/VictoriaMetrics/actions)
 [](https://codecov.io/gh/VictoriaMetrics/VictoriaMetrics)

-<img src="logo.png" width="300" alt="VictoriaMetrics logo">
+<img src="docs/logo.webp" width="300" alt="VictoriaMetrics logo">

 VictoriaMetrics is a fast, cost-effective and scalable monitoring solution and time series database.
@@ -348,6 +348,12 @@ http://<victoriametrics-addr>:8428

 Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics.

+In the "Type and version" section it is recommended to set the type to "Prometheus" and the version to at least "2.24.x":
+
+<img src="docs/grafana-datasource-prometheus.webp" alt="Grafana datasource" />
+
+This allows Grafana to use a more efficient API to get label values.
+
 Then build graphs and dashboards for the created datasource using [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
 or [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html).
@@ -390,6 +396,9 @@ The UI allows exploring query results via graphs and tables. It also provides th
 - [WITH expressions playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/expand-with-exprs) - test how WITH expressions work;
 - [Metric relabel debugger](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/prometheus/graph/#/relabeling) - playground for [relabeling](#relabeling) configs.

+VMUI provides auto-completion for [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) functions, metric names, label names and label values. The auto-completion can be enabled
+by checking the `Autocomplete` toggle. When the auto-completion is disabled, it can still be triggered for the current cursor position by pressing `ctrl+space`.
+
 VMUI automatically switches from graph view to heatmap view when the query returns [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) buckets
 (both [Prometheus histograms](https://prometheus.io/docs/concepts/metric_types/#histogram)
 and [VictoriaMetrics histograms](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) are supported).
@@ -507,9 +516,9 @@ See also [vmagent](https://docs.victoriametrics.com/vmagent.html), which can be

 ## How to send data from DataDog agent

 VictoriaMetrics accepts data from [DataDog agent](https://docs.datadoghq.com/agent/)
 or [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/)
 via ["submit metrics" API](https://docs.datadoghq.com/api/latest/metrics/#submit-metrics)
 at `/datadog/api/v1/series` path.

 ### Sending metrics to VictoriaMetrics
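As a companion to the section above, here is a hedged stdlib-only sketch of what sending a metric to that endpoint looks like without any agent. The payload shape follows DataDog's v1 "submit metrics" API referenced above; the address, metric name and tag are placeholders, not values from this repository.

```go
// Hedged sketch: push one sample directly to VictoriaMetrics' DataDog-compatible
// /datadog/api/v1/series endpoint, bypassing the DataDog agent entirely.
package main

import (
	"bytes"
	"fmt"
	"net/http"
	"time"
)

func main() {
	body := fmt.Sprintf(`{"series":[{"metric":"example.requests","points":[[%d,1]],"tags":["env:test"]}]}`,
		time.Now().Unix())
	resp, err := http.Post("http://localhost:8428/datadog/api/v1/series",
		"application/json", bytes.NewBufferString(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status) // a 2xx status indicates the sample was accepted
}
```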
@@ -518,7 +527,7 @@ DataDog agent allows configuring destinations for metrics sending via ENV variab
 or via [configuration file](https://docs.datadoghq.com/agent/guide/agent-configuration-files/) in section `dd_url`.

 <p align="center">
-  <img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.png" width="800">
+  <img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM.webp" width="800">
 </p>

 To configure DataDog agent via ENV variable add the following prefix:
@@ -552,7 +561,7 @@ DataDog allows configuring [Dual Shipping](https://docs.datadoghq.com/agent/guid
 sending via ENV variable `DD_ADDITIONAL_ENDPOINTS` or via configuration file `additional_endpoints`.

 <p align="center">
-  <img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.png" width="800">
+  <img src="docs/Single-server-VictoriaMetrics-sending_DD_metrics_to_VM_and_DD.webp" width="800">
 </p>

 Run DataDog using the following ENV variable with VictoriaMetrics as additional metrics receiver:
@@ -1414,7 +1423,12 @@ For example, `/api/v1/import?extra_label=foo=bar` would add `"foo":"bar"` label

 Note that it could be required to flush response cache after importing historical data. See [these docs](#backfilling) for details.

-VictoriaMetrics parses input JSON lines one-by-one. It loads the whole JSON line in memory, then parses it and then saves the parsed samples into persistent storage. This means that VictoriaMetrics can occupy big amounts of RAM when importing too long JSON lines. The solution is to split too long JSON lines into smaller lines. It is OK if samples for a single time series are split among multiple JSON lines.
+VictoriaMetrics parses input JSON lines one-by-one. It loads the whole JSON line in memory, then parses it and then saves the parsed samples into persistent storage.
+This means that VictoriaMetrics can occupy big amounts of RAM when importing too long JSON lines.
+The solution is to split too long JSON lines into shorter lines. It is OK if samples for a single time series are split among multiple JSON lines.
+JSON line length can be limited via `max_rows_per_line` query arg when exporting via [/api/v1/export](#how-to-export-data-in-json-line-format).
+
+The maximum JSON line length, which can be parsed by VictoriaMetrics, is limited by `-import.maxLineLen` command-line flag value.

 ### How to import data in native format
@@ -1591,15 +1605,18 @@ The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line
 ```

 Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it.
-Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB).
+[/api/v1/import](#how-to-import-data-in-json-line-format) handler doesn't accept JSON lines longer than the value
+passed to `-import.maxLineLen` command-line flag (by default this is 10MB).

 It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format).
 Too long JSON lines may increase RAM usage at VictoriaMetrics side.

 [/api/v1/export](#how-to-export-data-in-json-line-format) handler accepts `max_rows_per_line` query arg, which allows limiting the number of samples per each exported line.

 It is OK to split [raw samples](https://docs.victoriametrics.com/keyConcepts.html#raw-samples)
 for the same [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) across multiple lines.

-The number of lines in JSON line document can be arbitrary.
+The number of lines in the request to [/api/v1/import](#how-to-import-data-in-json-line-format) can be arbitrary - they are imported in streaming manner.

 ## Relabeling
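Since the import guidance above hinges on line-splitting (per-line `-import.maxLineLen` limit, 1K-10K samples per line for throughput), a hedged stdlib-only sketch of a conforming importer may help. The address, batch size and content type below are illustrative assumptions, not values required by the docs.

```go
// Streams JSON lines to /api/v1/import in modest batches. Each line is kept intact
// (the -import.maxLineLen limit applies per line), while samples for one time series
// may freely span multiple lines, as the docs above state.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"net/http"
	"os"
)

func main() {
	const vmAddr = "http://localhost:8428" // placeholder address
	var batch bytes.Buffer
	flush := func() {
		if batch.Len() == 0 {
			return
		}
		resp, err := http.Post(vmAddr+"/api/v1/import", "application/stream+json",
			bytes.NewReader(batch.Bytes()))
		if err != nil {
			panic(err)
		}
		resp.Body.Close()
		fmt.Println("batch status:", resp.Status)
		batch.Reset()
	}
	sc := bufio.NewScanner(os.Stdin)           // one exported JSON line per input line
	sc.Buffer(make([]byte, 0, 1<<20), 16<<20) // tolerate long lines
	for sc.Scan() {
		batch.Write(sc.Bytes())
		batch.WriteByte('\n')
		if batch.Len() > 4<<20 { // keep requests small; the hard limits above are per line
			flush()
		}
	}
	flush()
}
```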
@@ -1687,6 +1704,8 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxConcurrentRequests` limits the number of concurrent requests VictoriaMetrics can process. Bigger number of concurrent requests usually means bigger memory usage. For example, if a single query needs 100 MiB of additional memory during its execution, then 100 concurrent queries may need `100 * 100 MiB = 10 GiB` of additional memory. So it is better to limit the number of concurrent queries, while suspending additional incoming queries if the concurrency limit is reached. VictoriaMetrics provides `-search.maxQueueDuration` command-line flag for limiting the max wait time for suspended queries. See also `-search.maxMemoryPerQuery` command-line flag.
 - `-search.maxSamplesPerSeries` limits the number of raw samples the query can process per each time series. VictoriaMetrics sequentially processes raw samples per each found time series during the query. It unpacks raw samples on the selected time range per each time series into memory and then applies the given [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). The `-search.maxSamplesPerSeries` command-line flag allows limiting memory usage in the case when the query is executed on a time range, which contains hundreds of millions of raw samples per each located time series.
 - `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
+- `-search.maxResponseSeries` limits the number of time series a single query can return from [`/api/v1/query`](https://docs.victoriametrics.com/keyConcepts.html#instant-query)
+  and [`/api/v1/query_range`](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
 - `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
@@ -1695,7 +1714,8 @@ By default, VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
 - `-search.maxTagValueSuffixesPerSearch` limits the number of entries, which may be returned from `/metrics/find` endpoint. See [Graphite Metrics API usage docs](#graphite-metrics-api-usage).

-See also [cardinality limiter](#cardinality-limiter) and [capacity planning docs](#capacity-planning).
+See also [resource usage limits at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#resource-usage-limits),
+[cardinality limiter](#cardinality-limiter) and [capacity planning docs](#capacity-planning).


 ## High availability
@@ -2035,6 +2055,8 @@ Alternatively, single-node VictoriaMetrics can self-scrape the metrics when `-se
 set to duration greater than 0. For example, `-selfScrapeInterval=10s` would enable self-scraping of `/metrics` page
 with 10 seconds interval.

+_Please note, never use loadbalancer address for scraping metrics. All monitored components should be scraped directly by their address._
+
 Official Grafana dashboards available for [single-node](https://grafana.com/grafana/dashboards/10229-victoriametrics/)
 and [clustered](https://grafana.com/grafana/dashboards/11176-victoriametrics-cluster/) VictoriaMetrics.
 See an [alternative dashboard for clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11831)
@@ -2361,7 +2383,7 @@ It is recommended disabling query cache with `-search.disableCache` command-line
 historical data with timestamps from the past, since the cache assumes that the data is written with
 the current timestamps. Query cache can be enabled after the backfilling is complete.

-An alternative solution is to query [/internal/resetRollupResultCache](https://docs.victoriametrics.com/url-examples.html#internalresetRollupResultCache) handler after the backfilling is complete. This will reset the query cache, which could contain incomplete data cached during the backfilling.
+An alternative solution is to query [/internal/resetRollupResultCache](https://docs.victoriametrics.com/url-examples.html#internalresetrollupresultcache) handler after the backfilling is complete. This will reset the query cache, which could contain incomplete data cached during the backfilling.

 Yet another solution is to increase `-search.cacheTimestampOffset` flag value in order to disable caching
 for data with timestamps close to the current time. Single-node VictoriaMetrics automatically resets response
@@ -2494,6 +2516,20 @@ Adhering `KISS` principle simplifies the resulting code and architecture, so it

 Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues).

+## Images in documentation
+
+Please, keep image size and number of images per single page low. Keep the docs page as lightweight as possible.
+
+If the page needs to have many images, consider using WEB-optimized image format [webp](https://developers.google.com/speed/webp).
+When adding a new doc with many images use `webp` format right away. Or use a Makefile command below to
+convert already existing images at `docs` folder automatically to `webp` format:
+
+```console
+make docs-images-to-webp
+```
+
+Once conversion is done, update the path to images in your docs and verify everything is correct.
+
 ## VictoriaMetrics Logo

 [Zip](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/VM_logo.zip) contains three folders with different image orientations (main color and inverted version).
@@ -2606,12 +2642,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -httpAuth.username string
     	Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
   -httpListenAddr string
-    	TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8428")
+    	TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol (default ":8428")
   -httpListenAddr.useProxyProtocol
     	Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
   -import.maxLineLen size
     	The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export
-    	Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600)
+    	Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760)
   -influx.databaseNames array
     	Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb
     	Supports an array of values separated by comma or specified via multiple flags.
@@ -2661,7 +2697,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -loggerLevel string
     	Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
   -loggerMaxArgLen int
-    	The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 500)
+    	The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 1000)
   -loggerOutput string
     	Output for the logs. Supported values: stderr, stdout (default "stderr")
   -loggerTimezone string
@@ -2669,7 +2705,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -loggerWarnsPerSecondLimit int
     	Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
   -maxConcurrentInserts int
-    	The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 8)
+    	The maximum number of concurrent insert requests. Default value should work for most cases, since it minimizes the memory usage. The default value can be increased when clients send data over slow networks. See also -insert.maxQueueDuration (default 32)
   -maxInsertRequestSize size
     	The maximum size in bytes of a single Prometheus remote_write API request
     	Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 33554432)
@@ -2714,6 +2750,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
     	If non-empty, then the label with this name and the -promscrape.cluster.memberNum value is added to all the scraped metrics. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info
   -promscrape.cluster.memberNum string
     	The number of vmagent instance in the cluster of scrapers. It must be a unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name. See also -promscrape.cluster.memberLabel . See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default "0")
+  -promscrape.cluster.memberURLTemplate string
+    	An optional template for URL to access vmagent instance with the given -promscrape.cluster.memberNum value. Every %d occurence in the template is substituted with -promscrape.cluster.memberNum at urls to vmagent instances responsible for scraping the given target at /service-discovery page. For example -promscrape.cluster.memberURLTemplate='http://vmagent-%d:8429/targets'. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more details
   -promscrape.cluster.membersCount int
     	The number of members in a cluster of scrapers. Each member must have a unique -promscrape.cluster.memberNum in the range 0 ... promscrape.cluster.membersCount-1 . Each member then scrapes roughly 1/N of all the targets. By default, cluster scraping is disabled, i.e. a single scraper scrapes all the targets. See https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets for more info (default 1)
   -promscrape.cluster.name string
@@ -2835,7 +2873,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -search.logSlowQueryDuration duration
     	Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
   -search.maxConcurrentRequests int
-    	The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 8)
+    	The maximum number of concurrent search requests. It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. See also -search.maxQueueDuration and -search.maxMemoryPerQuery (default 16)
   -search.maxExportDuration duration
     	The maximum duration for /api/v1/export call (default 720h0m0s)
   -search.maxExportSeries int
@@ -2854,7 +2892,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
     	The maximum amounts of memory a single query may consume. Queries requiring more memory are rejected. The total memory limit for concurrently executed queries can be estimated as -search.maxMemoryPerQuery multiplied by -search.maxConcurrentRequests . See also -search.logQueryMemoryUsage
     	Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
   -search.maxPointsPerTimeseries int
-    	The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph (default 30000)
+    	The maximum points per a single timeseries returned from /api/v1/query_range. This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph. See also -search.maxResponseSeries (default 30000)
   -search.maxPointsSubqueryPerTimeseries int
     	The maximum number of points per series, which can be generated by subquery. See https://valyala.medium.com/prometheus-subqueries-in-victoriametrics-9b1492b720b3 (default 100000)
   -search.maxQueryDuration duration
@@ -2864,6 +2902,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
     	Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
   -search.maxQueueDuration duration
     	The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
+  -search.maxResponseSeries int
+    	The maximum number of time series which can be returned from /api/v1/query and /api/v1/query_range . The limit is disabled if it equals to 0. See also -search.maxPointsPerTimeseries and -search.maxUniqueTimeseries
   -search.maxSamplesPerQuery int
     	The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
   -search.maxSamplesPerSeries int
@@ -2889,12 +2929,12 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
   -search.maxUniqueTimeseries int
     	The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage (default 300000)
   -search.maxWorkersPerQuery int
-    	The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 4)
+    	The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default 16)
   -search.minStalenessInterval duration
     	The minimum interval for staleness calculations. This flag could be useful for removing gaps on graphs generated from time series with irregular intervals between samples. See also '-search.maxStalenessInterval'
   -search.minWindowForInstantRollupOptimization value
     	Enable cache-based optimization for repeated queries to /api/v1/query (aka instant queries), which contain rollup functions with lookbehind window exceeding the given value
-    	The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 6h)
+    	The following optional suffixes are supported: s (second), m (minute), h (hour), d (day), w (week), y (year). If suffix isn't set, then the duration is counted in months (default 3h)
   -search.noStaleMarkers
     	Set this flag to true if the database doesn't contain Prometheus stale markers, so there is no need in spending additional CPU time on its handling. Staleness markers may exist only in data obtained from Prometheus scrape targets
   -search.queryStats.lastQueriesCount int
@@ -26,7 +26,7 @@ import (
 )

 var (
-	httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
+	httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
 	useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
 		"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
 		"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
@@ -105,7 +105,7 @@ func RequestHandler(path string, w http.ResponseWriter, r *http.Request) bool {
 	vlstorage.MustAddRows(lr)
 	logstorage.PutLogRows(lr)
 	if err != nil {
-		logger.Warnf("cannot decode log message #%d in /_bulk request: %s", n, err)
+		logger.Warnf("cannot decode log message #%d in /_bulk request: %s, stream fields: %s", n, err, cp.StreamFields)
 		return true
 	}
@@ -65,7 +65,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(len(rows))
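The hunk above is the template this commit applies to every vmagent ingestion protocol: the fire-and-forget `remotewrite.Push` becomes `remotewrite.TryPush`, which reports failure instead of blocking when the queue cannot accept more data, and the insert function surfaces `remotewrite.ErrQueueFullHTTPRetry`. Here is a hedged sketch of the caller side; the package name, `parseRows` helper and HTTP wiring are hypothetical, only the error value comes from the diff.

```go
// Hypothetical wiring, for exposition only: translate a full remote-write queue
// into a retryable HTTP response instead of silently dropping ingested rows.
package ingest

import (
	"errors"
	"net/http"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
)

func handleInsert(w http.ResponseWriter, r *http.Request) {
	rows := parseRows(r.Body) // hypothetical protocol-specific parser
	if err := insertRows(nil, rows, nil); err != nil {
		if errors.Is(err, remotewrite.ErrQueueFullHTTPRetry) {
			// 429 asks well-behaved clients to back off and retry later.
			http.Error(w, err.Error(), http.StatusTooManyRequests)
			return
		}
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	w.WriteHeader(http.StatusNoContent)
}
```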
@@ -88,7 +88,9 @@ func insertRows(at *auth.Token, series []datadog.Series, extraLabels []prompbmar
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
@@ -5,6 +5,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/remotewrite"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/graphite/stream"
@@ -20,10 +21,12 @@ var (
 //
 // See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
 func InsertHandler(r io.Reader) error {
-	return stream.Parse(r, insertRows)
+	return stream.Parse(r, false, func(rows []parser.Row) error {
+		return insertRows(nil, rows)
+	})
 }

-func insertRows(rows []parser.Row) error {
+func insertRows(at *auth.Token, rows []parser.Row) error {
 	ctx := common.GetPushCtx()
 	defer common.PutPushCtx(ctx)

@@ -56,7 +59,9 @@ func insertRows(rows []parser.Row) error {
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(nil, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
@@ -36,9 +36,9 @@ var (
 // InsertHandlerForReader processes remote write for influx line protocol.
 //
 // See https://github.com/influxdata/telegraf/tree/master/plugins/inputs/socket_listener/
-func InsertHandlerForReader(r io.Reader, isGzipped bool) error {
+func InsertHandlerForReader(at *auth.Token, r io.Reader, isGzipped bool) error {
 	return stream.Parse(r, isGzipped, "", "", func(db string, rows []parser.Row) error {
-		return insertRows(nil, db, rows, nil)
+		return insertRows(at, db, rows, nil)
 	})
 }
@@ -130,7 +130,9 @@ func insertRows(at *auth.Token, db string, rows []parser.Row, extraLabels []prom
 	ctx.ctx.Labels = labels
 	ctx.ctx.Samples = samples
 	ctx.commonLabels = commonLabels
-	remotewrite.Push(at, &ctx.ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
@@ -11,8 +11,6 @@ import (
 	"sync/atomic"
 	"time"

-	"github.com/VictoriaMetrics/metrics"
-
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/csvimport"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/datadog"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmagent/graphite"
@@ -42,12 +40,13 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
+	"github.com/VictoriaMetrics/metrics"
 )

 var (
 	httpListenAddr = flag.String("httpListenAddr", ":8429", "TCP address to listen for http connections. "+
 		"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vmagent instances on the same server. "+
-		"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -httpListenAddr.useProxyProtocol")
+		"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
 	useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
 		"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
 		"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
@@ -125,7 +124,7 @@ func main() {
 	common.StartUnmarshalWorkers()
 	if len(*influxListenAddr) > 0 {
 		influxServer = influxserver.MustStart(*influxListenAddr, *influxUseProxyProtocol, func(r io.Reader) error {
-			return influx.InsertHandlerForReader(r, false)
+			return influx.InsertHandlerForReader(nil, r, false)
 		})
 	}
 	if len(*graphiteListenAddr) > 0 {
@@ -140,7 +139,7 @@ func main() {
 		opentsdbhttpServer = opentsdbhttpserver.MustStart(*opentsdbHTTPListenAddr, *opentsdbHTTPUseProxyProtocol, httpInsertHandler)
 	}

-	promscrape.Init(remotewrite.Push)
+	promscrape.Init(remotewrite.PushDropSamplesOnFailure)

 	if len(*httpListenAddr) > 0 {
 		go httpserver.Serve(*httpListenAddr, *useProxyProtocol, requestHandler)
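Note the scrape path in the last hunk: unlike the HTTP handlers, the scraper cannot ask its targets to retry, so `promscrape.Init` is wired to a push variant that drops data when the queue refuses it. A hedged sketch of what such a wrapper plausibly looks like; only the name `PushDropSamplesOnFailure` comes from the diff, its signature and the counter are assumptions.

```go
// Sketch only - the real function lives in vmagent's remotewrite package.
// Scraped samples cannot be re-requested from targets, so on a full queue they
// are counted as dropped rather than blocking the scrape loop.
func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
	if !TryPush(at, wr) {
		droppedSamplesTotal.Add(len(wr.Timeseries)) // hypothetical counter
	}
}
```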
@@ -84,6 +84,8 @@ func insertRows(at *auth.Token, block *stream.Block, extraLabels []prompbmarshal
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	return nil
 }
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []newrelic.Row, extraLabels []prompbmarshal
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(samplesCount)
@@ -59,7 +59,9 @@ func insertRows(at *auth.Token, tss []prompbmarshal.TimeSeries, extraLabels []pr
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
@@ -56,7 +56,9 @@ func insertRows(rows []parser.Row) error {
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(nil, &ctx.WriteRequest)
+	if !remotewrite.TryPush(nil, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
@@ -64,7 +64,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	rowsPerInsert.Update(float64(len(rows)))
 	return nil
@@ -73,7 +73,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(len(rows))
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(len(rows))
@@ -69,7 +69,9 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, extraLabels []pr
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
@@ -305,7 +305,7 @@ func (c *client) runWorker() {
 			continue
 		}
 		// Return unsent block to the queue.
-		c.fq.MustWriteBlock(block)
+		c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 		return
 	case <-c.stopCh:
 		// c must be stopped. Wait for a while in the hope the block will be sent.
@@ -314,11 +314,11 @@ func (c *client) runWorker() {
 		case ok := <-ch:
 			if !ok {
 				// Return unsent block to the queue.
-				c.fq.MustWriteBlock(block)
+				c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 			}
 		case <-time.After(graceDuration):
 			// Return unsent block to the queue.
-			c.fq.MustWriteBlock(block)
+			c.fq.MustWriteBlockIgnoreDisabledPQ(block)
 		}
 		return
 	}
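The renamed call sites above split queue writes into two contracts. An illustrative reading of that split follows; the interface is a sketch for exposition, not the repo's actual API, and only the method names come from the diff.

```go
// Illustrative contract only.
type blockWriter interface {
	// TryWriteBlock may refuse the block, e.g. when the on-disk queue is disabled
	// and in-memory buffers are full; callers propagate the failure upstream.
	TryWriteBlock(block []byte) bool
	// MustWriteBlockIgnoreDisabledPQ always persists; shutdown paths use it so
	// in-flight blocks survive a restart even with the persistent queue disabled.
	MustWriteBlockIgnoreDisabledPQ(block []byte)
}
```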
@ -37,9 +37,9 @@ type pendingSeries struct {
|
|||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingSeries(pushBlock func(block []byte), isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
func newPendingSeries(fq *persistentqueue.FastQueue, isVMRemoteWrite bool, significantFigures, roundDigits int) *pendingSeries {
|
||||
var ps pendingSeries
|
||||
ps.wr.pushBlock = pushBlock
|
||||
ps.wr.fq = fq
|
||||
ps.wr.isVMRemoteWrite = isVMRemoteWrite
|
||||
ps.wr.significantFigures = significantFigures
|
||||
ps.wr.roundDigits = roundDigits
|
||||
|
@ -57,10 +57,11 @@ func (ps *pendingSeries) MustStop() {
|
|||
ps.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) Push(tss []prompbmarshal.TimeSeries) {
|
||||
func (ps *pendingSeries) TryPush(tss []prompbmarshal.TimeSeries) bool {
|
||||
ps.mu.Lock()
|
||||
ps.wr.push(tss)
|
||||
ok := ps.wr.tryPush(tss)
|
||||
ps.mu.Unlock()
|
||||
return ok
|
||||
}
|
||||
|
||||
func (ps *pendingSeries) periodicFlusher() {
|
||||
|
@ -70,18 +71,20 @@ func (ps *pendingSeries) periodicFlusher() {
|
|||
}
|
||||
ticker := time.NewTicker(*flushInterval)
|
||||
defer ticker.Stop()
|
||||
mustStop := false
|
||||
for !mustStop {
|
||||
for {
|
||||
select {
|
||||
case <-ps.stopCh:
|
||||
mustStop = true
|
||||
ps.mu.Lock()
|
||||
ps.wr.mustFlushOnStop()
|
||||
ps.mu.Unlock()
|
||||
return
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-atomic.LoadUint64(&ps.wr.lastFlushTime) < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ps.mu.Lock()
|
||||
ps.wr.flush()
|
||||
_ = ps.wr.tryFlush()
|
||||
ps.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
@ -90,16 +93,16 @@ type writeRequest struct {
|
|||
// Move lastFlushTime to the top of the struct in order to guarantee atomic access on 32-bit architectures.
|
||||
lastFlushTime uint64
|
||||
|
||||
// pushBlock is called when whe write request is ready to be sent.
|
||||
pushBlock func(block []byte)
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
|
||||
// Whether to encode the write request with VictoriaMetrics remote write protocol.
|
||||
isVMRemoteWrite bool
|
||||
|
||||
// How many significant figures must be left before sending the writeRequest to pushBlock.
|
||||
// How many significant figures must be left before sending the writeRequest to fq.
|
||||
significantFigures int
|
||||
|
||||
// How many decimal digits after point must be left before sending the writeRequest to pushBlock.
|
||||
// How many decimal digits after point must be left before sending the writeRequest to fq.
|
||||
roundDigits int
|
||||
|
||||
wr prompbmarshal.WriteRequest
|
||||
|
@ -112,7 +115,7 @@ type writeRequest struct {
|
|||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
// Do not reset lastFlushTime, pushBlock, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
// Do not reset lastFlushTime, fq, isVMRemoteWrite, significantFigures and roundDigits, since they are re-used.
|
||||
|
||||
wr.wr.Timeseries = nil
|
||||
|
||||
|
@ -130,23 +133,40 @@ func (wr *writeRequest) reset() {
|
|||
wr.buf = wr.buf[:0]
|
||||
}
|
||||
|
||||
func (wr *writeRequest) flush() {
|
||||
// mustFlushOnStop force pushes wr data into wr.fq
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (wr *writeRequest) mustFlushOnStop() {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
wr.adjustSampleValues()
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
pushWriteRequest(&wr.wr, wr.pushBlock, wr.isVMRemoteWrite)
|
||||
if !tryPushWriteRequest(&wr.wr, wr.mustWriteBlock, wr.isVMRemoteWrite) {
|
||||
logger.Panicf("BUG: final flush must always return true")
|
||||
}
|
||||
wr.reset()
|
||||
}
|
||||
|
||||
func (wr *writeRequest) adjustSampleValues() {
|
||||
samples := wr.samples
|
||||
if n := wr.significantFigures; n > 0 {
|
||||
func (wr *writeRequest) mustWriteBlock(block []byte) bool {
|
||||
wr.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wr *writeRequest) tryFlush() bool {
|
||||
wr.wr.Timeseries = wr.tss
|
||||
atomic.StoreUint64(&wr.lastFlushTime, fasttime.UnixTimestamp())
|
||||
if !tryPushWriteRequest(&wr.wr, wr.fq.TryWriteBlock, wr.isVMRemoteWrite) {
|
||||
return false
|
||||
}
|
||||
wr.reset()
|
||||
return true
|
||||
}
|
||||
|
||||
func adjustSampleValues(samples []prompbmarshal.Sample, significantFigures, roundDigits int) {
|
||||
if n := significantFigures; n > 0 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToSignificantFigures(s.Value, n)
|
||||
}
|
||||
}
|
||||
if n := wr.roundDigits; n < 100 {
|
||||
if n := roundDigits; n < 100 {
|
||||
for i := range samples {
|
||||
s := &samples[i]
|
||||
s.Value = decimal.RoundToDecimalDigits(s.Value, n)
|
||||
|
@@ -154,21 +174,27 @@ func (wr *writeRequest) adjustSampleValues() {
 	}
 }
 
-func (wr *writeRequest) push(src []prompbmarshal.TimeSeries) {
+func (wr *writeRequest) tryPush(src []prompbmarshal.TimeSeries) bool {
 	tssDst := wr.tss
 	maxSamplesPerBlock := *maxRowsPerBlock
 	// Allow up to 10x of labels per each block on average.
 	maxLabelsPerBlock := 10 * maxSamplesPerBlock
 	for i := range src {
-		tssDst = append(tssDst, prompbmarshal.TimeSeries{})
-		wr.copyTimeSeries(&tssDst[len(tssDst)-1], &src[i])
 		if len(wr.samples) >= maxSamplesPerBlock || len(wr.labels) >= maxLabelsPerBlock {
 			wr.tss = tssDst
-			wr.flush()
+			if !wr.tryFlush() {
+				return false
+			}
 			tssDst = wr.tss
 		}
+		tsSrc := &src[i]
+		adjustSampleValues(tsSrc.Samples, wr.significantFigures, wr.roundDigits)
+		tssDst = append(tssDst, prompbmarshal.TimeSeries{})
+		wr.copyTimeSeries(&tssDst[len(tssDst)-1], tsSrc)
 	}
 
 	wr.tss = tssDst
+	return true
 }
 
 func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {

@@ -196,10 +222,10 @@ func (wr *writeRequest) copyTimeSeries(dst, src *prompbmarshal.TimeSeries) {
 	wr.buf = buf
 }
 
-func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byte), isVMRemoteWrite bool) {
+func tryPushWriteRequest(wr *prompbmarshal.WriteRequest, tryPushBlock func(block []byte) bool, isVMRemoteWrite bool) bool {
 	if len(wr.Timeseries) == 0 {
 		// Nothing to push
-		return
+		return true
 	}
 	bb := writeRequestBufPool.Get()
 	bb.B = prompbmarshal.MarshalWriteRequest(bb.B[:0], wr)

@@ -212,11 +238,13 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
 		}
 		writeRequestBufPool.Put(bb)
 		if len(zb.B) <= persistentqueue.MaxBlockSize {
-			pushBlock(zb.B)
+			if !tryPushBlock(zb.B) {
+				return false
+			}
 			blockSizeRows.Update(float64(len(wr.Timeseries)))
 			blockSizeBytes.Update(float64(len(zb.B)))
 			snappyBufPool.Put(zb)
-			return
+			return true
 		}
 		snappyBufPool.Put(zb)
 	} else {

@@ -229,23 +257,36 @@ func pushWriteRequest(wr *prompbmarshal.WriteRequest, pushBlock func(block []byt
 		samples := wr.Timeseries[0].Samples
 		if len(samples) == 1 {
 			logger.Warnf("dropping a sample for metric with too long labels exceeding -remoteWrite.maxBlockSize=%d bytes", maxUnpackedBlockSize.N)
-			return
+			return true
 		}
 		n := len(samples) / 2
 		wr.Timeseries[0].Samples = samples[:n]
-		pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+		if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+			wr.Timeseries[0].Samples = samples
+			return false
+		}
 		wr.Timeseries[0].Samples = samples[n:]
-		pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+		if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+			wr.Timeseries[0].Samples = samples
+			return false
+		}
 		wr.Timeseries[0].Samples = samples
-		return
+		return true
 	}
 	timeseries := wr.Timeseries
 	n := len(timeseries) / 2
 	wr.Timeseries = timeseries[:n]
-	pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+	if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+		wr.Timeseries = timeseries
+		return false
+	}
 	wr.Timeseries = timeseries[n:]
-	pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+	if !tryPushWriteRequest(wr, tryPushBlock, isVMRemoteWrite) {
+		wr.Timeseries = timeseries
+		return false
+	}
 	wr.Timeseries = timeseries
+	return true
 }
 
 var (
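The halving in `tryPushWriteRequest` above is a generic answer to over-sized payloads: if the marshalled block exceeds the limit, split the batch in two and retry each half. A minimal standalone sketch of that pattern; `trySendHalving` and `trySend` are hypothetical names, not the vmagent API:

```go
package main

import "fmt"

// trySendHalving recursively splits items until each chunk is at most
// maxLen, then hands every chunk to trySend. It stops at the first
// failed chunk, mirroring the early-return style of tryPushWriteRequest.
func trySendHalving(items []int, maxLen int, trySend func([]int) bool) bool {
	if len(items) == 0 {
		return true
	}
	if len(items) <= maxLen {
		return trySend(items)
	}
	n := len(items) / 2
	return trySendHalving(items[:n], maxLen, trySend) &&
		trySendHalving(items[n:], maxLen, trySend)
}

func main() {
	sent := 0
	ok := trySendHalving(make([]int, 10), 3, func(chunk []int) bool {
		sent += len(chunk)
		return true
	})
	fmt.Println(ok, sent) // true 10
}
```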
@@ -26,13 +26,16 @@ func testPushWriteRequest(t *testing.T, rowsCount, expectedBlockLenProm, expecte
 	t.Helper()
 	wr := newTestWriteRequest(rowsCount, 20)
 	pushBlockLen := 0
-	pushBlock := func(block []byte) {
+	pushBlock := func(block []byte) bool {
 		if pushBlockLen > 0 {
 			panic(fmt.Errorf("BUG: pushBlock called multiple times; pushBlockLen=%d at first call, len(block)=%d at second call", pushBlockLen, len(block)))
 		}
 		pushBlockLen = len(block)
+		return true
 	}
-	pushWriteRequest(wr, pushBlock, isVMRemoteWrite)
+	if !tryPushWriteRequest(wr, pushBlock, isVMRemoteWrite) {
+		t.Fatalf("cannot push data to remote storage")
+	}
 	if math.Abs(float64(pushBlockLen-expectedBlockLen)/float64(expectedBlockLen)*100) > tolerancePrc {
 		t.Fatalf("unexpected block len for rowsCount=%d, isVMRemoteWrite=%v; got %d bytes; expecting %d bytes +- %.0f%%",
 			rowsCount, isVMRemoteWrite, pushBlockLen, expectedBlockLen, tolerancePrc)
@@ -3,6 +3,7 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
+	"strconv"
 	"strings"
 	"sync"

@@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro
 		// Nothing to change.
 		return tss
 	}
+	rctx.reset()
 	tssDst := tss[:0]
 	labels := rctx.labels[:0]
 	for i := range tss {

@@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
 	if len(extraLabels) == 0 {
 		return
 	}
+	rctx.reset()
 	labels := rctx.labels[:0]
 	for i := range tss {
 		ts := &tss[i]

@@ -139,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL
 	rctx.labels = labels
 }
 
+func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) {
+	rctx.reset()
+	accountIDStr := strconv.FormatUint(uint64(accountID), 10)
+	projectIDStr := strconv.FormatUint(uint64(projectID), 10)
+	labels := rctx.labels[:0]
+	for i := range tss {
+		ts := &tss[i]
+		labelsLen := len(labels)
+		for _, label := range ts.Labels {
+			labelName := label.Name
+			if labelName == "vm_account_id" || labelName == "vm_project_id" {
+				continue
+			}
+			labels = append(labels, label)
+		}
+		labels = append(labels, prompbmarshal.Label{
+			Name:  "vm_account_id",
+			Value: accountIDStr,
+		})
+		labels = append(labels, prompbmarshal.Label{
+			Name:  "vm_project_id",
+			Value: projectIDStr,
+		})
+		ts.Labels = labels[labelsLen:]
+	}
+	rctx.labels = labels
+}
+
 type relabelCtx struct {
 	// pool for labels, which are used during the relabeling.
 	labels []prompbmarshal.Label

@@ -160,7 +191,7 @@ func getRelabelCtx() *relabelCtx {
 }
 
 func putRelabelCtx(rctx *relabelCtx) {
-	rctx.labels = rctx.labels[:0]
+	rctx.reset()
 	relabelCtxPool.Put(rctx)
 }
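The `tenantToLabels` method added above makes the tenant identity travel inside the series itself: any stale `vm_account_id`/`vm_project_id` labels are stripped and fresh ones appended. The same rewrite on a simplified label type, as a standalone sketch (the `label` struct and helper are illustrative, not the vmagent types):

```go
package main

import (
	"fmt"
	"strconv"
)

type label struct{ Name, Value string }

// tenantToLabels drops any pre-existing tenant labels and appends the
// ones derived from accountID and projectID, like the method above.
func tenantToLabels(labels []label, accountID, projectID uint32) []label {
	dst := labels[:0]
	for _, l := range labels {
		if l.Name == "vm_account_id" || l.Name == "vm_project_id" {
			continue
		}
		dst = append(dst, l)
	}
	dst = append(dst, label{"vm_account_id", strconv.FormatUint(uint64(accountID), 10)})
	dst = append(dst, label{"vm_project_id", strconv.FormatUint(uint64(projectID), 10)})
	return dst
}

func main() {
	ls := []label{{"__name__", "up"}, {"vm_account_id", "0"}}
	fmt.Println(tenantToLabels(ls, 42, 7))
	// [{__name__ up} {vm_account_id 42} {vm_project_id 7}]
}
```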
@@ -3,6 +3,7 @@ package remotewrite
 import (
 	"flag"
 	"fmt"
+	"net/http"
 	"net/url"
 	"path/filepath"
 	"strconv"

@@ -10,6 +11,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@@ -34,18 +37,22 @@ var (
 	remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+
 		"or Prometheus remote_write protocol. Example url: http://<victoriametrics-host>:8428/api/v1/write . "+
 		"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+
-		"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+
-		"See also -remoteWrite.multitenantURL")
+		"The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set")
 	remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+
 		"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://<vminsert>:8480 . "+
-		"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url")
+		"Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+
+		"This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy")
+	enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+
+		"https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . By default incoming data is processed via single-node insert handlers "+
+		"according to https://docs.victoriametrics.com/#how-to-import-time-series-data ."+
+		"See https://docs.victoriametrics.com/vmagent.html#multitenancy for details")
 	shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+
 		"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
 	shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
 		"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
 		"even distribution of series over the specified -remoteWrite.url systems")
-	tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory where temporary data for remote write component is stored. "+
-		"See also -remoteWrite.maxDiskUsagePerURL")
+	tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
+		"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
 	keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
 		"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
 	queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+

@@ -84,6 +91,11 @@ var (
 		"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/stream-aggregation.html")
 	streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before being aggregated. "+
 		"Only the last sample per each time series per each interval is aggregated if the interval is greater than zero")
+	disableOnDiskQueue = flag.Bool("remoteWrite.disableOnDiskQueue", false, "Whether to disable storing pending data to -remoteWrite.tmpDataPath "+
+		"when the configured remote storage systems cannot keep up with the data ingestion rate. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence ."+
+		"See also -remoteWrite.dropSamplesOnOverload")
+	dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
+		"cannot be pushed into the configured remote storage systems in a timely manner. See https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence")
 )
 
 var (
@@ -96,11 +108,19 @@ var (
 
 	// Data without tenant id is written to defaultAuthToken if -remoteWrite.multitenantURL is specified.
 	defaultAuthToken = &auth.Token{}
+
+	// ErrQueueFullHTTPRetry must be returned when TryPush() returns false.
+	ErrQueueFullHTTPRetry = &httpserver.ErrorWithStatusCode{
+		Err: fmt.Errorf("remote storage systems cannot keep up with the data ingestion rate; retry the request later " +
+			"or remove -remoteWrite.disableOnDiskQueue from vmagent command-line flags, so it could save pending data to -remoteWrite.tmpDataPath; " +
+			"see https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence"),
+		StatusCode: http.StatusTooManyRequests,
+	}
 )
 
-// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified.
+// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified.
 func MultitenancyEnabled() bool {
-	return len(*remoteWriteMultitenantURLs) > 0
+	return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0
 }
 
 // Contains the current relabelConfigs.
@@ -183,6 +203,7 @@ func Init() {
 	if len(*remoteWriteURLs) > 0 {
 		rwctxsDefault = newRemoteWriteCtxs(nil, *remoteWriteURLs)
 	}
+	dropDanglingQueues()
 
 	// Start config reloader.
 	configReloaderWG.Add(1)

@@ -200,6 +221,42 @@ func Init() {
 	}()
 }
 
+func dropDanglingQueues() {
+	if *keepDanglingQueues {
+		return
+	}
+	if len(*remoteWriteMultitenantURLs) > 0 {
+		// Do not drop dangling queues for *remoteWriteMultitenantURLs, since it is impossible to determine
+		// unused queues for multitenant urls - they are created on demand when new sample for the given
+		// tenant is pushed to remote storage.
+		return
+	}
+	// Remove dangling persistent queues, if any.
+	// This is required for the case when the number of queues has been changed or URL have been changed.
+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
+	//
+	existingQueues := make(map[string]struct{}, len(rwctxsDefault))
+	for _, rwctx := range rwctxsDefault {
+		existingQueues[rwctx.fq.Dirname()] = struct{}{}
+	}
+
+	queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
+	files := fs.MustReadDir(queuesDir)
+	removed := 0
+	for _, f := range files {
+		dirname := f.Name()
+		if _, ok := existingQueues[dirname]; !ok {
+			logger.Infof("removing dangling queue %q", dirname)
+			fullPath := filepath.Join(queuesDir, dirname)
+			fs.MustRemoveAll(fullPath)
+			removed++
+		}
+	}
+	if removed > 0 {
+		logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsDefault))
+	}
+}
+
 func reloadRelabelConfigs() {
 	relabelConfigReloads.Inc()
 	logger.Infof("reloading relabel configs pointed by -remoteWrite.relabelConfig and -remoteWrite.urlRelabelConfig")
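`dropDanglingQueues` reduces to: delete every queue directory whose name is not in the keep-set built from the active contexts. A standalone sketch of that shape with plain `os` calls instead of the `lib/fs` helpers; `dropDangling` and the directory layout are assumptions for illustration:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// dropDangling removes every entry of dir whose name is not in keep and
// reports how many entries were removed.
func dropDangling(dir string, keep map[string]struct{}) (int, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return 0, err
	}
	removed := 0
	for _, e := range entries {
		if _, ok := keep[e.Name()]; ok {
			continue
		}
		if err := os.RemoveAll(filepath.Join(dir, e.Name())); err != nil {
			return removed, err
		}
		removed++
	}
	return removed, nil
}

func main() {
	n, err := dropDangling("/tmp/queues-example", map[string]struct{}{"keep-me": {}})
	fmt.Println(n, err)
}
```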
@@ -273,33 +330,6 @@ func newRemoteWriteCtxs(at *auth.Token, urls []string) []*remoteWriteCtx {
 		}
 		rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
 	}
 
-	if !*keepDanglingQueues {
-		// Remove dangling queues, if any.
-		// This is required for the case when the number of queues has been changed or URL have been changed.
-		// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
-		existingQueues := make(map[string]struct{}, len(rwctxs))
-		for _, rwctx := range rwctxs {
-			existingQueues[rwctx.fq.Dirname()] = struct{}{}
-		}
-
-		queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
-		files := fs.MustReadDir(queuesDir)
-		removed := 0
-		for _, f := range files {
-			dirname := f.Name()
-			if _, ok := existingQueues[dirname]; !ok {
-				logger.Infof("removing dangling queue %q", dirname)
-				fullPath := filepath.Join(queuesDir, dirname)
-				fs.MustRemoveAll(fullPath)
-				removed++
-			}
-		}
-		if removed > 0 {
-			logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxs))
-		}
-	}
-
 	return rwctxs
 }
@@ -308,7 +338,7 @@ var configReloaderWG sync.WaitGroup
 
 // Stop stops remotewrite.
 //
-// It is expected that nobody calls Push during and after the call to this func.
+// It is expected that nobody calls TryPush during and after the call to this func.
 func Stop() {
 	close(configReloaderStopCh)
 	configReloaderWG.Wait()

@@ -318,7 +348,7 @@ func Stop() {
 	}
 	rwctxsDefault = nil
 
-	// There is no need in locking rwctxsMapLock here, since nobody should call Push during the Stop call.
+	// There is no need in locking rwctxsMapLock here, since nobody should call TryPush during the Stop call.
 	for _, rwctxs := range rwctxsMap {
 		for _, rwctx := range rwctxs {
 			rwctx.MustStop()
@@ -334,24 +364,47 @@ func Stop() {
 	}
 }
 
-// Push sends wr to remote storage systems set via `-remoteWrite.url`.
+// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
 //
-// If at is nil, then the data is pushed to the configured `-remoteWrite.url`.
-// If at isn't nil, the data is pushed to the configured `-remoteWrite.multitenantURL`.
+// If at is nil, then the data is pushed to the configured -remoteWrite.url.
+// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
 //
-// Note that wr may be modified by Push because of relabeling and rounding.
-func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
-	if at == nil && len(*remoteWriteMultitenantURLs) > 0 {
-		// Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set.
+// PushDropSamplesOnFailure can modify wr contents.
+func PushDropSamplesOnFailure(at *auth.Token, wr *prompbmarshal.WriteRequest) {
+	_ = tryPush(at, wr, true)
+}
+
+// TryPush tries sending wr to the configured remote storage systems set via -remoteWrite.url and -remoteWrite.multitenantURL
+//
+// If at is nil, then the data is pushed to the configured -remoteWrite.url.
+// If at isn't nil, the data is pushed to the configured -remoteWrite.multitenantURL.
+//
+// TryPush can modify wr contents, so the caller must re-initialize wr before calling TryPush() after unsuccessful attempt.
+// TryPush may send partial data from wr on unsuccessful attempt, so repeated call for the same wr may send the data multiple times.
+//
+// The caller must return ErrQueueFullHTTPRetry to the client, which sends wr, if TryPush returns false.
+func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool {
+	return tryPush(at, wr, *dropSamplesOnOverload)
+}
+
+func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool {
+	tss := wr.Timeseries
+
+	if at == nil && MultitenancyEnabled() {
+		// Write data to default tenant if at isn't set when multitenancy is enabled.
 		at = defaultAuthToken
 	}
+
+	var tenantRctx *relabelCtx
 	var rwctxs []*remoteWriteCtx
 	if at == nil {
 		rwctxs = rwctxsDefault
+	} else if len(*remoteWriteMultitenantURLs) == 0 {
+		// Convert at to (vm_account_id, vm_project_id) labels.
+		tenantRctx = getRelabelCtx()
+		defer putRelabelCtx(tenantRctx)
+		rwctxs = rwctxsDefault
 	} else {
-		if len(*remoteWriteMultitenantURLs) == 0 {
-			logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at)
-		}
 		rwctxsMapLock.Lock()
 		tenantID := tenantmetrics.TenantID{
 			AccountID: at.AccountID,
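A sketch of the calling convention described in the `TryPush` comments above, written as a hypothetical caller inside the same package (`handleInsert` is illustrative; `TryPush` and `ErrQueueFullHTTPRetry` are the names this diff introduces):

```go
package remotewrite

import (
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)

// handleInsert shows the contract: a false result from TryPush must
// surface to the client as ErrQueueFullHTTPRetry (HTTP 429), and wr must
// not be reused afterwards without re-initialization, since TryPush may
// already have sent part of it.
func handleInsert(at *auth.Token, wr *prompbmarshal.WriteRequest) error {
	if !TryPush(at, wr) {
		return ErrQueueFullHTTPRetry
	}
	return nil
}
```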
@@ -365,18 +418,37 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		rwctxsMapLock.Unlock()
 	}
 
+	rowsCount := getRowsCount(tss)
+
+	if *disableOnDiskQueue {
+		// Quick check whether writes to configured remote storage systems are blocked.
+		// This allows saving CPU time spent on relabeling and block compression
+		// if some of remote storage systems cannot keep up with the data ingestion rate.
+		for _, rwctx := range rwctxs {
+			if rwctx.fq.IsWriteBlocked() {
+				pushFailures.Inc()
+				if dropSamplesOnFailure {
+					// Just drop samples
+					samplesDropped.Add(rowsCount)
+					return true
+				}
+				return false
+			}
+		}
+	}
+
 	var rctx *relabelCtx
 	rcs := allRelabelConfigs.Load()
 	pcsGlobal := rcs.global
 	if pcsGlobal.Len() > 0 {
 		rctx = getRelabelCtx()
+		defer putRelabelCtx(rctx)
 	}
-	tss := wr.Timeseries
-	rowsCount := getRowsCount(tss)
 	globalRowsPushedBeforeRelabel.Add(rowsCount)
 	maxSamplesPerBlock := *maxRowsPerBlock
 	// Allow up to 10x of labels per each block on average.
 	maxLabelsPerBlock := 10 * maxSamplesPerBlock
 	for len(tss) > 0 {
 		// Process big tss in smaller blocks in order to reduce the maximum memory usage
 		samplesCount := 0
|
|||
i := 0
|
||||
for i < len(tss) {
|
||||
samplesCount += len(tss[i].Samples)
|
||||
labelsCount += len(tss[i].Labels)
|
||||
labelsCount += len(tss[i].Samples) * len(tss[i].Labels)
|
||||
i++
|
||||
if samplesCount >= maxSamplesPerBlock || labelsCount >= maxLabelsPerBlock {
|
||||
break
|
||||
|
@ -397,6 +469,9 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
|
|||
} else {
|
||||
tss = nil
|
||||
}
|
||||
if tenantRctx != nil {
|
||||
tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID)
|
||||
}
|
||||
if rctx != nil {
|
||||
rowsCountBeforeRelabel := getRowsCount(tssBlock)
|
||||
tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal)
|
||||
|
@@ -405,25 +480,35 @@ func Push(at *auth.Token, wr *prompbmarshal.WriteRequest) {
 		}
 		sortLabelsIfNeeded(tssBlock)
 		tssBlock = limitSeriesCardinality(tssBlock)
-		pushBlockToRemoteStorages(rwctxs, tssBlock)
+		if !tryPushBlockToRemoteStorages(rwctxs, tssBlock) {
+			if !*disableOnDiskQueue {
+				logger.Panicf("BUG: tryPushBlockToRemoteStorages must return true if -remoteWrite.disableOnDiskQueue isn't set")
+			}
+			pushFailures.Inc()
+			if dropSamplesOnFailure {
+				samplesDropped.Add(rowsCount)
+				return true
+			}
+			return false
+		}
 		if rctx != nil {
 			rctx.reset()
 		}
 	}
-	if rctx != nil {
-		putRelabelCtx(rctx)
-	}
+	return true
 }
 
-func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) {
+var (
+	samplesDropped = metrics.NewCounter(`vmagent_remotewrite_samples_dropped_total`)
+	pushFailures   = metrics.NewCounter(`vmagent_remotewrite_push_failures_total`)
+)
+
+func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarshal.TimeSeries) bool {
 	if len(tssBlock) == 0 {
 		// Nothing to push
-		return
+		return true
 	}
 
 	if len(rwctxs) == 1 {
 		// Fast path - just push data to the configured single remote storage
-		rwctxs[0].Push(tssBlock)
-		return
+		return rwctxs[0].TryPush(tssBlock)
 	}
 
 	// We need to push tssBlock to multiple remote storages.
@@ -452,6 +537,7 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 	// the time needed for sending the data to multiple remote storage systems.
 	var wg sync.WaitGroup
 	wg.Add(len(rwctxs))
+	var anyPushFailed uint64
 	for i, rwctx := range rwctxs {
 		tssShard := tssByURL[i]
 		if len(tssShard) == 0 {

@@ -459,11 +545,13 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 		}
 		go func(rwctx *remoteWriteCtx, tss []prompbmarshal.TimeSeries) {
 			defer wg.Done()
-			rwctx.Push(tss)
+			if !rwctx.TryPush(tss) {
+				atomic.StoreUint64(&anyPushFailed, 1)
+			}
 		}(rwctx, tssShard)
 	}
 	wg.Wait()
-	return
+	return atomic.LoadUint64(&anyPushFailed) == 0
 }
 
 // Replicate data among rwctxs.

@@ -471,13 +559,17 @@ func pushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmarsha
 	// the time needed for sending the data to multiple remote storage systems.
 	var wg sync.WaitGroup
 	wg.Add(len(rwctxs))
+	var anyPushFailed uint64
 	for _, rwctx := range rwctxs {
 		go func(rwctx *remoteWriteCtx) {
 			defer wg.Done()
-			rwctx.Push(tssBlock)
+			if !rwctx.TryPush(tssBlock) {
+				atomic.StoreUint64(&anyPushFailed, 1)
+			}
 		}(rwctx)
 	}
 	wg.Wait()
+	return atomic.LoadUint64(&anyPushFailed) == 0
 }
 
 // sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
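Both the sharding and the replication paths above rely on the same fan-out idiom: one goroutine per remote storage, a `sync.WaitGroup` for completion and a single atomic flag for 'did anything fail'. The idiom in isolation, as a standalone sketch (`tryAll` and its targets are illustrative):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// tryAll runs one try function per target concurrently and reports
// whether every one of them succeeded.
func tryAll(targets []func() bool) bool {
	var wg sync.WaitGroup
	var anyFailed uint64
	wg.Add(len(targets))
	for _, try := range targets {
		go func(try func() bool) {
			defer wg.Done()
			if !try() {
				atomic.StoreUint64(&anyFailed, 1)
			}
		}(try)
	}
	wg.Wait()
	return atomic.LoadUint64(&anyFailed) == 0
}

func main() {
	ok := tryAll([]func() bool{
		func() bool { return true },
		func() bool { return false }, // one blocked remote storage
	})
	fmt.Println(ok) // false
}
```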
@@ -597,13 +689,19 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
 		logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
 		maxPendingBytes = persistentqueue.DefaultChunkFileSize
 	}
-	fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes)
+	fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, *disableOnDiskQueue)
 	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
 		return float64(fq.GetPendingBytes())
 	})
 	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
 		return float64(fq.GetInmemoryQueueLen())
 	})
+	_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
+		if fq.IsWriteBlocked() {
+			return 1
+		}
+		return 0
+	})
 
 	var c *client
 	switch remoteWriteURL.Scheme {

@@ -625,7 +723,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
 	}
 	pss := make([]*pendingSeries, pssLen)
 	for i := range pss {
-		pss[i] = newPendingSeries(fq.MustWriteBlock, c.useVMProto, sf, rd)
+		pss[i] = newPendingSeries(fq, c.useVMProto, sf, rd)
 	}
 
 	rwctx := &remoteWriteCtx{

@@ -642,7 +740,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks in
 	sasFile := streamAggrConfig.GetOptionalArg(argIdx)
 	if sasFile != "" {
 		dedupInterval := streamAggrDedupInterval.GetOptionalArg(argIdx)
-		sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
+		sas, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
 		if err != nil {
 			logger.Fatalf("cannot initialize stream aggregators from -remoteWrite.streamAggr.config=%q: %s", sasFile, err)
 		}
@@ -678,7 +776,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
 	rwctx.rowsDroppedByRelabel = nil
 }
 
-func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
+func (rwctx *remoteWriteCtx) TryPush(tss []prompbmarshal.TimeSeries) bool {
 	// Apply relabeling
 	var rctx *relabelCtx
 	var v *[]prompbmarshal.TimeSeries

@@ -716,7 +814,9 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 		}
 		matchIdxsPool.Put(matchIdxs)
 	}
-	rwctx.pushInternal(tss)
+	// Try pushing the data to remote storage
+	ok := rwctx.tryPushInternal(tss)
+	// Return back relabeling contexts to the pool
 	if rctx != nil {

@@ -724,6 +824,8 @@ func (rwctx *remoteWriteCtx) Push(tss []prompbmarshal.TimeSeries) {
 		tssPool.Put(v)
 		putRelabelCtx(rctx)
 	}
+
+	return ok
 }
 
 var matchIdxsPool bytesutil.ByteBufferPool
@@ -743,7 +845,21 @@ func dropAggregatedSeries(src []prompbmarshal.TimeSeries, matchIdxs []byte, drop
 	return dst
 }
 
-func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
+func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompbmarshal.TimeSeries) {
+	if rwctx.tryPushInternal(tss) {
+		return
+	}
+	if !*disableOnDiskQueue {
+		logger.Panicf("BUG: tryPushInternal must return true if -remoteWrite.disableOnDiskQueue isn't set")
+	}
+	pushFailures.Inc()
+	if *dropSamplesOnOverload {
+		rowsCount := getRowsCount(tss)
+		samplesDropped.Add(rowsCount)
+	}
+}
+
+func (rwctx *remoteWriteCtx) tryPushInternal(tss []prompbmarshal.TimeSeries) bool {
 	var rctx *relabelCtx
 	var v *[]prompbmarshal.TimeSeries
 	if len(labelsGlobal) > 0 {

@@ -757,13 +873,16 @@ func (rwctx *remoteWriteCtx) pushInternal(tss []prompbmarshal.TimeSeries) {
 
 	pss := rwctx.pss
 	idx := atomic.AddUint64(&rwctx.pssNextIdx, 1) % uint64(len(pss))
-	pss[idx].Push(tss)
+
+	ok := pss[idx].TryPush(tss)
+
 	if rctx != nil {
 		*v = prompbmarshal.ResetTimeSeries(tss)
 		tssPool.Put(v)
 		putRelabelCtx(rctx)
 	}
+
+	return ok
 }
 
 func (rwctx *remoteWriteCtx) reinitStreamAggr() {

@@ -776,7 +895,7 @@ func (rwctx *remoteWriteCtx) reinitStreamAggr() {
 	logger.Infof("reloading stream aggregation configs pointed by -remoteWrite.streamAggr.config=%q", sasFile)
 	metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_total{path=%q}`, sasFile)).Inc()
 	dedupInterval := streamAggrDedupInterval.GetOptionalArg(rwctx.idx)
-	sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternal, dedupInterval)
+	sasNew, err := streamaggr.LoadFromFile(sasFile, rwctx.pushInternalTrackDropped, dedupInterval)
 	if err != nil {
 		metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reloads_errors_total{path=%q}`, sasFile)).Inc()
 		metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_streamaggr_config_reload_successful{path=%q}`, sasFile)).Set(0)
@@ -76,7 +76,9 @@ func insertRows(at *auth.Token, rows []parser.Row, extraLabels []prompbmarshal.L
 	ctx.WriteRequest.Timeseries = tssDst
 	ctx.Labels = labels
 	ctx.Samples = samples
-	remotewrite.Push(at, &ctx.WriteRequest)
+	if !remotewrite.TryPush(at, &ctx.WriteRequest) {
+		return remotewrite.ErrQueueFullHTTPRetry
+	}
 	rowsInserted.Add(rowsTotal)
 	if at != nil {
 		rowsTenantInserted.Get(at).Add(rowsTotal)
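On the wire, the `ErrQueueFullHTTPRetry` returned above becomes an HTTP 429, which is retryable by design. A hedged sketch of a sender that backs off on 429; the URL, content type and backoff policy are illustrative only:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
	"time"
)

// postWithRetry retries on HTTP 429, which vmagent returns when
// -remoteWrite.disableOnDiskQueue is set and the queue is full.
func postWithRetry(url string, body []byte, attempts int) error {
	for i := 0; i < attempts; i++ {
		resp, err := http.Post(url, "application/x-protobuf", bytes.NewReader(body))
		if err != nil {
			return err
		}
		resp.Body.Close()
		if resp.StatusCode != http.StatusTooManyRequests {
			return nil
		}
		time.Sleep(time.Second << i) // exponential backoff
	}
	return fmt.Errorf("gave up after %d attempts", attempts)
}

func main() {
	_ = postWithRetry("http://localhost:8429/api/v1/write", nil, 3)
}
```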
@@ -1,246 +1,3 @@
-# vmalert-tool
-
-VMAlert command-line tool
-
-## Unit testing for rules
-
-You can use `vmalert-tool` to run unit tests for alerting and recording rules.
-It will perform the following actions:
-* sets up an isolated VictoriaMetrics instance;
-* simulates the periodic ingestion of time series;
-* queries the ingested data for recording and alerting rules evaluation like [vmalert](https://docs.victoriametrics.com/vmalert.html);
-* checks whether the firing alerts or resulting recording rules match the expected results.
-
-See how to run vmalert-tool for unit test below:
-
-```
-# Run vmalert-tool with one or multiple test files via --files cmd-line flag
-./vmalert-tool unittest --files test1.yaml --files test2.yaml
-```
-
-vmalert-tool unittest is compatible with [Prometheus config format for tests](https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-file-format)
-except `promql_expr_test` field. Use `metricsql_expr_test` field name instead. The name is different because vmalert-tool
-validates and executes [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html) expressions,
-which aren't always backward compatible with [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/).
-
-### Test file format
-
-The configuration format for files specified in `--files` cmd-line flag is the following:
-
-```yaml
-# Path to the files or http url containing [rule groups](https://docs.victoriametrics.com/vmalert.html#groups) configuration.
-# Enterprise version of vmalert-tool supports S3 and GCS paths to rules.
-rule_files:
-  [ - <string> ]
-
-# The evaluation interval for rules specified in `rule_files`
-[ evaluation_interval: <duration> | default = 1m ]
-
-# Groups listed below will be evaluated in this order.
-# Not all the groups need to be mentioned; those that aren't are evaluated in the order they are defined in rule_files.
-group_eval_order:
-  [ - <string> ]
-
-# The list of unit test files to be checked during evaluation.
-tests:
-  [ - <test_group> ]
-```
-
-#### `<test_group>`
-
-```yaml
-# Interval between samples for input series
-interval: <duration>
-# Time series to persist into the database according to configured <interval> before running tests.
-input_series:
-  [ - <series> ]
-
-# Name of the test group, optional
-[ name: <string> ]
-
-# Unit tests for alerting rules
-alert_rule_test:
-  [ - <alert_test_case> ]
-
-# Unit tests for Metricsql expressions.
-metricsql_expr_test:
-  [ - <metricsql_expr_test> ]
-
-# External labels accessible for templating.
-external_labels:
-  [ <labelname>: <string> ... ]
-```
-
-#### `<series>`
-
-```yaml
-# series in the following format '<metric name>{<label name>=<label value>, ...}'
-# Examples:
-#      series_name{label1="value1", label2="value2"}
-#      go_goroutines{job="prometheus", instance="localhost:9090"}
-series: <string>
-
-# values support several special equations:
-#    'a+bxc' becomes 'a a+b a+(2*b) a+(3*b) … a+(c*b)'
-#    Read this as series starts at a, then c further samples incrementing by b.
-#    'a-bxc' becomes 'a a-b a-(2*b) a-(3*b) … a-(c*b)'
-#    Read this as series starts at a, then c further samples decrementing by b (or incrementing by negative b).
-#    '_' represents a missing sample from scrape
-#    'stale' indicates a stale sample
-# Examples:
-#    1. '-2+4x3' becomes '-2 2 6 10' - series starts at -2, then 3 further samples incrementing by 4.
-#    2. ' 1-2x4' becomes '1 -1 -3 -5 -7' - series starts at 1, then 4 further samples decrementing by 2.
-#    3. ' 1x4' becomes '1 1 1 1 1' - shorthand for '1+0x4', series starts at 1, then 4 further samples incrementing by 0.
-#    4. ' 1 _x3 stale' becomes '1 _ _ _ stale' - the missing sample cannot increment, so 3 missing samples are produced by the '_x3' expression.
-values: <string>
-```
-
-#### `<alert_test_case>`
-
-vmalert by default adds `alertgroup` and `alertname` to the generated alerts and time series.
-So you will need to specify both `groupname` and `alertname` under a single `<alert_test_case>`,
-but no need to add them under `exp_alerts`.
-You can also pass `--disableAlertgroupLabel` to skip the `alertgroup` check.
-
-```yaml
-# The time elapsed from time=0s when this alerting rule should be checked.
-# Means this rule should be firing at this point, or shouldn't be firing if 'exp_alerts' is empty.
-eval_time: <duration>
-
-# Name of the group to be tested.
-groupname: <string>
-
-# Name of the alert to be tested.
-alertname: <string>
-
-# List of the expected alerts that are firing under the given alertname at
-# the given evaluation time. If you want to test if an alerting rule should
-# not be firing, then you can mention only the fields above and leave 'exp_alerts' empty.
-exp_alerts:
-  [ - <alert> ]
-```
-
-#### `<alert>`
-
-```yaml
-# These are the expanded labels and annotations of the expected alert.
-# Note: labels also include the labels of the sample associated with the alert
-exp_labels:
-  [ <labelname>: <string> ]
-exp_annotations:
-  [ <labelname>: <string> ]
-```
-
-#### `<metricsql_expr_test>`
-
-```yaml
-# Expression to evaluate
-expr: <string>
-
-# The time elapsed from time=0s when this expression should be evaluated.
-eval_time: <duration>
-
-# Expected samples at the given evaluation time.
-exp_samples:
-  [ - <sample> ]
-```
-
-#### `<sample>`
-
-```yaml
-# Labels of the sample in usual series notation '<metric name>{<label name>=<label value>, ...}'
-# Examples:
-#      series_name{label1="value1", label2="value2"}
-#      go_goroutines{job="prometheus", instance="localhost:9090"}
-labels: <string>
-
-# The expected value of the Metricsql expression.
-value: <number>
-```
-
-### Example
-
-This is an example input file for unit testing which will pass.
-`test.yaml` is the test file which follows the syntax above and `alerts.yaml` contains the alerting rules.
-
-With `rules.yaml` in the same directory, run `./vmalert-tool unittest --files=./unittest/testdata/test.yaml`.
-
-#### `test.yaml`
-
-```yaml
-rule_files:
-  - rules.yaml
-
-evaluation_interval: 1m
-
-tests:
-  - interval: 1m
-    input_series:
-      - series: 'up{job="prometheus", instance="localhost:9090"}'
-        values: "0+0x1440"
-
-    metricsql_expr_test:
-      - expr: suquery_interval_test
-        eval_time: 4m
-        exp_samples:
-          - labels: '{__name__="suquery_interval_test", datacenter="dc-123", instance="localhost:9090", job="prometheus"}'
-            value: 1
-
-    alert_rule_test:
-      - eval_time: 2h
-        groupname: group1
-        alertname: InstanceDown
-        exp_alerts:
-          - exp_labels:
-              job: prometheus
-              severity: page
-              instance: localhost:9090
-              datacenter: dc-123
-            exp_annotations:
-              summary: "Instance localhost:9090 down"
-              description: "localhost:9090 of job prometheus has been down for more than 5 minutes."
-
-      - eval_time: 0
-        groupname: group1
-        alertname: AlwaysFiring
-        exp_alerts:
-          - exp_labels:
-              datacenter: dc-123
-
-      - eval_time: 0
-        groupname: group1
-        alertname: InstanceDown
-        exp_alerts: []
-
-    external_labels:
-      datacenter: dc-123
-```
-
-#### `alerts.yaml`
-
-```yaml
-# This is the rules file.
-
-groups:
-  - name: group1
-    rules:
-      - alert: InstanceDown
-        expr: up == 0
-        for: 5m
-        labels:
-          severity: page
-        annotations:
-          summary: "Instance {{ $labels.instance }} down"
-          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
-      - alert: AlwaysFiring
-        expr: 1
-
-  - name: group2
-    rules:
-      - record: job:test:count_over_time1m
-        expr: sum without(instance) (count_over_time(test[1m]))
-      - record: suquery_interval_test
-        expr: count_over_time(up[5m:])
-```
+See vmalert-tool docs [here](https://docs.victoriametrics.com/vmalert-tool.html).
+
+vmalert-tool docs can be edited at [docs/vmalert-tool.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmalert-tool.md).
@@ -21,6 +21,11 @@ tests:
         groupname: group2
         alertname: SameAlertNameWithDifferentGroup
         exp_alerts: []
+      - eval_time: 150s
+        groupname: group1
+        alertname: SameAlertNameWithDifferentGroup
+        exp_alerts:
+          - {}
       - eval_time: 6m
         groupname: group1
         alertname: SameAlertNameWithDifferentGroup
@@ -336,7 +336,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
 			if disableAlertgroupLabel {
 				g.Name = ""
 			}
-			if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name]; !ok {
+			if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name]; !ok {
 				continue
 			}
 			if _, ok := gotAlertsMap[g.Name]; !ok {

@@ -347,7 +347,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
 			if !isAlertRule {
 				continue
 			}
-			if _, ok := alertExpResultMap[time.Duration(ts.UnixNano())][g.Name][ar.Name]; ok {
+			if _, ok := alertExpResultMap[alertEvalTimes[evalIndex]][g.Name][ar.Name]; ok {
 				for _, got := range ar.GetAlerts() {
 					if got.State != notifier.StateFiring {
 						continue
@@ -59,7 +59,7 @@ absolute path to all .tpl files in root.
 	configCheckInterval = flag.Duration("configCheckInterval", 0, "Interval for checking for changes in '-rule' or '-notifier.config' files. "+
 		"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes.")
 
-	httpListenAddr   = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
+	httpListenAddr   = flag.String("httpListenAddr", ":8880", "Address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
 	useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
 		"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
 		"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
@@ -191,7 +191,7 @@ func (a Alert) toPromLabels(relabelCfg *promrelabel.ParsedConfigs) []prompbmarsh
 	var labels []prompbmarshal.Label
 	for k, v := range a.Labels {
 		labels = append(labels, prompbmarshal.Label{
-			Name:  k,
+			Name:  promrelabel.SanitizeMetricName(k),
 			Value: v,
 		})
 	}
@@ -237,6 +237,11 @@ func TestAlert_toPromLabels(t *testing.T) {
 		[]prompbmarshal.Label{{Name: "a", Value: "baz"}, {Name: "foo", Value: "bar"}},
 		nil,
 	)
+	fn(
+		map[string]string{"foo.bar": "baz", "service!name": "qux"},
+		[]prompbmarshal.Label{{Name: "foo_bar", Value: "baz"}, {Name: "service_name", Value: "qux"}},
+		nil,
+	)
 
 	pcs, err := promrelabel.ParseRelabelConfigsData([]byte(`
 - target_label: "foo"
@@ -23,7 +23,7 @@ var (
 		"It is hidden by default, since it can contain sensitive info such as auth key")
 	blackHole = flag.Bool("notifier.blackhole", false, "Whether to blackhole alerting notifications. "+
 		"Enable this flag if you want vmalert to evaluate alerting rules without sending any notifications to external receivers (eg. alertmanager). "+
-		"`-notifier.url`, `-notifier.config` and `-notifier.blackhole` are mutually exclusive.")
+		"-notifier.url, -notifier.config and -notifier.blackhole are mutually exclusive.")
 
 	basicAuthUsername = flagutil.NewArrayString("notifier.basicAuth.username", "Optional basic auth username for -notifier.url")
 	basicAuthPassword = flagutil.NewArrayString("notifier.basicAuth.password", "Optional basic auth password for -notifier.url")
@@ -30,6 +30,7 @@ type AlertingRule struct {
 	Annotations  map[string]string
 	GroupID      uint64
 	GroupName    string
+	File         string
 	EvalInterval time.Duration
 	Debug        bool

@@ -47,7 +48,7 @@ type AlertingRule struct {
 }
 
 type alertingRuleMetrics struct {
-	errors  *utils.Gauge
+	errors  *utils.Counter
 	pending *utils.Gauge
 	active  *utils.Gauge
 	samples *utils.Gauge

@@ -67,6 +68,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 		Annotations:  cfg.Annotations,
 		GroupID:      group.ID(),
 		GroupName:    group.Name,
+		File:         group.File,
 		EvalInterval: group.Interval,
 		Debug:        cfg.Debug,
 		q: qb.BuildWithParams(datasource.QuerierParams{

@@ -116,14 +118,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule
 		}
 		return float64(num)
 	})
-	ar.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_error{%s}`, labels),
-		func() float64 {
-			e := ar.state.getLast()
-			if e.Err == nil {
-				return 0
-			}
-			return 1
-		})
+	ar.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{%s}`, labels))
 	ar.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerting_rules_last_evaluation_samples{%s}`, labels),
 		func() float64 {
 			e := ar.state.getLast()

@@ -383,6 +378,9 @@ func (ar *AlertingRule) exec(ctx context.Context, ts time.Time, limit int) ([]pr
 
 	defer func() {
 		ar.state.add(curState)
+		if curState.Err != nil {
+			ar.metrics.errors.Inc()
+		}
 	}()
 
 	ar.alertsMu.Lock()
@@ -3,6 +3,7 @@ package rule
 import (
 	"context"
 	"errors"
+	"fmt"
 	"reflect"
 	"sort"
 	"strings"

@@ -13,6 +14,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
 )

@@ -1078,6 +1080,9 @@ func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
 		EvalInterval: waitFor,
 		alerts:       make(map[uint64]*notifier.Alert),
 		state:        &ruleState{entries: make([]StateEntry, 10)},
+		metrics: &alertingRuleMetrics{
+			errors: utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{alertname=%q}`, name)),
+		},
 	}
 	return &rule
 }
@@ -17,12 +17,14 @@ import (
 // to evaluate configured Expression and
 // return TimeSeries as result.
 type RecordingRule struct {
-	Type    config.Type
-	RuleID  uint64
-	Name    string
-	Expr    string
-	Labels  map[string]string
-	GroupID uint64
+	Type      config.Type
+	RuleID    uint64
+	Name      string
+	Expr      string
+	Labels    map[string]string
+	GroupID   uint64
+	GroupName string
+	File      string
 
 	q datasource.Querier

@@ -34,7 +36,7 @@ type RecordingRule struct {
 }
 
 type recordingRuleMetrics struct {
-	errors  *utils.Gauge
+	errors  *utils.Counter
 	samples *utils.Gauge
 }

@@ -52,13 +54,15 @@ func (rr *RecordingRule) ID() uint64 {
 // NewRecordingRule creates a new RecordingRule
 func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule) *RecordingRule {
 	rr := &RecordingRule{
-		Type:    group.Type,
-		RuleID:  cfg.ID,
-		Name:    cfg.Record,
-		Expr:    cfg.Expr,
-		Labels:  cfg.Labels,
-		GroupID: group.ID(),
-		metrics: &recordingRuleMetrics{},
+		Type:      group.Type,
+		RuleID:    cfg.ID,
+		Name:      cfg.Record,
+		Expr:      cfg.Expr,
+		Labels:    cfg.Labels,
+		GroupID:   group.ID(),
+		GroupName: group.Name,
+		File:      group.File,
+		metrics:   &recordingRuleMetrics{},
 		q: qb.BuildWithParams(datasource.QuerierParams{
 			DataSourceType:     group.Type.String(),
 			EvaluationInterval: group.Interval,

@@ -79,14 +83,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul
 	}
 
 	labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID())
-	rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels),
-		func() float64 {
-			e := rr.state.getLast()
-			if e.Err == nil {
-				return 0
-			}
-			return 1
-		})
+	rr.metrics.errors = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_recording_rules_errors_total{%s}`, labels))
 	rr.metrics.samples = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_last_evaluation_samples{%s}`, labels),
 		func() float64 {
 			e := rr.state.getLast()

@@ -138,6 +135,9 @@ func (rr *RecordingRule) exec(ctx context.Context, ts time.Time, limit int) ([]p
 
 	defer func() {
 		rr.state.add(curState)
+		if curState.Err != nil {
+			rr.metrics.errors.Inc()
+		}
 	}()
 
 	if err != nil {
@@ -8,6 +8,7 @@ import (
 	"time"
 
 	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
+	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 )

@@ -201,9 +202,15 @@ func TestRecordingRuleLimit(t *testing.T) {
 		metricWithValuesAndLabels(t, []float64{2, 3}, "__name__", "bar", "job", "bar"),
 		metricWithValuesAndLabels(t, []float64{4, 5, 6}, "__name__", "baz", "job", "baz"),
 	}
-	rule := &RecordingRule{Name: "job:foo", state: &ruleState{entries: make([]StateEntry, 10)}, Labels: map[string]string{
-		"source": "test_limit",
-	}}
+	rule := &RecordingRule{Name: "job:foo",
+		state: &ruleState{entries: make([]StateEntry, 10)},
+		Labels: map[string]string{
+			"source": "test_limit",
+		},
+		metrics: &recordingRuleMetrics{
+			errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
+		},
+	}
 	var err error
 	for _, testCase := range testCases {
 		fq := &datasource.FakeQuerier{}

@@ -223,6 +230,9 @@ func TestRecordingRule_ExecNegative(t *testing.T) {
 			"job": "test",
 		},
 		state: &ruleState{entries: make([]StateEntry, 10)},
+		metrics: &recordingRuleMetrics{
+			errors: utils.GetOrCreateCounter(`vmalert_recording_rules_errors_total{alertname="job:foo"}`),
+		},
 	}
 	fq := &datasource.FakeQuerier{}
 	expErr := "connection reset by peer"
@@ -43,26 +43,26 @@ type ruleState struct {
 // StateEntry stores rule's execution states
 type StateEntry struct {
 	// stores last moment of time rule.Exec was called
-	Time time.Time
+	Time time.Time `json:"time"`
 	// stores the timestamp with which rule.Exec was called
-	At time.Time
+	At time.Time `json:"at"`
 	// stores the duration of the last rule.Exec call
-	Duration time.Duration
+	Duration time.Duration `json:"duration"`
 	// stores last error that happened in Exec func
 	// resets on every successful Exec
 	// may be used as Health ruleState
-	Err error
+	Err error `json:"error"`
 	// stores the number of samples returned during
 	// the last evaluation
-	Samples int
+	Samples int `json:"samples"`
 	// stores the number of time series fetched during
 	// the last evaluation.
 	// Is supported by VictoriaMetrics only, starting from v1.90.0
 	// If seriesFetched == nil, then this attribute was missing in
 	// datasource response (unsupported).
-	SeriesFetched *int
+	SeriesFetched *int `json:"series_fetched"`
 	// stores the curl command reflecting the HTTP request used during rule.Exec
-	Curl string
+	Curl string `json:"curl"`
 }
 
 // GetLastEntry returns latest stateEntry of rule
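With the JSON tags above, a state entry marshals to snake_case keys. A standalone sketch of the resulting wire format, with `Err` simplified to a string since a bare `error` interface does not marshal usefully (the `entry` type here mirrors `StateEntry` but is not it):

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// entry mirrors the tagged shape of StateEntry for illustration only.
type entry struct {
	Time          time.Time     `json:"time"`
	At            time.Time     `json:"at"`
	Duration      time.Duration `json:"duration"`
	Err           string        `json:"error"`
	Samples       int           `json:"samples"`
	SeriesFetched *int          `json:"series_fetched"`
	Curl          string        `json:"curl"`
}

func main() {
	n := 3
	b, _ := json.Marshal(entry{Samples: 42, SeriesFetched: &n})
	fmt.Println(string(b))
	// {"time":"0001-01-01T00:00:00Z","at":"0001-01-01T00:00:00Z","duration":0,"error":"","samples":42,"series_fetched":3,"curl":""}
}
```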
[8 binary screenshot files deleted (73, 151, 122, 80, 62, 109, 41 and 41 KiB)]
@@ -132,6 +132,24 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 		w.Header().Set("Content-Type", "application/json")
 		w.Write(data)
 		return true
+	case "/vmalert/api/v1/rule", "/api/v1/rule":
+		rule, err := rh.getRule(r)
+		if err != nil {
+			httpserver.Errorf(w, r, "%s", err)
+			return true
+		}
+		rwu := apiRuleWithUpdates{
+			apiRule:      rule,
+			StateUpdates: rule.Updates,
+		}
+		data, err := json.Marshal(rwu)
+		if err != nil {
+			httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
+			return true
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.Write(data)
+		return true
 	case "/-/reload":
 		logger.Infof("api config reload was called, sending sighup")
 		procutil.SelfSIGHUP()
@@ -143,6 +143,28 @@ func TestHandler(t *testing.T) {
 			t.Errorf("expected 1 group got %d", length)
 		}
 	})
+	t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
+		expRule := ruleToAPI(ar)
+		gotRule := apiRule{}
+		getResp(ts.URL+"/"+expRule.APILink(), &gotRule, 200)
+
+		if expRule.ID != gotRule.ID {
+			t.Errorf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
+		}
+
+		gotRule = apiRule{}
+		getResp(ts.URL+"/vmalert/"+expRule.APILink(), &gotRule, 200)
+
+		if expRule.ID != gotRule.ID {
+			t.Errorf("expected to get Rule %q; got %q instead", expRule.ID, gotRule.ID)
+		}
+
+		gotRuleWithUpdates := apiRuleWithUpdates{}
+		getResp(ts.URL+"/"+expRule.APILink(), &gotRuleWithUpdates, 200)
+		if gotRuleWithUpdates.StateUpdates == nil || len(gotRuleWithUpdates.StateUpdates) < 1 {
+			t.Fatalf("expected %+v to have state updates field not empty", gotRuleWithUpdates.StateUpdates)
+		}
+	})
 }
 
 func TestEmptyResponse(t *testing.T) {
@@ -151,6 +151,10 @@ type apiRule struct {
 	ID string `json:"id"`
 	// GroupID is a unique Group's ID
 	GroupID string `json:"group_id"`
+	// GroupName is the name of the Group the rule belongs to
+	GroupName string `json:"group_name"`
+	// File is the name of the file the rule is defined in
+	File string `json:"file"`
 	// Debug shows whether debug mode is enabled
 	Debug bool `json:"debug"`

@@ -160,6 +164,19 @@ type apiRule struct {
 	Updates []rule.StateEntry `json:"-"`
 }
 
+// apiRuleWithUpdates represents apiRule but with extra fields for marshalling
+type apiRuleWithUpdates struct {
+	apiRule
+	// StateUpdates contains the ordered list of recorded ruleStateEntry objects
+	StateUpdates []rule.StateEntry `json:"updates,omitempty"`
+}
+
+// APILink returns a link to the rule's JSON representation.
+func (ar apiRule) APILink() string {
+	return fmt.Sprintf("api/v1/rule?%s=%s&%s=%s",
+		paramGroupID, ar.GroupID, paramRuleID, ar.ID)
+}
+
 // WebLink returns a link to the alert which can be used in UI.
 func (ar apiRule) WebLink() string {
 	return fmt.Sprintf("rule?%s=%s&%s=%s",

@@ -227,8 +244,10 @@ func alertingToAPI(ar *rule.AlertingRule) apiRule {
 		Debug: ar.Debug,
 
 		// encode as strings to avoid rounding in JSON
-		ID:      fmt.Sprintf("%d", ar.ID()),
-		GroupID: fmt.Sprintf("%d", ar.GroupID),
+		ID:        fmt.Sprintf("%d", ar.ID()),
+		GroupID:   fmt.Sprintf("%d", ar.GroupID),
+		GroupName: ar.GroupName,
+		File:      ar.File,
 	}
 	if lastState.Err != nil {
 		r.LastError = lastState.Err.Error()
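A hedged sketch of querying the endpoint added above from Go. The host and port are illustrative, and the exact query-parameter names are whatever `paramGroupID` and `paramRuleID` expand to, so real callers should prefer links built by `APILink()`:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Parameter names group_id/rule_id are an assumption for this sketch;
	// follow the links emitted by the vmalert API itself in real code.
	resp, err := http.Get("http://localhost:8880/api/v1/rule?group_id=123&rule_id=456")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}
```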
@ -1,576 +1,3 @@
|
|||
# vmauth
|
||||
See vmauth docs [here](https://docs.victoriametrics.com/vmauth.html).
|
||||
|
||||
`vmauth` is a simple auth proxy, router and [load balancer](#load-balancing) for [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It reads auth credentials from `Authorization` http header ([Basic Auth](https://en.wikipedia.org/wiki/Basic_access_authentication), `Bearer token` and [InfluxDB authorization](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1897) is supported),
|
||||
matches them against configs pointed by [-auth.config](#auth-config) command-line flag and proxies incoming HTTP requests to the configured per-user `url_prefix` on successful match.
|
||||
The `-auth.config` can point to either local file or to http url.
|
||||
|
||||
## Quick start
|
||||
|
||||
Just download `vmutils-*` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), unpack it
|
||||
and pass the following flag to `vmauth` binary in order to start authorizing and routing requests:
|
||||
|
||||
```console
|
||||
/path/to/vmauth -auth.config=/path/to/auth/config.yml
|
||||
```
|
||||
|
||||
After that `vmauth` starts accepting HTTP requests on port `8427` and routing them according to the provided [-auth.config](#auth-config).
|
||||
The port can be modified via `-httpListenAddr` command-line flag.
|
||||
|
||||
The auth config can be reloaded via the following ways:
|
||||
|
||||
- By passing `SIGHUP` signal to `vmauth`.
|
||||
- By querying `/-/reload` http endpoint. This endpoint can be protected with `-reloadAuthKey` command-line flag. See [security docs](#security) for more details.
|
||||
- By specifying `-configCheckInterval` command-line flag to the interval between config re-reads. For example, `-configCheckInterval=5s` will re-read the config
|
||||
and apply new changes every 5 seconds.
|
||||
|
||||
Docker images for `vmauth` are available [here](https://hub.docker.com/r/victoriametrics/vmauth/tags).
|
||||
See how `vmauth` used in [docker-compose env](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/README.md#victoriametrics-cluster).
|
||||
|
||||
Pass `-help` to `vmauth` in order to see all the supported command-line flags with their descriptions.
|
||||
|
||||
Feel free [contacting us](mailto:info@victoriametrics.com) if you need customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML,
|
||||
accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.com/vmgateway.html).

## Dropping request path prefix

By default `vmauth` doesn't drop the path prefix from the original request when proxying the request to the matching backend.
Sometimes it is needed to drop the path prefix before routing the request to the backend. This can be done by specifying the number of `/`-delimited
prefix parts to drop from the request path via the `drop_src_path_prefix_parts` option at `url_map` level or at `user` level.

For example, if you need to serve requests to [vmalert](https://docs.victoriametrics.com/vmalert.html) at the `/vmalert/` path prefix,
while serving requests to [vmagent](https://docs.victoriametrics.com/vmagent.html) at the `/vmagent/` path prefix for a particular user,
then the following [-auth.config](#auth-config) can be used:

```yml
users:
- username: foo
  url_map:

  # proxy all the requests, which start with `/vmagent/`, to vmagent backend
  - src_paths:
    - "/vmagent/.+"

    # drop the /vmagent/ path prefix from the original request before proxying it to url_prefix.
    drop_src_path_prefix_parts: 1
    url_prefix: "http://vmagent-backend:8429/"

  # proxy all the requests, which start with `/vmalert/`, to vmalert backend
  - src_paths:
    - "/vmalert/.+"

    # drop the /vmalert/ path prefix from the original request before proxying it to url_prefix.
    drop_src_path_prefix_parts: 1
    url_prefix: "http://vmalert-backend:8880/"
```
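
With this config, a request to `http://vmauth:8427/vmalert/api/v1/alerts` is proxied to `http://vmalert-backend:8880/api/v1/alerts`, since the single `/vmalert` prefix part is dropped before the request reaches the backend.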

## Load balancing

Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
In the latter case `vmauth` balances load among the configured urls in a least-loaded round-robin manner.

If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls.

It is possible to configure automatic retry of requests if the backend responds with a status code from the optional `retry_status_codes` list.

The load balancing feature can be used in the following cases:

- Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
  The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests
  or requests with 500 and 502 response status codes:

  ```yml
  unauthorized_user:
    url_prefix:
    - http://vmselect1:8481/
    - http://vmselect2:8481/
    - http://vmselect3:8481/
    retry_status_codes: [500, 502]
  ```

- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries
  among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable.
  `vmauth` adds the `deny_partial_response=1` query arg to all the queries in order to guarantee getting a full response from every AZ.
  See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details.

  ```yml
  unauthorized_user:
    url_prefix:
    - https://vmselect-az1/?deny_partial_response=1
    - https://vmselect-az2/?deny_partial_response=1
    - https://vmselect-az3/?deny_partial_response=1
    retry_status_codes: [500, 502, 503]
  ```

Load balancing can also be configured independently per each user and per each `url_map` entry.
See [auth config docs](#auth-config) for more details.
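
For example, here is a sketch of a per-`url_map` override, assuming the `load_balancing_policy` option wired up in the code changes below (`least_loaded` is the default policy; `first_available` sends requests to the first backend which isn't temporarily unavailable):

```yml
users:
- username: "foo"
  url_map:
  - src_paths: ["/api/v1/query"]
    url_prefix:
    - "http://vmselect1:8481/select/0/prometheus"
    - "http://vmselect2:8481/select/0/prometheus"
    # send all the requests to the first backend while it is available
    load_balancing_policy: first_available
```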

## Concurrency limiting

`vmauth` limits the number of concurrent requests it can proxy according to the following command-line flags:

- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.

It is also possible to set individual limits on the number of concurrent requests per each user
with the `max_concurrent_requests` option - see the sketch below and the [auth config example](#auth-config).
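
A minimal sketch with a hypothetical user and backend:

```yml
users:
- username: "foo"
  password: "***"
  url_prefix: "http://localhost:8428"
  # the 6th concurrent request from this user is rejected with 429 Too Many Requests
  max_concurrent_requests: 5
```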

`vmauth` responds with the `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.

The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:

- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
  It is set via the `-maxConcurrentRequests` command-line flag.
- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with the `429 Too Many Requests` error
  because the global concurrency limit has been reached.
- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
- `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with the `429 Too Many Requests` error
  because the concurrency limit has been reached for the given `username`.
- `vmauth_unauthorized_user_concurrent_requests_capacity` - the limit on the number of concurrent requests for unauthorized users (if the `unauthorized_user` section is used).
- `vmauth_unauthorized_user_concurrent_requests_current` - the current number of concurrent requests for unauthorized users (if the `unauthorized_user` section is used).
- `vmauth_unauthorized_user_concurrent_requests_limit_reached_total` - the number of requests rejected with the `429 Too Many Requests` error
  because the concurrency limit has been reached for unauthorized users (if the `unauthorized_user` section is used).

## Backend TLS setup

By default `vmauth` uses system settings when performing requests to HTTPS backends specified via the `url_prefix` option
in the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config). These settings can be overridden with the following command-line flags:

- `-backend.tlsInsecureSkipVerify` allows skipping TLS verification when connecting to HTTPS backends.
  This global setting can be overridden at per-user level inside the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
  via the `tls_insecure_skip_verify` option. For example:

  ```yml
  - username: "foo"
    url_prefix: "https://localhost"
    tls_insecure_skip_verify: true
  ```

- `-backend.tlsCAFile` allows specifying the path to the TLS Root CA, which will be used for TLS verification when connecting to HTTPS backends.
  The `-backend.tlsCAFile` may point either to a local file or to an `http` / `https` url.
  This global setting can be overridden at per-user level inside the [`-auth.config`](https://docs.victoriametrics.com/vmauth.html#auth-config)
  via the `tls_ca_file` option. For example:

  ```yml
  - username: "foo"
    url_prefix: "https://localhost"
    tls_ca_file: "/path/to/tls/root/ca"
  ```

## IP filters

The [Enterprise version](https://docs.victoriametrics.com/enterprise.html) of `vmauth` can be configured to allow / deny incoming requests via global and per-user IP filters.

For example, the following config allows requests to `vmauth` from the `10.0.0.0/24` network and from the `1.2.3.4` IP address, while denying requests from the `10.0.0.42` IP address:

```yml
users:
# User configs here

ip_filters:
  allow_list:
  - 10.0.0.0/24
  - 1.2.3.4
  deny_list: [10.0.0.42]
```

The following config allows requests for the user 'foobar' only from the IP `127.0.0.1`:

```yml
users:
- username: "foobar"
  password: "***"
  url_prefix: "http://localhost:8428"
  ip_filters:
    allow_list: [127.0.0.1]
```

See a config example of using IP filters [here](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmauth/example_config_ent.yml).

## Auth config

`-auth.config` is represented in the following simple `yml` format:

```yml
# An arbitrary number of usernames may be put here.
# It is possible to set multiple identical usernames with different passwords.
# Such usernames can be differentiated by the `name` option.

users:
# Requests with the 'Authorization: Bearer XXXX' and 'Authorization: Token XXXX'
# header are proxied to http://localhost:8428 .
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
# Requests with the Basic Auth username=XXXX are proxied to http://localhost:8428 as well.
- bearer_token: "XXXX"
  url_prefix: "http://localhost:8428"

# Requests with the 'Authorization: Bearer YYY' header are proxied to http://localhost:8428 .
# The `X-Scope-OrgID: foobar` http header is added to every proxied request.
# The `X-Server-Hostname` http header is removed from the proxied response.
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
- bearer_token: "YYY"
  url_prefix: "http://localhost:8428"
  # extra headers to add to the request or remove from the request (if header value is empty)
  headers:
  - "X-Scope-OrgID: foobar"
  # extra headers to add to the response or remove from the response (if header value is empty)
  response_headers:
  - "X-Server-Hostname:" # empty value means the header will be removed from the response

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to http://localhost:8428 .
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query
#
# The given user can send a maximum of 10 concurrent requests according to the provided max_concurrent_requests.
# Excess concurrent requests are rejected with the 429 HTTP status code.
# See also -maxConcurrentPerUserRequests and -maxConcurrentRequests command-line flags.
- username: "local-single-node"
  password: "***"
  url_prefix: "http://localhost:8428"
  max_concurrent_requests: 10

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to http://localhost:8428 with the extra_label=team=dev query arg.
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
- username: "local-single-node2"
  password: "***"
  url_prefix: "http://localhost:8428?extra_label=team=dev"

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to https://localhost .
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
# TLS verification is skipped for https://localhost.
- username: "local-single-node-with-tls"
  password: "***"
  url_prefix: "https://localhost"
  tls_insecure_skip_verify: true

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are load-balanced among http://vmselect1:8481/select/123/prometheus and http://vmselect2:8481/select/123/prometheus .
# For example, http://vmauth:8427/api/v1/query is proxied to the following urls in a round-robin manner:
# - http://vmselect1:8481/select/123/prometheus/api/v1/query
# - http://vmselect2:8481/select/123/prometheus/api/v1/query
- username: "cluster-select-account-123"
  password: "***"
  url_prefix:
  - "http://vmselect1:8481/select/123/prometheus"
  - "http://vmselect2:8481/select/123/prometheus"

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are load-balanced between http://vminsert1:8480/insert/42/prometheus and http://vminsert2:8480/insert/42/prometheus .
# For example, http://vmauth:8427/api/v1/write is proxied to the following urls in a round-robin manner:
# - http://vminsert1:8480/insert/42/prometheus/api/v1/write
# - http://vminsert2:8480/insert/42/prometheus/api/v1/write
- username: "cluster-insert-account-42"
  password: "***"
  url_prefix:
  - "http://vminsert1:8480/insert/42/prometheus"
  - "http://vminsert2:8480/insert/42/prometheus"

# A single user for querying and inserting data:
#
# - Requests to http://vmauth:8427/api/v1/query, http://vmauth:8427/api/v1/query_range
#   and http://vmauth:8427/api/v1/label/<label_name>/values are proxied to the following urls in a round-robin manner:
#   - http://vmselect1:8481/select/42/prometheus
#   - http://vmselect2:8481/select/42/prometheus
#   For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8481/select/42/prometheus/api/v1/query
#   or to http://vmselect2:8481/select/42/prometheus/api/v1/query .
#   Requests are re-tried at other url_prefix backends if response status codes match 500 or 502.
#
# - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write .
#   The "X-Scope-OrgID: abc" http header is added to these requests.
#   The "X-Server-Hostname" http header is removed from the proxied response.
#
# Requests which do not match `src_paths` from the `url_map` are proxied to the urls from `default_url`
# in a round-robin manner. The original request path is passed in the `request_path` query arg.
# For example, requests to http://vmauth:8427/non/existing/path are proxied:
# - to http://default1:8888/unsupported_url_handler?request_path=/non/existing/path
# - or to http://default2:8888/unsupported_url_handler?request_path=/non/existing/path
#
# Regular expressions are allowed in `src_paths` entries.
- username: "foobar"
  url_map:
  - src_paths:
    - "/api/v1/query"
    - "/api/v1/query_range"
    - "/api/v1/label/[^/]+/values"
    url_prefix:
    - "http://vmselect1:8481/select/42/prometheus"
    - "http://vmselect2:8481/select/42/prometheus"
    retry_status_codes: [500, 502]
  - src_paths: ["/api/v1/write"]
    url_prefix: "http://vminsert:8480/insert/42/prometheus"
    headers:
    - "X-Scope-OrgID: abc"
    response_headers:
    - "X-Server-Hostname:" # empty value means the header will be removed from the response
  ip_filters:
    deny_list: [127.0.0.1]
  default_url:
  - "http://default1:8888/unsupported_url_handler"
  - "http://default2:8888/unsupported_url_handler"

# Requests without the Authorization header are routed according to the `unauthorized_user` section.
# Requests are routed in round-robin fashion between the `url_prefix` backends.
# The deny_partial_response query arg is added to all the routed requests.
# The requests are re-tried if the url_prefix backends send 500 or 503 response status codes.
# Note that the unauthorized_user section takes precedence when processing a route without credentials,
# even if such a route also exists in the users section (see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5236).
unauthorized_user:
  url_prefix:
  - http://vmselect-az1/?deny_partial_response=1
  - http://vmselect-az2/?deny_partial_response=1
  retry_status_codes: [503, 500]

ip_filters:
  allow_list: ["1.2.3.0/24", "127.0.0.1"]
  deny_list:
  - 10.1.0.1
```

The config may contain `%{ENV_VAR}` placeholders, which are substituted by the corresponding `ENV_VAR` environment variable values.
This may be useful for passing secrets to the config.
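
For example, a sketch that reads the password from a hypothetical `AUTH_PASSWORD` environment variable:

```yml
users:
- username: "foo"
  # replaced with the value of the AUTH_PASSWORD environment variable when the config is loaded
  password: "%{AUTH_PASSWORD}"
  url_prefix: "http://localhost:8428"
```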

Please note that `vmauth` doesn't follow redirects. If the destination redirects a request to a new location, make sure this
location is supported by the vmauth `url_map` config.

## Security

It is expected that all the backend services protected by `vmauth` are located in an isolated private network, so they can be accessed by external users only via `vmauth`.

Do not transfer Basic Auth headers in plaintext over untrusted networks. Enable https. This can be done by passing the following `-tls*` command-line flags to `vmauth`:

```console
  -tls
     Whether to enable TLS (aka HTTPS) for incoming requests. -tlsCertFile and -tlsKeyFile must be set if -tls is set
  -tlsCertFile string
     Path to file with TLS certificate. Used only if -tls is set. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow
  -tlsKeyFile string
     Path to file with TLS key. Used only if -tls is set
```

Alternatively, an [https termination proxy](https://en.wikipedia.org/wiki/TLS_termination_proxy) may be put in front of `vmauth`.

It is recommended protecting the following endpoints with authKeys:

* `/-/reload` with the `-reloadAuthKey` command-line flag, so external users can't trigger config reload.
* `/flags` with the `-flagsAuthKey` command-line flag, so unauthorized users can't get application command-line flags.
* `/metrics` with the `-metricsAuthKey` command-line flag, so unauthorized users can't get access to [vmauth metrics](#monitoring).
* `/debug/pprof` with the `-pprofAuthKey` command-line flag, so unauthorized users can't get access to [profiling information](#profiling).

`vmauth` also supports the ability to restrict access by IP - see [these docs](#ip-filters). See also [concurrency limiting docs](#concurrency-limiting).

## Monitoring

`vmauth` exports various metrics in Prometheus exposition format at the `http://vmauth-host:8427/metrics` page. It is recommended setting up regular scraping of this page
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics can be analyzed later.

`vmauth` exports the `vmauth_user_requests_total` [counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric
and the `vmauth_user_request_duration_seconds_*` [summary](https://docs.victoriametrics.com/keyConcepts.html#summary) metric
with the `username` label. The `username` label value equals the `username` field value set in the `-auth.config` file.
It is possible to override or hide the value in the label by specifying the `name` field.
For example, the following config will result in `vmauth_user_requests_total{username="foobar"}`
instead of `vmauth_user_requests_total{username="secret_user"}`:

```yml
users:
- username: "secret_user"
  name: "foobar"
  # other config options here
```

For unauthorized users `vmauth` exports the `vmauth_unauthorized_user_requests_total`
[counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric and the
`vmauth_unauthorized_user_request_duration_seconds_*` [summary](https://docs.victoriametrics.com/keyConcepts.html#summary)
metric without the `username` label (if the `unauthorized_user` section of the config is used).

## How to build from sources

It is recommended using [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - `vmauth` is located in the `vmutils-*` archives there.

### Development build

1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. Run `make vmauth` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
   It builds the `vmauth` binary and puts it into the `bin` folder.

### Production build

1. [Install docker](https://docs.docker.com/install/).
1. Run `make vmauth-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
   It builds the `vmauth-prod` binary and puts it into the `bin` folder.

### Building docker images

Run `make package-vmauth`. It builds the `victoriametrics/vmauth:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmauth`.

The base docker image is [alpine](https://hub.docker.com/_/alpine), but it is possible to use any other base image
by setting it via the `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of the [scratch](https://hub.docker.com/_/scratch) image:

```console
ROOT_IMAGE=scratch make package-vmauth
```

## Profiling

`vmauth` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):

* Memory profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):

<div class="with-copy" markdown="1">

```console
curl http://0.0.0.0:8427/debug/pprof/heap > mem.pprof
```

</div>

* CPU profile. It can be collected with the following command (replace `0.0.0.0` with hostname if needed):

<div class="with-copy" markdown="1">

```console
curl http://0.0.0.0:8427/debug/pprof/profile > cpu.pprof
```

</div>

The command for collecting the CPU profile waits for 30 seconds before returning.

The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
It is safe sharing the collected profiles from a security point of view, since they do not contain sensitive information.

## Advanced usage

Pass the `-help` command-line arg to `vmauth` in order to see all the configuration options:

```console
./vmauth -help

vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.

See the docs at https://docs.victoriametrics.com/vmauth.html .

  -auth.config string
     Path to auth config. It can point either to local file or to http url. See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config
  -configCheckInterval duration
     interval for config file re-read. Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.
  -enableTCP6
     Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
  -envflag.enable
     Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
  -envflag.prefix string
     Prefix for environment variables if -envflag.enable is set
  -eula
     Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
  -failTimeout duration
     Sets a delay period for load balancing to skip a malfunctioning backend (default 3s)
  -filestream.disableFadvise
     Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
  -flagsAuthKey string
     Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -fs.disableMmap
     Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
  -http.connTimeout duration
     Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
  -http.disableResponseCompression
     Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
  -http.header.csp string
     Value for 'Content-Security-Policy' header
  -http.header.frameOptions string
     Value for 'X-Frame-Options' header
  -http.header.hsts string
     Value for 'Strict-Transport-Security' header
  -http.idleConnTimeout duration
     Timeout for incoming idle http connections (default 1m0s)
  -http.maxGracefulShutdownDuration duration
     The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
  -http.pathPrefix string
     An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
  -http.shutdownDelay duration
     Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
  -httpAuth.password string
     Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
  -httpAuth.username string
     Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
  -httpListenAddr string
     TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8427")
  -httpListenAddr.useProxyProtocol
     Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
  -internStringCacheExpireDuration duration
     The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
  -internStringDisableCache
     Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
  -internStringMaxLen int
     The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
  -license string
     License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
  -license.forceOffline
     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
  -licenseFile string
     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
  -logInvalidAuthTokens
     Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
  -loggerDisableTimestamps
     Whether to disable writing timestamps in logs
  -loggerErrorsPerSecondLimit int
     Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
  -loggerFormat string
     Format for logs. Possible values: default, json (default "default")
  -loggerJSONFields string
     Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
  -loggerLevel string
     Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
  -loggerOutput string
     Output for the logs. Supported values: stderr, stdout (default "stderr")
  -loggerTimezone string
     Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
  -loggerWarnsPerSecondLimit int
     Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
  -maxConcurrentPerUserRequests int
     The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option in per-user config (default 300)
  -maxConcurrentRequests int
     The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
  -maxIdleConnsPerBackend int
     The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
  -maxRequestBodySizeToRetry size
     The maximum request body size, which can be cached and re-tried at other backends. Bigger values may require more memory
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384)
  -memory.allowedBytes size
     Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
  -memory.allowedPercent float
     Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
  -metricsAuthKey string
     Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -pprofAuthKey string
     Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -pushmetrics.extraLabel array
     Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
     Supports an array of values separated by comma or specified via multiple flags.
  -pushmetrics.interval duration
     Interval for pushing metrics to -pushmetrics.url (default 10s)
  -pushmetrics.url array
     Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
     Supports an array of values separated by comma or specified via multiple flags.
  -reloadAuthKey string
     Auth key for /-/reload http endpoint. It must be passed as authKey=...
  -responseTimeout duration
     The timeout for receiving a response from backend (default 5m0s)
  -tls
     Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
  -tlsCertFile string
     Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
  -tlsCipherSuites array
     Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
     Supports an array of values separated by comma or specified via multiple flags.
  -tlsKeyFile string
     Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
  -tlsMinVersion string
     Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
  -version
     Show VictoriaMetrics version
```

vmauth docs can be edited at [docs/vmauth.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmauth.md).

|
@ -20,6 +20,7 @@ import (
|
|||

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
@ -30,6 +31,10 @@ var (
|
|||
		"See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config")
	configCheckInterval = flag.Duration("configCheckInterval", 0, "interval for config file re-read. "+
		"Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.")
	defaultRetryStatusCodes = flagutil.NewArrayInt("retryStatusCodes", 0, "Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. "+
		"See https://docs.victoriametrics.com/vmauth.html#load-balancing for details")
	defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
		"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details")
)

// AuthConfig represents auth config.
|
@ -50,6 +55,7 @@ type UserInfo struct {
|
|||
	MaxConcurrentRequests  int        `yaml:"max_concurrent_requests,omitempty"`
	DefaultURL             *URLPrefix `yaml:"default_url,omitempty"`
	RetryStatusCodes       []int      `yaml:"retry_status_codes,omitempty"`
	LoadBalancingPolicy    string     `yaml:"load_balancing_policy,omitempty"`
	DropSrcPathPrefixParts int        `yaml:"drop_src_path_prefix_parts,omitempty"`
	TLSInsecureSkipVerify  *bool      `yaml:"tls_insecure_skip_verify,omitempty"`
	TLSCAFile              string     `yaml:"tls_ca_file,omitempty"`
|
@ -124,6 +130,7 @@ type URLMap struct {
|
|||
	URLPrefix              *URLPrefix  `yaml:"url_prefix,omitempty"`
	HeadersConf            HeadersConf `yaml:",inline"`
	RetryStatusCodes       []int       `yaml:"retry_status_codes,omitempty"`
	LoadBalancingPolicy    string      `yaml:"load_balancing_policy,omitempty"`
	DropSrcPathPrefixParts int         `yaml:"drop_src_path_prefix_parts,omitempty"`
}
|
@ -135,8 +142,28 @@ type SrcPath struct {
|
|||

// URLPrefix represents passed `url_prefix`
type URLPrefix struct {
	n uint32
	n uint32

	// the list of backend urls
	bus []*backendURL

	// requests are re-tried on other backend urls for these http response status codes
	retryStatusCodes []int

	// load balancing policy used
	loadBalancingPolicy string
}

func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
	switch loadBalancingPolicy {
	case "", // empty string is equivalent to least_loaded
		"least_loaded",
		"first_available":
		up.loadBalancingPolicy = loadBalancingPolicy
		return nil
	default:
		return fmt.Errorf("unexpected load_balancing_policy: %q; want least_loaded or first_available", loadBalancingPolicy)
	}
}

type backendURL struct {
|
@ -155,6 +182,10 @@ func (bu *backendURL) setBroken() {
|
|||
	atomic.StoreUint64(&bu.brokenDeadline, deadline)
}

func (bu *backendURL) get() {
	atomic.AddInt32(&bu.concurrentRequests, 1)
}

func (bu *backendURL) put() {
	atomic.AddInt32(&bu.concurrentRequests, -1)
}
|
@ -163,6 +194,40 @@ func (up *URLPrefix) getBackendsCount() int {
|
|||
	return len(up.bus)
}

// getBackendURL returns the backendURL depending on the load balance policy.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func (up *URLPrefix) getBackendURL() *backendURL {
	if up.loadBalancingPolicy == "first_available" {
		return up.getFirstAvailableBackendURL()
	}
	return up.getLeastLoadedBackendURL()
}

// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
func (up *URLPrefix) getFirstAvailableBackendURL() *backendURL {
	bus := up.bus

	bu := bus[0]
	if !bu.isBroken() {
		// Fast path - send the request to the first url.
		bu.get()
		return bu
	}

	// Slow path - the first url is temporarily unavailable. Fall back to the remaining urls.
	for i := 1; i < len(bus); i++ {
		if !bus[i].isBroken() {
			bu = bus[i]
			break
		}
	}
	bu.get()
	return bu
}

// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
//
// backendURL.put() must be called on the returned backendURL after the request is complete.
|
@ -171,7 +236,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
|||
	if len(bus) == 1 {
		// Fast path - return the only backend url.
		bu := bus[0]
		atomic.AddInt32(&bu.concurrentRequests, 1)
		bu.get()
		return bu
	}
|
@ -202,7 +267,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
|
|||
			minRequests = n
		}
	}
	atomic.AddInt32(&buMin.concurrentRequests, 1)
	buMin.get()
	return buMin
}
|
@ -212,6 +277,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
|
|||
	if err := f(&v); err != nil {
		return err
	}

	var urls []string
	switch x := v.(type) {
	case string:
|
@ -232,6 +298,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
|
|||
	default:
		return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v)
	}

	bus := make([]*backendURL, len(urls))
	for i, u := range urls {
		pu, err := url.Parse(u)
|
@ -440,6 +507,9 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
|
|||
		if ui.Name != "" {
			return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
		}
		if err := ui.initURLs(); err != nil {
			return nil, err
		}
		ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`)
		ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`)
		ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
|
@ -480,30 +550,11 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||
		if byAuthToken[at2] != nil {
			return nil, fmt.Errorf("duplicate auth token found for bearer_token=%q, username=%q: %q", ui.BearerToken, ui.Username, at2)
		}
		if ui.URLPrefix != nil {
			if err := ui.URLPrefix.sanitize(); err != nil {
				return nil, err
			}
		}
		if ui.DefaultURL != nil {
			if err := ui.DefaultURL.sanitize(); err != nil {
				return nil, err
			}
		}
		for _, e := range ui.URLMaps {
			if len(e.SrcPaths) == 0 {
				return nil, fmt.Errorf("missing `src_paths` in `url_map`")
			}
			if e.URLPrefix == nil {
				return nil, fmt.Errorf("missing `url_prefix` in `url_map`")
			}
			if err := e.URLPrefix.sanitize(); err != nil {
				return nil, err
			}
		}
		if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
			return nil, fmt.Errorf("missing `url_prefix`")
		if err := ui.initURLs(); err != nil {
			return nil, err
		}

		name := ui.name()
		if ui.BearerToken != "" {
			if ui.Password != "" {
|
@ -525,6 +576,7 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||
		_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
			return float64(len(ui.concurrencyLimitCh))
		})

		tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
		if err != nil {
			return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
|
@ -537,6 +589,58 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
|
|||
	return byAuthToken, nil
}

func (ui *UserInfo) initURLs() error {
	retryStatusCodes := defaultRetryStatusCodes.Values()
	loadBalancingPolicy := *defaultLoadBalancingPolicy
	if ui.URLPrefix != nil {
		if err := ui.URLPrefix.sanitize(); err != nil {
			return err
		}
		if len(ui.RetryStatusCodes) > 0 {
			retryStatusCodes = ui.RetryStatusCodes
		}
		if ui.LoadBalancingPolicy != "" {
			loadBalancingPolicy = ui.LoadBalancingPolicy
		}
		ui.URLPrefix.retryStatusCodes = retryStatusCodes
		if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
			return err
		}
	}
	if ui.DefaultURL != nil {
		if err := ui.DefaultURL.sanitize(); err != nil {
			return err
		}
	}
	for _, e := range ui.URLMaps {
		if len(e.SrcPaths) == 0 {
			return fmt.Errorf("missing `src_paths` in `url_map`")
		}
		if e.URLPrefix == nil {
			return fmt.Errorf("missing `url_prefix` in `url_map`")
		}
		if err := e.URLPrefix.sanitize(); err != nil {
			return err
		}
		rscs := retryStatusCodes
		lbp := loadBalancingPolicy
		if len(e.RetryStatusCodes) > 0 {
			rscs = e.RetryStatusCodes
		}
		if e.LoadBalancingPolicy != "" {
			lbp = e.LoadBalancingPolicy
		}
		e.URLPrefix.retryStatusCodes = rscs
		if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
			return err
		}
	}
	if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
		return fmt.Errorf("missing `url_prefix`")
	}
	return nil
}

func (ui *UserInfo) name() string {
	if ui.Name != "" {
		return ui.Name
|
@ -250,6 +250,7 @@ users:
|
|||
- http://node2:343/bbb
  tls_insecure_skip_verify: false
  retry_status_codes: [500, 501]
  load_balancing_policy: first_available
  drop_src_path_prefix_parts: 1
`, map[string]*UserInfo{
		getAuthToken("", "foo", "bar"): {
|
@ -261,6 +262,7 @@ users:
|
|||
			}),
			TLSInsecureSkipVerify:  &insecureSkipVerifyFalse,
			RetryStatusCodes:       []int{500, 501},
			LoadBalancingPolicy:    "first_available",
			DropSrcPathPrefixParts: 1,
		},
	})
|
@ -37,14 +37,14 @@ users:
|
|||

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to http://localhost:8428 with extra_label=team=dev query arg.
# For example, http://vmauth:8427/api/v1/query is routed to http://localhost:8428/api/v1/query?extra_label=team=dev
# For example, http://vmauth:8427/api/v1/query is proxied to http://localhost:8428/api/v1/query?extra_label=team=dev
- username: "local-single-node2"
  password: "***"
  url_prefix: "http://localhost:8428?extra_label=team=dev"

# All the requests to http://vmauth:8427 with the given Basic Auth (username:password)
# are proxied to https://localhost:8428
# For example, http://vmauth:8427/api/v1/query is routed to https://localhost/api/v1/query
# For example, http://vmauth:8427/api/v1/query is proxied to https://localhost/api/v1/query
# TLS verification is ignored for https://localhost.
- username: "local-single-node-with-tls"
  password: "***"
|
@ -111,9 +111,9 @@ users:
|
|||
- "http://default1:8888/unsupported_url_handler"
- "http://default2:8888/unsupported_url_handler"

# Requests without Authorization header are routed according to `unauthorized_user` section.
# Requests are routed in round-robin fashion between `url_prefix` backends.
# The deny_partial_response query arg is added to all the routed requests.
# Requests without Authorization header are proxied according to `unauthorized_user` section.
# Requests are proxied in round-robin fashion between `url_prefix` backends.
# The deny_partial_response query arg is added to all the proxied requests.
# The requests are re-tried if url_prefix backends send 500 or 503 response status codes.
unauthorized_user:
  url_prefix:
|
@ -33,7 +33,7 @@ import (
|
|||
)

var (
	httpListenAddr   = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol")
	httpListenAddr   = flag.String("httpListenAddr", ":8427", "TCP address to listen for http connections. See also -tls and -httpListenAddr.useProxyProtocol")
	useProxyProtocol = flag.Bool("httpListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted at -httpListenAddr . "+
		"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
		"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
@ -164,7 +164,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||

func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
	u := normalizeURL(r.URL)
	up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
	up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
	isDefault := false
	if up == nil {
		if ui.DefaultURL == nil {
|
@ -180,7 +180,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||
			httpserver.Errorf(w, r, "missing route for %q", u.String())
			return
		}
		up, hc, retryStatusCodes = ui.DefaultURL, ui.HeadersConf, ui.RetryStatusCodes
		up, hc = ui.DefaultURL, ui.HeadersConf
		isDefault = true
	}
	maxAttempts := up.getBackendsCount()
|
@ -190,17 +190,17 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
|||
		}
	}
	for i := 0; i < maxAttempts; i++ {
		bu := up.getLeastLoadedBackendURL()
		bu := up.getBackendURL()
		targetURL := bu.url
		// Don't change path and add request_path query param for default route.
		if isDefault {
			query := targetURL.Query()
			query.Set("request_path", u.Path)
			query.Set("request_path", u.String())
			targetURL.RawQuery = query.Encode()
		} else { // Update path for regular routes.
			targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts)
		}
		ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport)
		ok := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui.httpTransport)
		bu.put()
		if ok {
			return
|
@ -49,18 +49,18 @@ func dropPrefixParts(path string, parts int) string {
|
|||
	return path
}

func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) {
func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, int) {
	for _, e := range ui.URLMaps {
		for _, sp := range e.SrcPaths {
			if sp.match(u.Path) {
				return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts
				return e.URLPrefix, e.HeadersConf, e.DropSrcPathPrefixParts
			}
		}
	}
	if ui.URLPrefix != nil {
		return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts
		return ui.URLPrefix, ui.HeadersConf, ui.DropSrcPathPrefixParts
	}
	return nil, HeadersConf{}, nil, 0
	return nil, HeadersConf{}, 0
}

func normalizeURL(uOrig *url.URL) *url.URL {
|
@ -79,14 +79,17 @@ func TestDropPrefixParts(t *testing.T) {
|
|||

func TestCreateTargetURLSuccess(t *testing.T) {
	f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string,
		expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) {
		expectedRetryStatusCodes []int, expectedLoadBalancingPolicy string, expectedDropSrcPathPrefixParts int) {
		t.Helper()
		if err := ui.initURLs(); err != nil {
			t.Fatalf("cannot initialize urls inside UserInfo: %s", err)
		}
		u, err := url.Parse(requestURI)
		if err != nil {
			t.Fatalf("cannot parse %q: %s", requestURI, err)
		}
		u = normalizeURL(u)
		up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
		up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
		if up == nil {
			t.Fatalf("cannot determine backend: %s", err)
		}
|
@ -100,8 +103,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
		if headersStr != expectedRequestHeaders {
			t.Fatalf("unexpected request headers; got %s; want %s", headersStr, expectedRequestHeaders)
		}
		if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) {
			t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes)
		if !reflect.DeepEqual(up.retryStatusCodes, expectedRetryStatusCodes) {
			t.Fatalf("unexpected retryStatusCodes; got %d; want %d", up.retryStatusCodes, expectedRetryStatusCodes)
		}
		if up.loadBalancingPolicy != expectedLoadBalancingPolicy {
			t.Fatalf("unexpected loadBalancingPolicy; got %q; want %q", up.loadBalancingPolicy, expectedLoadBalancingPolicy)
		}
		if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
			t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
|
@ -110,7 +116,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
	// Simple routing with `url_prefix`
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar"),
	}, "", "http://foo.bar/.", "[]", "[]", nil, 0)
	}, "", "http://foo.bar/.", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar"),
		HeadersConf: HeadersConf{
|
@ -120,23 +126,24 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
		}},
		},
		RetryStatusCodes:       []int{503, 501},
		LoadBalancingPolicy:    "first_available",
		DropSrcPathPrefixParts: 2,
	}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2)
	}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, "first_available", 2)
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar/federate"),
	}, "/", "http://foo.bar/federate", "[]", "[]", nil, 0)
	}, "/", "http://foo.bar/federate", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar"),
	}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0)
	}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("https://sss:3894/x/y"),
	}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0)
	}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("https://sss:3894/x/y"),
	}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0)
	}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("https://sss:3894/x/y"),
	}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0)
	}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, "least_loaded", 0)

	// Complex routing with `url_map`
	ui := &UserInfo{
|
@ -163,6 +170,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
			},
		},
		RetryStatusCodes:       []int{503, 500, 501},
		LoadBalancingPolicy:    "first_available",
		DropSrcPathPrefixParts: 1,
	},
	{
|
@ -184,9 +192,9 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
		RetryStatusCodes:       []int{502},
		DropSrcPathPrefixParts: 2,
	}
	f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1)
	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
	f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2)
	f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, "first_available", 1)
	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", []int{502}, "least_loaded", 0)
	f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, "least_loaded", 2)

	// Complex routing regexp paths in `url_map`
	ui = &UserInfo{
|
@ -202,17 +210,17 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
|||
		},
		URLPrefix: mustParseURL("http://default-server"),
	}
	f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0)
	f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0)
	f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0)
	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
	f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0)
	f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, "least_loaded", 0)
	f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, "least_loaded", 0)
	f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, "least_loaded", 0)
	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, "least_loaded", 0)
	f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
	}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0)
	}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, "least_loaded", 0)
	f(&UserInfo{
		URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
	}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0)
	}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, "least_loaded", 0)
}

func TestCreateTargetURLFailure(t *testing.T) {
|
@ -223,7 +231,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
|||
		t.Fatalf("cannot parse %q: %s", requestURI, err)
	}
	u = normalizeURL(u)
	up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
	up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
	if up != nil {
		t.Fatalf("unexpected non-empty up=%#v", up)
	}
|
@ -233,9 +241,6 @@ func TestCreateTargetURLFailure(t *testing.T) {
|
|||
	if hc.ResponseHeaders != nil {
		t.Fatalf("unexpected non-empty response headers=%q", hc.ResponseHeaders)
	}
	if retryStatusCodes != nil {
		t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes)
	}
	if dropSrcPathPrefixParts != 0 {
		t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts)
	}
|

@ -1,444 +1,3 @@
# vmbackup

See vmbackup docs [here](https://docs.victoriametrics.com/vmbackup.html).

`vmbackup` creates VictoriaMetrics data backups from [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).

`vmbackup` supports incremental and full backups. Incremental backups are created automatically if the destination path already contains data from the previous backup.
Full backups can be sped up with `-origin` pointing to an already existing backup on the same remote storage. In this case `vmbackup` makes a server-side copy of the data
shared between the existing backup and the new backup. This saves time and costs on data transfer.

The backup process can be interrupted at any time. It is automatically resumed from the interruption point when `vmbackup` is restarted with the same args.

Backed up data can be restored with [vmrestore](https://docs.victoriametrics.com/vmrestore.html).

See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.

See also the [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) tool built on top of `vmbackup`. This tool simplifies
creation of hourly, daily, weekly and monthly backups.

## Supported storage types

`vmbackup` supports the following `-dst` storage types:

* [GCS](https://cloud.google.com/storage/). Example: `gs://<bucket>/<path/to/backup>`
* [S3](https://aws.amazon.com/s3/). Example: `s3://<bucket>/<path/to/backup>`
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `azblob://<container>/<path/to/backup>`
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/) or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
* Local filesystem. Example: `fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents storing the backup in the directory pointed to by the `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.

## Use cases

### Regular backups

A regular backup can be performed with the following command:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup>
```

* `</path/to/victoria-metrics-data>` - path to VictoriaMetrics data pointed to by the `-storageDataPath` command-line flag in single-node VictoriaMetrics or in cluster `vmstorage`.
  There is no need to stop VictoriaMetrics for creating backups since they are performed from immutable [instant snapshots](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots).
* `http://localhost:8428/snapshot/create` is the URL for creating snapshots according to [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots). `vmbackup` creates a snapshot by querying the provided `-snapshot.createURL`, then performs the backup and then automatically removes the created snapshot.
* `<bucket>` is the name of an already existing [GCS bucket](https://cloud.google.com/storage/docs/creating-buckets).
* `<path/to/new/backup>` is the destination path where the new backup will be placed.

### Regular backups with server-side copy from existing backup

If the destination GCS bucket already contains the previous backup at the `-origin` path, then the new backup can be sped up
with the following command:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/new/backup> -origin=gs://<bucket>/<path/to/existing/backup>
```

It saves time and network bandwidth costs by performing a server-side copy of the data shared between the `-origin` and the `-dst`.

### Incremental backups

Incremental backups are performed if `-dst` points to an already existing backup. In this case only new data is uploaded to remote storage.
It saves time and network bandwidth costs when working with big backups:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/existing/backup>
```

### Smart backups

Smart backups mean storing full daily backups in `YYYYMMDD` folders and creating an incremental hourly backup in the `latest` folder:

* Run the following command every hour:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/latest
```

This command creates an [instant snapshot](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots)
and uploads only the changed data to `gs://<bucket>/latest`.

* Run the following command once a day:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<YYYYMMDD> -origin=gs://<bucket>/latest
```

This command creates a full daily backup at `gs://<bucket>/<YYYYMMDD>`, re-using the data already uploaded to the `latest` folder via server-side copy.

This approach saves network bandwidth costs on hourly backups (since they are incremental) and allows recovering data from either the last hour (`latest` backup)
or from any day (`YYYYMMDD` backups). Note that the hourly backup shouldn't run while the daily backup is being created.
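
As a sketch, such a schedule can be expressed in cron syntax. The binary path below is an assumption, and the daily job is offset from the hourly one to reduce the chance of overlap:

```console
# Hourly incremental backup into the `latest` folder.
5 * * * * /usr/local/bin/vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/latest
# Daily full backup into a YYYYMMDD folder, re-using data from `latest` via server-side copy (note the \% escaping required by cron).
35 0 * * * /usr/local/bin/vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/$(date +\%Y\%m\%d) -origin=gs://<bucket>/latest
```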

Do not forget to remove old backups when they are no longer needed in order to save storage costs.

See also the [vmbackupmanager tool](https://docs.victoriametrics.com/vmbackupmanager.html) for automating smart backups.

### Server-side copy of the existing backup

Sometimes it is necessary to make a server-side copy of an existing backup. This can be done by specifying the source backup path via the `-origin` command-line flag,
while the destination path for the backup copy must be specified via the `-dst` command-line flag. For example, the following command copies the backup
from `gs://bucket/foo` to `gs://bucket/bar`:

```console
./vmbackup -origin=gs://bucket/foo -dst=gs://bucket/bar
```

The `-origin` and `-dst` must point to the same object storage bucket or to the same filesystem.

The server-side backup copy is usually performed much faster than a regular backup, since backup data isn't transferred
between the remote storage and the locally running `vmbackup` tool.

If the `-dst` already contains some data, then its contents are synced with the `-origin` data. This allows making incremental server-side copies of backups.

## How does it work?

The backup algorithm is the following:

1. Create a snapshot by querying the provided `-snapshot.createURL`.
1. Collect information about files in the created snapshot, in the `-dst` and in the `-origin`.
1. Determine which files in `-dst` are missing in the created snapshot, and delete them. These are usually small files, which are already merged into bigger files in the snapshot.
1. Determine which files in the created snapshot are missing in `-dst`. These are usually small new files and bigger merged files.
1. Determine which files from step 4 exist in the `-origin`, and perform a server-side copy of these files from `-origin` to `-dst`.
   These are usually the biggest and the oldest files, which are shared between backups.
1. Upload the remaining files from step 4 from the created snapshot to `-dst`.
1. Delete the created snapshot.

The algorithm splits source files into 1 GiB chunks in the backup. Each chunk is stored as a separate file in the backup.
Such splitting balances the number of files in the backup against the amount of data that needs to be re-transferred after temporary errors.

`vmbackup` relies on the following [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) properties:

* All the files in the snapshot are immutable.
* Old files are periodically merged into new files.
* Smaller files are more likely to be merged.
* Consecutive snapshots share many identical files.

These properties allow performing fast and cheap incremental backups and server-side copying from `-origin` paths.
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details.
`vmbackup` can work improperly or slowly when these properties are violated.

## Troubleshooting

* If the backup is slow, then try setting a higher value for the `-concurrency` flag. This increases the number of concurrent workers that upload data to the backup storage.
* If `vmbackup` eats all the network bandwidth or CPU, then either decrease the `-concurrency` command-line flag value or set the `-maxBytesPerSecond` command-line flag to a lower value (see the example after this list).
* If `vmbackup` consumes all the CPU on systems with a large number of CPU cores, then try running it with the `-filestream.disableFadvise` command-line flag.
* If `vmbackup` has been interrupted due to a temporary error, then just restart it with the same args. It will resume the backup process.
* Backups created from [single-node VictoriaMetrics](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html) cannot be restored
  into [cluster VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) and vice versa.
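
For example, a throttled backup might look like the following sketch; the concrete limits are illustrative and should be tuned for your environment:

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=gs://<bucket>/<path/to/backup> -concurrency=4 -maxBytesPerSecond=32MB
```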

## Advanced usage

### Providing credentials as a file

Credentials can be obtained from a file.

Add the flag `-credsFilePath=/etc/credentials`, where `/etc/credentials` has the following content:

- for S3 (AWS, MinIO or other S3-compatible storages):

```console
[default]
aws_access_key_id=theaccesskey
aws_secret_access_key=thesecretaccesskeyvalue
```

- for GCP cloud storage:

```json
{
  "type": "service_account",
  "project_id": "project-id",
  "private_key_id": "key-id",
  "private_key": "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n",
  "client_email": "service-account-email",
  "client_id": "client-id",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://accounts.google.com/o/oauth2/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account-email"
}
```
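
With the credentials file in place, a backup can then be started with the `-credsFilePath` flag, e.g. for S3 (the paths are illustrative):

```console
./vmbackup -credsFilePath=/etc/credentials -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=s3://<bucket>/<path/to/backup>
```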

### Providing credentials via env variables

Credentials can be obtained from environment variables.

- For AWS S3-compatible storages set the env variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
  You can also set the env variable `AWS_SHARED_CREDENTIALS_FILE` with the path to the credentials file.
- For GCE cloud storage set the env variable `GOOGLE_APPLICATION_CREDENTIALS` with the path to the credentials file.
- For Azure storage either set the env variables `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY`, or `AZURE_STORAGE_ACCOUNT_CONNECTION_STRING`.
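
For example, a backup to S3 with env-based credentials could be started as follows (the key values are illustrative):

```console
export AWS_ACCESS_KEY_ID=theaccesskey
export AWS_SECRET_ACCESS_KEY=thesecretaccesskeyvalue
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=s3://<bucket>/<path/to/backup>
```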

Please note that `vmbackup` will use credentials provided by the cloud provider's metadata service [when applicable](https://docs.victoriametrics.com/vmbackup.html#using-cloud-providers-metadata-service).

### Using cloud providers metadata service

`vmbackup` and `vmbackupmanager` will automatically use the cloud provider's metadata service in order to obtain credentials if they are running in a cloud environment
and credentials are not explicitly provided via flags or env variables.

### Providing credentials in Kubernetes

The simplest way to provide credentials in Kubernetes is to use [Secrets](https://kubernetes.io/docs/concepts/configuration/secret/)
and inject them into the pod as environment variables. For example, the following secret can be used for AWS S3 credentials (note that `stringData` is used, so the values don't have to be base64-encoded):

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: vmbackup-credentials
stringData:
  access_key: key
  secret_key: secret
```
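
The same secret can also be created directly with `kubectl` (the values are illustrative):

```console
kubectl create secret generic vmbackup-credentials --from-literal=access_key=key --from-literal=secret_key=secret
```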

And then it can be injected into the pod as environment variables:

```yaml
...
env:
  - name: AWS_ACCESS_KEY_ID
    valueFrom:
      secretKeyRef:
        key: access_key
        name: vmbackup-credentials
  - name: AWS_SECRET_ACCESS_KEY
    valueFrom:
      secretKeyRef:
        key: secret_key
        name: vmbackup-credentials
...
```

A more secure way is to use IAM roles to provide tokens for pods instead of managing credentials manually.

For AWS deployments you will need to configure [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html).
In order to use IAM roles for service accounts with `vmbackup` or `vmbackupmanager`, create a ServiceAccount with the IAM role mapping:

```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: monitoring-backups
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::{ACCOUNT_ID}:role/{ROLE_NAME}
```

And [configure the pod to use the service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
After this `vmbackup` and `vmbackupmanager` will automatically use the IAM role for the service account in order to obtain credentials.

For GCP deployments you will need to configure [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity).
In order to use Workload Identity with `vmbackup` or `vmbackupmanager`, create a ServiceAccount with the Workload Identity annotation:

```yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: monitoring-backups
  annotations:
    iam.gke.io/gcp-service-account: {sa_name}@{project_name}.iam.gserviceaccount.com
```

And [configure the pod to use the service account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
After this `vmbackup` and `vmbackupmanager` will automatically use Workload Identity for the service account in order to obtain credentials.

### Using custom S3 endpoint

It is possible to use `vmbackup` with S3-compatible storages such as MinIO, Cloudian, etc.
You have to add a custom URL endpoint via the flag:

- for MinIO:

```console
-customS3Endpoint=http://localhost:9000
```

- for an AWS GovCloud region:

```console
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
```
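
Putting it together, a backup to a local MinIO instance might look like the following sketch (the endpoint, bucket and credentials are illustrative):

```console
export AWS_ACCESS_KEY_ID=<minio-access-key>
export AWS_SECRET_ACCESS_KEY=<minio-secret-key>
./vmbackup -customS3Endpoint=http://localhost:9000 -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=s3://<bucket>/<path/to/backup>
```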

### Permanent deletion of objects in S3-compatible storages

`vmbackup` and [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager.html) use the standard delete operation
for S3-compatible object storage when performing [incremental backups](#incremental-backups).
This operation removes only the current version of the object. This works OK in most cases.

Sometimes it is necessary to remove all the versions of an object. In this case pass the `-deleteAllObjectVersions` command-line flag to `vmbackup`.
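
For example (the destination path is illustrative):

```console
./vmbackup -storageDataPath=</path/to/victoria-metrics-data> -snapshot.createURL=http://localhost:8428/snapshot/create -dst=s3://<bucket>/<path/to/existing/backup> -deleteAllObjectVersions
```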

Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically.
Refer to the respective documentation for your object storage provider for more details.

### Command-line flags

Run `vmbackup -help` in order to see all the available options:

```console
  -concurrency int
     The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
  -configFilePath string
     Path to file with S3 configs. Configs are loaded from default location if not set.
     See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
  -configProfile string
     Profile name for S3 configs. If not set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both are not set, DefaultSharedConfigProfile is used
  -credsFilePath string
     Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
     See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
  -customS3Endpoint string
     Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
  -deleteAllObjectVersions
     Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
  -dst string
     Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir
     -dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded
  -enableTCP6
     Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
  -envflag.enable
     Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
  -envflag.prefix string
     Prefix for environment variables if -envflag.enable is set
  -eula
     Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
  -filestream.disableFadvise
     Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
  -flagsAuthKey string
     Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -fs.disableMmap
     Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
  -http.connTimeout duration
     Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
  -http.disableResponseCompression
     Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
  -http.header.csp string
     Value for 'Content-Security-Policy' header
  -http.header.frameOptions string
     Value for 'X-Frame-Options' header
  -http.header.hsts string
     Value for 'Strict-Transport-Security' header
  -http.idleConnTimeout duration
     Timeout for incoming idle http connections (default 1m0s)
  -http.maxGracefulShutdownDuration duration
     The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
  -http.pathPrefix string
     An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
  -http.shutdownDelay duration
     Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
  -httpAuth.password string
     Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
  -httpAuth.username string
     Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
  -httpListenAddr string
     TCP address for exporting metrics at /metrics page (default ":8420")
  -internStringCacheExpireDuration duration
     The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
  -internStringDisableCache
     Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
  -internStringMaxLen int
     The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
  -license string
     License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
  -license.forceOffline
     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
  -licenseFile string
     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
  -loggerDisableTimestamps
     Whether to disable writing timestamps in logs
  -loggerErrorsPerSecondLimit int
     Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
  -loggerFormat string
     Format for logs. Possible values: default, json (default "default")
  -loggerJSONFields string
     Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
  -loggerLevel string
     Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
  -loggerOutput string
     Output for the logs. Supported values: stderr, stdout (default "stderr")
  -loggerTimezone string
     Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
  -loggerWarnsPerSecondLimit int
     Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
  -maxBytesPerSecond size
     The maximum upload speed. There is no limit if it is set to 0
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
  -memory.allowedBytes size
     Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
     Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
  -memory.allowedPercent float
     Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
  -metricsAuthKey string
     Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -origin string
     Optional origin directory on the remote storage with old backup for server-side copying when performing full backup. This speeds up full backups
  -pprofAuthKey string
     Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
  -pushmetrics.extraLabel array
     Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
     Supports an array of values separated by comma or specified via multiple flags.
  -pushmetrics.interval duration
     Interval for pushing metrics to -pushmetrics.url (default 10s)
  -pushmetrics.url array
     Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
     Supports an array of values separated by comma or specified via multiple flags.
  -s3ForcePathStyle
     Prefixing endpoint with bucket name when set false, true by default. (default true)
  -s3StorageClass string
     The Storage Class applied to objects uploaded to AWS S3. Supported values are: GLACIER, DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, REDUCED_REDUNDANCY, STANDARD, STANDARD_IA.
     See https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html
  -snapshot.createURL string
     VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create . There is no need in setting -snapshotName if -snapshot.createURL is set
  -snapshot.deleteURL string
     VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
  -snapshotName string
     Name for the snapshot to backup. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-work-with-snapshots. There is no need in setting -snapshotName if -snapshot.createURL is set
  -storageDataPath string
     Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
  -tls
     Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
  -tlsCertFile string
     Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
  -tlsCipherSuites array
     Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
     Supports an array of values separated by comma or specified via multiple flags.
  -tlsKeyFile string
     Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
  -tlsMinVersion string
     Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
  -version
     Show VictoriaMetrics version
```

## How to build from sources

It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see the `vmutils-*` archives there.

### Development build

1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
1. Run `make vmbackup` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
   It builds the `vmbackup` binary and puts it into the `bin` folder.

### Production build

1. [Install docker](https://docs.docker.com/install/).
1. Run `make vmbackup-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
   It builds the `vmbackup-prod` binary and puts it into the `bin` folder.

### Building docker images

Run `make package-vmbackup`. It builds the `victoriametrics/vmbackup:<PKG_TAG>` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmbackup`.

The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
by setting it via the `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of the [scratch](https://hub.docker.com/_/scratch) image:

```console
ROOT_IMAGE=scratch make package-vmbackup
```

vmbackup docs can be edited at [docs/vmbackup.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmbackup.md).

@ -1,554 +1,3 @@
# vmbackupmanager

See vmbackupmanager docs [here](https://docs.victoriametrics.com/vmbackupmanager.html).

***vmbackupmanager is a part of [enterprise package](https://docs.victoriametrics.com/enterprise.html).
It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).***

The VictoriaMetrics backup manager automates regular backup procedures. It supports the following backup intervals: **hourly**, **daily**, **weekly** and **monthly**.
Multiple backup intervals may be configured simultaneously, i.e. the backup manager creates hourly backups every hour, daily backups every day, etc.
The backup manager must have read access to the storage data, so the best practice is to install it on the same machine (or as a sidecar) where the storage node is installed.
The backup service makes a backup every hour, puts it into the `latest` folder and then copies the data to the folders
which represent the backup intervals (hourly, daily, weekly and monthly).

The required flags for running the service are as follows:

* -eula - should be true and means that you have the legal right to run the backup manager. That can either be a signed contract or an email
  with confirmation to run the service in a trial period.
* -storageDataPath - path to VictoriaMetrics or vmstorage data to make the backup from.
* -snapshot.createURL - the VictoriaMetrics create snapshot URL; a snapshot will automatically be created during backup. Example: <http://victoriametrics:8428/snapshot/create>
* -dst - backup destination at one of [the supported storage types](https://docs.victoriametrics.com/vmbackup.html#supported-storage-types).
* -credsFilePath - path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
  See [https://cloud.google.com/iam/docs/creating-managing-service-account-keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys)
  and [https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html](https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html).

The backup schedule is controlled by the following flags:

* -disableHourly - disable hourly run. Default false
* -disableDaily - disable daily run. Default false
* -disableWeekly - disable weekly run. Default false
* -disableMonthly - disable monthly run. Default false

By default, all the intervals are enabled, and the backup manager backs up data every hour for every interval (hourly, daily, weekly and monthly).
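
For example, to keep only the daily and weekly runs, pass the corresponding disable flags. This is a sketch; the remaining flags match the usage examples below:

```console
./vmbackupmanager -disableHourly -disableMonthly -storageDataPath=/vmstorage-data -snapshot.createURL=http://victoriametrics:8428/snapshot/create -dst=gs://<bucket>/<path> -eula
```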

The backup manager creates the following directory hierarchy at **-dst**:

* /latest/ - contains the latest backup
* /hourly/ - contains hourly backups. Each backup is named as *YYYY-MM-DD:HH*
* /daily/ - contains daily backups. Each backup is named as *YYYY-MM-DD*
* /weekly/ - contains weekly backups. Each backup is named as *YYYY-WW*
* /monthly/ - contains monthly backups. Each backup is named as *YYYY-MM*

To get the full list of supported flags please run the following command:

```console
./vmbackupmanager --help
```

The service creates a **full** backup each run. This means that the system can be restored fully
from any particular backup using [vmrestore](https://docs.victoriametrics.com/vmrestore.html).
The backup manager uploads only the data that has been changed or created since the most recent backup (incremental backup).
This reduces the consumed network traffic and the time needed for performing the backup.
See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for details.

*Please take into account that the first backup upload could take a significant amount of time as it needs to upload all the data.*

There are two flags which could help with performance tuning:

* -maxBytesPerSecond - the maximum upload speed. There is no limit if it is set to 0
* -concurrency - the number of concurrent workers. Higher concurrency may improve upload speed (default 10)

## Example of Usage

GCS and cluster version. You need to have a credentials file in JSON format with the following structure:

credentials.json

```json
{
  "type": "service_account",
  "project_id": "<project>",
  "private_key_id": "",
  "private_key": "-----BEGIN PRIVATE KEY-----\-----END PRIVATE KEY-----\n",
  "client_email": "test@<project>.iam.gserviceaccount.com",
  "client_id": "",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test%40<project>.iam.gserviceaccount.com"
}
```

The backup manager is launched with the following configuration:

```console
export NODE_IP=192.168.0.10
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -eula
```

Expected logs in vmbackupmanager:

```console
info lib/backup/actions/backup.go:131 server-side copied 81 out of 81 parts from GCS{bucket: "vmstorage-data", dir: "192.168.0.10//latest/"} to GCS{bucket: "vmstorage-data", dir: "192.168.0.10//weekly/2020-34/"} in 2.549833008s
info lib/backup/actions/backup.go:169 backed up 853315 bytes in 2.882 seconds; deleted 0 bytes; server-side copied 853315 bytes; uploaded 0 bytes
```

Expected logs in vmstorage:

```console
info VictoriaMetrics/lib/storage/table.go:146 creating table snapshot of "/vmstorage-data/data"...
info VictoriaMetrics/lib/storage/storage.go:311 deleting snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA"...
info VictoriaMetrics/lib/storage/storage.go:319 deleted snapshot "/vmstorage-data/snapshots/20200818201959-162C760149895DDA" in 0.169 seconds
```

The result on the GCS bucket:

* The root folder

  <img alt="root folder" src="vmbackupmanager_root_folder.png">

* The latest folder

  <img alt="latest folder" src="vmbackupmanager_latest_folder.png">

Please see [vmbackup docs](https://docs.victoriametrics.com/vmbackup.html#advanced-usage) for more examples of authentication with different
storage types.

## Backup Retention Policy

The backup retention policy is controlled by:

* -keepLastHourly - keep the last N hourly backups. Disabled by default
* -keepLastDaily - keep the last N daily backups. Disabled by default
* -keepLastWeekly - keep the last N weekly backups. Disabled by default
* -keepLastMonthly - keep the last N monthly backups. Disabled by default

> *Note*: a value of 0 in any keepLast flag results in deletion of ALL backups for the particular type (hourly, daily, weekly or monthly)

> *Note*: the retention policy does not remove previous versions of objects in object storages if versioning is enabled. See [these docs](https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages) for more details.

Let's assume we have a backup manager collecting daily backups for the past 10 days.

<img alt="retention policy daily before retention cycle" src="vmbackupmanager_rp_daily_1.png">

We enable the backup retention policy for the backup manager by using the following configuration:

```console
export NODE_IP=192.168.0.10
export VMSTORAGE_ENDPOINT=http://127.0.0.1:8428
./vmbackupmanager -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data -snapshot.createURL=$VMSTORAGE_ENDPOINT/snapshot/create -keepLastDaily=3 -eula
```

Expected logs in the backup manager on start:

```console
info lib/logger/flag.go:20 flag "keepLastDaily" = "3"
```

Expected logs in the backup manager during the retention cycle:

```console
info app/vmbackupmanager/retention.go:106 daily backups to delete [daily/2021-02-13 daily/2021-02-12 daily/2021-02-11 daily/2021-02-10 daily/2021-02-09 daily/2021-02-08 daily/2021-02-07]
```

The result on the GCS bucket: we see only the 3 most recent daily backups:

<img alt="retention policy daily after retention cycle" src="vmbackupmanager_rp_daily_2.png">

### Protecting backups against deletion by retention policy

You can protect any backup against deletion by the retention policy with the `vmbackupmanager backup lock` command.

For instance:

```console
./vmbackupmanager backup lock daily/2021-02-13 -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
```

After that the backup won't be deleted by the retention policy.
You can view the `locked` attribute in the backup list:

```console
./vmbackupmanager backup list -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
```

To remove the protection, use the `vmbackupmanager backup unlock` command.

For example:

```console
./vmbackupmanager backup unlock daily/2021-02-13 -dst=<DST_PATH> -storageDataPath=/vmstorage-data -eula
```

## API methods

`vmbackupmanager` exposes the following API methods:

* GET `/api/v1/backups` - returns the list of backups in remote storage.
  Example output:
  ```json
  [{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
  ```
  > Note: the `created_at` field is in RFC3339 format.

* GET `/api/v1/backups/<BACKUP_NAME>` - returns backup info by name.
  Example output:
  ```json
  {"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00","locked":true}
  ```

* PUT `/api/v1/backups/<BACKUP_NAME>` - updates the "locked" attribute of the backup by name.
  Example request body:
  ```json
  {"locked":true}
  ```
  Example response:
  ```json
  {"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00", "locked": true}
  ```

* POST `/api/v1/restore` - saves the backup name to restore when [performing restore](#restore-commands).
  Example request body:
  ```json
  {"backup":"daily/2022-10-06"}
  ```

* GET `/api/v1/restore` - returns the backup name from the restore mark if it exists.
  Example response:
  ```json
  {"backup":"daily/2022-10-06"}
  ```

* DELETE `/api/v1/restore` - deletes the restore mark.
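
As a sketch, these endpoints can be called with any HTTP client; the address and backup name below are illustrative:

```console
curl http://127.0.0.1:8300/api/v1/backups
curl -X PUT -d '{"locked":true}' http://127.0.0.1:8300/api/v1/backups/daily/2023-04-07
```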

## CLI

`vmbackupmanager` exposes CLI commands to work with the [API methods](#api-methods) without external dependencies.

Supported commands:

```console
vmbackupmanager backup

vmbackupmanager backup list
   List backups in remote storage

vmbackupmanager backup lock
   Locks backup in remote storage against deletion

vmbackupmanager backup unlock
   Unlocks backup in remote storage for deletion

vmbackupmanager restore
   Restore backup specified by restore mark if it exists

vmbackupmanager restore get
   Get restore mark if it exists

vmbackupmanager restore delete
   Delete restore mark if it exists

vmbackupmanager restore create [backup_name]
   Create restore mark
```

By default, CLI commands use the `http://127.0.0.1:8300` endpoint to reach the `vmbackupmanager` API.
It can be changed by using the flag:

```
-apiURL string
   vmbackupmanager address to perform API requests (default "http://127.0.0.1:8300")
```
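
For example (the address is illustrative):

```console
./vmbackupmanager backup list -apiURL=http://vmbackupmanager:8300
```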

### Backup commands

`vmbackupmanager backup list` lists backups in remote storage:

```console
$ ./vmbackupmanager backup list
[{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
```

### Restore commands

Restore commands are used to create, get and delete the restore mark.
The restore mark is used by `vmbackupmanager` to store the name of the backup to restore when running the restore.

Create restore mark:
```console
$ ./vmbackupmanager restore create daily/2022-10-06
```

Get restore mark if it exists:
```console
$ ./vmbackupmanager restore get
{"backup":"daily/2022-10-06"}
```

Delete restore mark if it exists:
```console
$ ./vmbackupmanager restore delete
```

Perform restore:
```console
$ /vmbackupmanager-prod restore -dst=gs://vmstorage-data/$NODE_IP -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
```
Note that `vmsingle` or `vmstorage` should be stopped before performing the restore.

If the restore mark doesn't exist at `storageDataPath` (restore wasn't requested), `vmbackupmanager restore` will exit with a successful status code.

### How to restore backup via CLI

1. Run `vmbackupmanager backup list` to get the list of available backups:
   ```console
   $ /vmbackupmanager-prod backup list
   [{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
   ```
1. Run `vmbackupmanager restore create` to create the restore mark:
   - Use the relative path to the backup to restore from the currently used remote storage:
     ```console
     $ /vmbackupmanager-prod restore create daily/2023-04-07
     ```
   - Use the full path to the backup to restore from any remote storage:
     ```console
     $ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/daily/2023-04-07
     ```
1. Stop the `vmstorage` or `vmsingle` node
1. Run `vmbackupmanager restore` to restore the backup:
   ```console
   $ /vmbackupmanager-prod restore -credsFilePath=credentials.json -storageDataPath=/vmstorage-data
   ```
1. Start the `vmstorage` or `vmsingle` node

### How to restore in Kubernetes

1. Ensure there is an init container with `vmbackupmanager restore` in the `vmstorage` or `vmsingle` pod.
   For [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) deployments it is required to add:
   ```yaml
   vmbackup:
     restore:
       onStart:
         enabled: "true"
   ```
   See the operator `VMStorage` schema [here](https://docs.victoriametrics.com/operator/api.html#vmstorage) and `VMSingle` [here](https://docs.victoriametrics.com/operator/api.html#vmsinglespec).
1. Enter the container running `vmbackupmanager`
1. Use `vmbackupmanager backup list` to get the list of available backups:
   ```console
   $ /vmbackupmanager-prod backup list
   [{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
   ```
1. Use `vmbackupmanager restore create` to create the restore mark:
   - Use the relative path to the backup to restore from the currently used remote storage:
     ```console
     $ /vmbackupmanager-prod restore create daily/2023-04-07
     ```
   - Use the full path to the backup to restore from any remote storage:
     ```console
     $ /vmbackupmanager-prod restore create azblob://test1/vmbackupmanager/daily/2023-04-07
     ```
1. Restart the pod

#### Restore cluster into another cluster

These steps assume that [VictoriaMetrics operator](https://docs.victoriametrics.com/operator/VictoriaMetrics-Operator.html) is used to manage `VMCluster`.
Clusters here are referred to as `source` and `destination`.

1. Create a new cluster with access to the *source* cluster `vmbackupmanager` storage and the same number of storage nodes.
   Add the following section in order to enable restore on start (the operator `VMStorage` schema can be found [here](https://docs.victoriametrics.com/operator/api.html#vmstorage)):
   ```yaml
   vmbackup:
     restore:
       onStart:
         enabled: "true"
   ```
   Note: it is safe to leave this section in the cluster configuration, since it will be ignored if the restore mark doesn't exist.
   > Important! Use a different `-dst` for the *destination* cluster to avoid overwriting backup data of the *source* cluster.
1. Enter the container running `vmbackupmanager` in the *source* cluster
1. Use `vmbackupmanager backup list` to get the list of available backups:
   ```console
   $ /vmbackupmanager-prod backup list
   [{"name":"daily/2023-04-07","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:07+00:00"},{"name":"hourly/2023-04-07:11","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:06+00:00"},{"name":"latest","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:04+00:00"},{"name":"monthly/2023-04","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:10+00:00"},{"name":"weekly/2023-14","size_bytes":318837,"size":"311.4ki","created_at":"2023-04-07T16:15:09+00:00"}]
   ```
1. Use `vmbackupmanager restore create` to create the restore mark at each pod of the *destination* cluster.
   Each pod in the *destination* cluster should be restored from the backup of the respective pod in the *source* cluster.
   For example: `vmstorage-destination-0` in the *destination* cluster should be restored from the backup of `vmstorage-source-0` in the *source* cluster.
   ```console
   $ /vmbackupmanager-prod restore create s3://source_cluster/vmstorage-source-0/daily/2023-04-07
   ```
1. Restart the `vmstorage` pods of the *destination* cluster. On pod start `vmbackupmanager` will restore data from the specified backup.

## Monitoring

`vmbackupmanager` exports various metrics in Prometheus exposition format at the `http://vmbackupmanager:8300/metrics` page. It is recommended to set up regular scraping of this page
either via [vmagent](https://docs.victoriametrics.com/vmagent.html) or via Prometheus, so the exported metrics can be analyzed later.
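
A quick way to verify that the metrics are exported is to fetch the page directly (the address is illustrative):

```console
curl http://vmbackupmanager:8300/metrics
```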
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/17798-victoriametrics-backupmanager/) for `vmbackupmanager` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Flags
|
||||
|
||||
Pass `-help` to `vmbackupmanager` in order to see the full list of supported
|
||||
command-line flags with their descriptions.
|
||||
|
||||
The shortlist of configuration flags is the following:
|
||||
|
||||
```
vmbackupmanager performs regular backups according to the provided configs.

subcommands:
  backup: provides auxiliary backup-related commands
  restore: restores backup specified by restore mark if it exists

command-line flags:
-apiURL string
   vmbackupmanager address to perform API requests (default "http://127.0.0.1:8300")
-concurrency int
   The number of concurrent workers. Higher concurrency may reduce backup duration (default 10)
-configFilePath string
   Path to file with S3 configs. Configs are loaded from default location if not set.
   See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-configProfile string
   Profile name for S3 configs. If not set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE), or if both are not set, DefaultSharedConfigProfile is used
-credsFilePath string
   Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
   See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
-customS3Endpoint string
   Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
-deleteAllObjectVersions
   Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
-disableDaily
   Disable daily run. Default false
-disableHourly
   Disable hourly run. Default false
-disableMonthly
   Disable monthly run. Default false
-disableWeekly
   Disable weekly run. Default false
-dst string
   The root folder of Victoria Metrics backups. Example: gs://bucket/path/to/backup/dir, s3://bucket/path/to/backup/dir or fs:///path/to/local/backup/dir
-enableTCP6
   Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
-envflag.enable
   Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
-envflag.prefix string
   Prefix for environment variables if -envflag.enable is set
-eula
   Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
-filestream.disableFadvise
   Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
-flagsAuthKey string
   Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-fs.disableMmap
   Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
-http.connTimeout duration
   Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
-http.disableResponseCompression
   Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
-http.header.csp string
   Value for 'Content-Security-Policy' header
-http.header.frameOptions string
   Value for 'X-Frame-Options' header
-http.header.hsts string
   Value for 'Strict-Transport-Security' header
-http.idleConnTimeout duration
   Timeout for incoming idle http connections (default 1m0s)
-http.maxGracefulShutdownDuration duration
   The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
-http.pathPrefix string
   An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
-http.shutdownDelay duration
   Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
-httpAuth.password string
   Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
-httpAuth.username string
   Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
-httpListenAddr string
   Address to listen for http connections (default ":8300")
-internStringCacheExpireDuration duration
   The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
-internStringDisableCache
   Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
-internStringMaxLen int
   The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-keepLastDaily int
   Keep last N daily backups. If 0 is specified, the next retention cycle removes all backups for the given time period. (default -1)
-keepLastHourly int
   Keep last N hourly backups. If 0 is specified, the next retention cycle removes all backups for the given time period. (default -1)
-keepLastMonthly int
   Keep last N monthly backups. If 0 is specified, the next retention cycle removes all backups for the given time period. (default -1)
-keepLastWeekly int
   Keep last N weekly backups. If 0 is specified, the next retention cycle removes all backups for the given time period. (default -1)
-license string
   License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
-license.forceOffline
   Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
-licenseFile string
   Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
-loggerDisableTimestamps
   Whether to disable writing timestamps in logs
-loggerErrorsPerSecondLimit int
   Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
-loggerFormat string
   Format for logs. Possible values: default, json (default "default")
-loggerJSONFields string
   Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
-loggerLevel string
   Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerOutput string
   Output for the logs. Supported values: stderr, stdout (default "stderr")
-loggerTimezone string
   Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
-loggerWarnsPerSecondLimit int
   Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
-maxBytesPerSecond int
   The maximum upload speed. There is no limit if it is set to 0
-memory.allowedBytes size
   Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
   Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
-memory.allowedPercent float
   Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
-metricsAuthKey string
   Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
-pprofAuthKey string
   Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
-pushmetrics.extraLabel array
   Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
   Supports an array of values separated by comma or specified via multiple flags.
-pushmetrics.interval duration
   Interval for pushing metrics to -pushmetrics.url (default 10s)
-pushmetrics.url array
   Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
   Supports an array of values separated by comma or specified via multiple flags.
-runOnStart
   Upload backups immediately after start of the service. Otherwise the backup starts at the beginning of the next hour
-s3ForcePathStyle
   Prefixing endpoint with bucket name when set to false, true by default. (default true)
-s3StorageClass string
   The Storage Class applied to objects uploaded to AWS S3. Supported values are: GLACIER, DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, REDUCED_REDUNDANCY, STANDARD, STANDARD_IA.
   See https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html
-snapshot.createURL string
   VictoriaMetrics create snapshot url. When this is given, a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
-snapshot.deleteURL string
   VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
-storageDataPath string
   Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
-tls
   Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
-tlsCertFile string
   Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
-tlsCipherSuites array
   Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
   Supports an array of values separated by comma or specified via multiple flags.
-tlsKeyFile string
   Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
-tlsMinVersion string
   Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
-version
   Show VictoriaMetrics version
```
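As an illustration of how these flags combine, here is a hedged example of running `vmbackupmanager` next to a single-node VictoriaMetrics instance; the URLs, bucket name and retention values are placeholders, not recommendations:

```console
/vmbackupmanager-prod \
  -storageDataPath=/victoria-metrics-data \
  -snapshot.createURL=http://victoriametrics:8428/snapshot/create \
  -dst=s3://my-backups-bucket/victoria-metrics \
  -keepLastHourly=24 -keepLastDaily=7 -keepLastWeekly=4 -keepLastMonthly=12 \
  -runOnStart
```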
vmbackupmanager docs can be edited at [docs/vmbackupmanager.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmbackupmanager.md).
1109
app/vmctl/README.md
@ -320,10 +320,11 @@ var (
)

const (
	vmNativeFilterMatch     = "vm-native-filter-match"
	vmNativeFilterTimeStart = "vm-native-filter-time-start"
	vmNativeFilterTimeEnd   = "vm-native-filter-time-end"
	vmNativeStepInterval    = "vm-native-step-interval"
	vmNativeFilterMatch       = "vm-native-filter-match"
	vmNativeFilterTimeStart   = "vm-native-filter-time-start"
	vmNativeFilterTimeEnd     = "vm-native-filter-time-end"
	vmNativeFilterTimeReverse = "vm-native-filter-time-reverse"
	vmNativeStepInterval      = "vm-native-step-interval"

	vmNativeDisableBinaryProtocol = "vm-native-disable-binary-protocol"
	vmNativeDisableHTTPKeepAlive  = "vm-native-disable-http-keep-alive"

@ -362,10 +363,15 @@ var (
		},
		&cli.StringFlag{
			Name: vmNativeStepInterval,
			Usage: fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are '%s','%s','%s','%s','%s'.", vmNativeFilterTimeStart,
				stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
			Usage: fmt.Sprintf("The time interval to split the migration into steps. For example, to migrate 1y of data with '--%s=month' vmctl will execute it in 12 separate requests from the beginning of the time range to its end. To reverse the order use '--%s'. Requires setting '--%s'. Valid values are '%s','%s','%s','%s','%s'.",
				vmNativeStepInterval, vmNativeFilterTimeReverse, vmNativeFilterTimeStart, stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
			Value: stepper.StepMonth,
		},
		&cli.BoolFlag{
			Name:  vmNativeFilterTimeReverse,
			Usage: fmt.Sprintf("Whether to reverse the order of time intervals split by '--%s' cmd-line flag. When set, the migration will start from the newest to the oldest data.", vmNativeStepInterval),
			Value: false,
		},
		&cli.BoolFlag{
			Name:  vmNativeDisableHTTPKeepAlive,
			Usage: "Disable HTTP persistent connections for requests made to VictoriaMetrics components during export",

@ -469,6 +475,7 @@ const (
	remoteReadConcurrency       = "remote-read-concurrency"
	remoteReadFilterTimeStart   = "remote-read-filter-time-start"
	remoteReadFilterTimeEnd     = "remote-read-filter-time-end"
	remoteReadFilterTimeReverse = "remote-read-filter-time-reverse"
	remoteReadFilterLabel       = "remote-read-filter-label"
	remoteReadFilterLabelValue  = "remote-read-filter-label-value"
	remoteReadStepInterval      = "remote-read-step-interval"

@ -520,9 +527,14 @@ var (
			Value: false,
		},
		&cli.StringFlag{
			Name:     remoteReadStepInterval,
			Usage:    fmt.Sprintf("Split export data into chunks. Requires setting --%s. Valid values are %q,%q,%q,%q.", remoteReadFilterTimeStart, stepper.StepMonth, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
			Required: true,
			Name: remoteReadStepInterval,
			Usage: fmt.Sprintf("The time interval to split the migration into steps. For example, to migrate 1y of data with '--%s=month' vmctl will execute it in 12 separate requests from the beginning of the time range to its end. To reverse the order use '--%s'. Requires setting '--%s'. Valid values are '%s','%s','%s','%s','%s'.",
				remoteReadStepInterval, remoteReadFilterTimeReverse, remoteReadFilterTimeStart, stepper.StepMonth, stepper.StepWeek, stepper.StepDay, stepper.StepHour, stepper.StepMinute),
			Required: true,
		},
		&cli.BoolFlag{
			Name:  remoteReadFilterTimeReverse,
			Usage: fmt.Sprintf("Whether to reverse the order of time intervals split by '--%s' cmd-line flag. When set, the migration will start from the newest to the oldest data.", remoteReadStepInterval),
			Value: false,
		},
		&cli.StringFlag{
			Name: remoteReadSrcAddr,
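The new flags surface in `vmctl` as follows; a hedged usage sketch (the addresses and the time range are placeholders), migrating the newest chunks first:

```console
# native migration, month-sized chunks, newest data first
./vmctl vm-native \
  --vm-native-src-addr=http://source-victoriametrics:8428 \
  --vm-native-dst-addr=http://destination-victoriametrics:8428 \
  --vm-native-filter-time-start='2022-01-01T00:00:00Z' \
  --vm-native-step-interval=month \
  --vm-native-filter-time-reverse

# the same ordering for Prometheus remote-read migration
./vmctl remote-read \
  --remote-read-src-addr=http://prometheus:9090 \
  --remote-read-filter-time-start='2022-01-01T00:00:00Z' \
  --remote-read-step-interval=month \
  --remote-read-filter-time-reverse \
  --vm-addr=http://victoriametrics:8428
```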
@ -358,10 +358,13 @@ func (c *Client) getSeries() ([]*Series, error) {
func (c *Client) do(q influx.Query) ([]queryValues, error) {
	res, err := c.Query(q)
	if err != nil {
		return nil, fmt.Errorf("query %q err: %s", q.Command, err)
		return nil, fmt.Errorf("query error: %s", err)
	}
	if res.Error() != nil {
		return nil, fmt.Errorf("response error: %s", res.Error())
	}
	if len(res.Results) < 1 {
		return nil, fmt.Errorf("exploration query %q returned 0 results", q.Command)
		return nil, fmt.Errorf("query returned 0 results")
	}
	return parseResult(res.Results[0])
}
@ -12,11 +12,12 @@ import (
	"syscall"
	"time"

	"github.com/urfave/cli/v2"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/auth"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
	"github.com/urfave/cli/v2"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"

@ -150,9 +151,10 @@ func main() {
				src: rr,
				dst: importer,
				filter: remoteReadFilter{
					timeStart: c.Timestamp(remoteReadFilterTimeStart),
					timeEnd:   c.Timestamp(remoteReadFilterTimeEnd),
					chunk:     c.String(remoteReadStepInterval),
					timeStart:   c.Timestamp(remoteReadFilterTimeStart),
					timeEnd:     c.Timestamp(remoteReadFilterTimeEnd),
					chunk:       c.String(remoteReadStepInterval),
					timeReverse: c.Bool(remoteReadFilterTimeReverse),
				},
				cc:       c.Int(remoteReadConcurrency),
				isSilent: c.Bool(globalSilent),

@ -234,10 +236,11 @@ func main() {
				rateLimit:    c.Int64(vmRateLimit),
				interCluster: c.Bool(vmInterCluster),
				filter: native.Filter{
					Match:     c.String(vmNativeFilterMatch),
					TimeStart: c.String(vmNativeFilterTimeStart),
					TimeEnd:   c.String(vmNativeFilterTimeEnd),
					Chunk:     c.String(vmNativeStepInterval),
					Match:       c.String(vmNativeFilterMatch),
					TimeStart:   c.String(vmNativeFilterTimeStart),
					TimeEnd:     c.String(vmNativeFilterTimeEnd),
					Chunk:       c.String(vmNativeStepInterval),
					TimeReverse: c.Bool(vmNativeFilterTimeReverse),
				},
				src: &native.Client{
					AuthCfg: srcAuthConfig,
@ -4,10 +4,11 @@ import "fmt"

// Filter represents request filter
type Filter struct {
	Match     string
	TimeStart string
	TimeEnd   string
	Chunk     string
	Match       string
	TimeStart   string
	TimeEnd     string
	Chunk       string
	TimeReverse bool
}

func (f Filter) String() string {
@ -7,11 +7,12 @@ import (
	"sync"
	"time"

	"github.com/cheggaaa/pb/v3"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/stepper"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
	"github.com/cheggaaa/pb/v3"
)

type remoteReadProcessor struct {

@ -26,9 +27,10 @@ type remoteReadProcessor struct {
}

type remoteReadFilter struct {
	timeStart *time.Time
	timeEnd   *time.Time
	chunk     string
	timeStart   *time.Time
	timeEnd     *time.Time
	chunk       string
	timeReverse bool
}

func (rrp *remoteReadProcessor) run(ctx context.Context) error {

@ -41,7 +43,7 @@ func (rrp *remoteReadProcessor) run(ctx context.Context) error {
		rrp.cc = 1
	}

	ranges, err := stepper.SplitDateRange(*rrp.filter.timeStart, *rrp.filter.timeEnd, rrp.filter.chunk)
	ranges, err := stepper.SplitDateRange(*rrp.filter.timeStart, *rrp.filter.timeEnd, rrp.filter.chunk, rrp.filter.timeReverse)
	if err != nil {
		return fmt.Errorf("failed to create date ranges for the given time filters: %v", err)
	}
@ -2,6 +2,7 @@ package stepper

import (
	"fmt"
	"sort"
	"time"
)

@ -20,7 +21,7 @@ const (

// SplitDateRange splits start-end range in a subset of ranges respecting the given step
// Ranges with granularity of StepMonth are aligned to 1st of each month in order to improve export efficiency at block transfer level
func SplitDateRange(start, end time.Time, step string) ([][]time.Time, error) {
func SplitDateRange(start, end time.Time, step string, timeReverse bool) ([][]time.Time, error) {

	if start.After(end) {
		return nil, fmt.Errorf("start time %q should come before end time %q", start.Format(time.RFC3339), end.Format(time.RFC3339))

@ -71,6 +72,11 @@ func SplitDateRange(start, end time.Time, step string) ([][]time.Time, error) {
		ranges = append(ranges, []time.Time{s, e})
		currentStep = e
	}
	if timeReverse {
		sort.SliceStable(ranges, func(i, j int) bool {
			return ranges[i][0].After(ranges[j][0])
		})
	}

	return ranges, nil
}
@ -252,7 +252,280 @@ func Test_splitDateRange(t *testing.T) {
			start := mustParseDatetime(tt.args.start)
			end := mustParseDatetime(tt.args.end)

			got, err := SplitDateRange(start, end, tt.args.granularity)
			got, err := SplitDateRange(start, end, tt.args.granularity, false)
			if (err != nil) != tt.wantErr {
				t.Errorf("splitDateRange() error = %v, wantErr %v", err, tt.wantErr)
				return
			}

			var testExpectedResults [][]time.Time
			if tt.want != nil {
				testExpectedResults = make([][]time.Time, 0)
				for _, dr := range tt.want {
					testExpectedResults = append(testExpectedResults, []time.Time{
						mustParseDatetime(dr[0]),
						mustParseDatetime(dr[1]),
					})
				}
			}

			if !reflect.DeepEqual(got, testExpectedResults) {
				t.Errorf("splitDateRange() got = %v, want %v", got, testExpectedResults)
			}
		})
	}
}

func Test_splitDateRange_reverse(t *testing.T) {
	type args struct {
		start       string
		end         string
		granularity string
		timeReverse bool
	}
	tests := []struct {
		name    string
		args    args
		want    []testTimeRange
		wantErr bool
	}{
		{
			name: "validates start is before end",
			args: args{
				start:       "2022-02-01T00:00:00Z",
				end:         "2022-01-01T00:00:00Z",
				granularity: StepMonth,
				timeReverse: true,
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "validates granularity value",
			args: args{
				start:       "2022-01-01T00:00:00Z",
				end:         "2022-02-01T00:00:00Z",
				granularity: "non-existent-format",
				timeReverse: true,
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "month chunking",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-03-03T12:12:12Z",
				granularity: StepMonth,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-03-01T00:00:00Z",
					"2022-03-03T12:12:12Z",
				},
				{
					"2022-02-01T00:00:00Z",
					"2022-02-28T23:59:59.999999999Z",
				},
				{
					"2022-01-03T11:11:11Z",
					"2022-01-31T23:59:59.999999999Z",
				},
			},
			wantErr: false,
		},
		{
			name: "daily chunking",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-01-05T12:12:12Z",
				granularity: StepDay,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-01-05T11:11:11Z",
					"2022-01-05T12:12:12Z",
				},
				{
					"2022-01-04T11:11:11Z",
					"2022-01-05T11:11:11Z",
				},
				{
					"2022-01-03T11:11:11Z",
					"2022-01-04T11:11:11Z",
				},
			},
			wantErr: false,
		},
		{
			name: "hourly chunking",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-01-03T14:14:14Z",
				granularity: StepHour,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-01-03T14:11:11Z",
					"2022-01-03T14:14:14Z",
				},
				{
					"2022-01-03T13:11:11Z",
					"2022-01-03T14:11:11Z",
				},
				{
					"2022-01-03T12:11:11Z",
					"2022-01-03T13:11:11Z",
				},
				{
					"2022-01-03T11:11:11Z",
					"2022-01-03T12:11:11Z",
				},
			},
			wantErr: false,
		},
		{
			name: "month chunking with one day time range",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-01-04T12:12:12Z",
				granularity: StepMonth,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-01-03T11:11:11Z",
					"2022-01-04T12:12:12Z",
				},
			},
			wantErr: false,
		},
		{
			name: "month chunking with same day time range",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-01-03T12:12:12Z",
				granularity: StepMonth,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-01-03T11:11:11Z",
					"2022-01-03T12:12:12Z",
				},
			},
			wantErr: false,
		},
		{
			name: "month chunking with one month and two days range",
			args: args{
				start:       "2022-01-03T11:11:11Z",
				end:         "2022-02-03T00:00:00Z",
				granularity: StepMonth,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2022-02-01T00:00:00Z",
					"2022-02-03T00:00:00Z",
				},
				{
					"2022-01-03T11:11:11Z",
					"2022-01-31T23:59:59.999999999Z",
				},
			},
			wantErr: false,
		},
		{
			name: "week chunking with not full week",
			args: args{
				start:       "2023-07-30T00:00:00Z",
				end:         "2023-08-05T23:59:59.999999999Z",
				granularity: StepWeek,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2023-07-30T00:00:00Z",
					"2023-08-05T23:59:59.999999999Z",
				},
			},
		},
		{
			name: "week chunking with start of the week and end of the week",
			args: args{
				start:       "2023-07-30T00:00:00Z",
				end:         "2023-08-06T00:00:00Z",
				granularity: StepWeek,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2023-07-30T00:00:00Z",
					"2023-08-06T00:00:00Z",
				},
			},
		},
		{
			name: "week chunking with next one day week",
			args: args{
				start:       "2023-07-30T00:00:00Z",
				end:         "2023-08-07T01:12:00Z",
				granularity: StepWeek,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2023-08-06T00:00:00Z",
					"2023-08-07T01:12:00Z",
				},
				{
					"2023-07-30T00:00:00Z",
					"2023-08-06T00:00:00Z",
				},
			},
		},
		{
			name: "week chunking with month and not full week representation",
			args: args{
				start:       "2023-07-30T00:00:00Z",
				end:         "2023-09-01T01:12:00Z",
				granularity: StepWeek,
				timeReverse: true,
			},
			want: []testTimeRange{
				{
					"2023-08-27T00:00:00Z",
					"2023-09-01T01:12:00Z",
				},
				{
					"2023-08-20T00:00:00Z",
					"2023-08-27T00:00:00Z",
				},
				{
					"2023-08-13T00:00:00Z",
					"2023-08-20T00:00:00Z",
				},
				{
					"2023-08-06T00:00:00Z",
					"2023-08-13T00:00:00Z",
				},
				{
					"2023-07-30T00:00:00Z",
					"2023-08-06T00:00:00Z",
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			start := mustParseDatetime(tt.args.start)
			end := mustParseDatetime(tt.args.end)

			got, err := SplitDateRange(start, end, tt.args.granularity, tt.args.timeReverse)
			if (err != nil) != tt.wantErr {
				t.Errorf("splitDateRange() error = %v, wantErr %v", err, tt.wantErr)
				return
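Both orderings of `SplitDateRange` are covered by the tests above; assuming a checkout of the repository, they can be run directly:

```console
# -run takes a regex, so this matches both the forward and the reverse test
go test ./app/vmctl/stepper/ -run 'Test_splitDateRange' -v
```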
@ -9,6 +9,8 @@ import (
	"sync"
	"time"

	"github.com/cheggaaa/pb/v3"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/backoff"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/limiter"

@ -18,7 +20,6 @@ import (
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/cheggaaa/pb/v3"
)

type vmNativeProcessor struct {

@ -67,12 +68,11 @@ func (p *vmNativeProcessor) run(ctx context.Context) error {

	ranges := [][]time.Time{{start, end}}
	if p.filter.Chunk != "" {
		ranges, err = stepper.SplitDateRange(start, end, p.filter.Chunk)
		ranges, err = stepper.SplitDateRange(start, end, p.filter.Chunk, p.filter.TimeReverse)
		if err != nil {
			return fmt.Errorf("failed to create date ranges for the given time filters: %w", err)
		}
	}

	tenants := []string{""}
	if p.interCluster {
		log.Printf("Discovering tenants...")
@ -1,467 +1,3 @@
# vmgateway

See vmgateway docs [here](https://docs.victoriametrics.com/vmgateway.html).

***vmgateway is a part of [enterprise package](https://docs.victoriametrics.com/enterprise.html).
It is available for download and evaluation at [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
See how to request a free trial license [here](https://victoriametrics.com/products/enterprise/trial/).***

<img alt="vmgateway" src="vmgateway-overview.jpeg">

`vmgateway` is a proxy for the VictoriaMetrics Time Series Database (TSDB). It provides the following features:

* Rate Limiter
  * Based on cluster tenant's utilization, it supports multiple time interval limits for both the ingestion and retrieval of metrics
* Token Access Control
  * Supports additional per-label access control for both the Single and Cluster versions of the VictoriaMetrics TSDB
  * Provides access by tenantID in the Cluster version
  * Allows for separate write/read/admin access to data

`vmgateway` is included in our [enterprise packages](https://docs.victoriametrics.com/enterprise.html).

## Access Control

<img alt="vmgateway-ac" src="vmgateway-access-control.jpg">

`vmgateway` supports JWT-based authentication. The JWT payload can be configured to grant access to specific tenants and labels, as well as read/write permissions.

The JWT token must be in the following format:

```json
{
  "exp": 1617304574,
  "vm_access": {
    "tenant_id": {
      "account_id": 1,
      "project_id": 5
    },
    "extra_labels": {
      "team": "dev",
      "project": "mobile"
    },
    "extra_filters": ["{env=~\"prod|dev\",team!=\"test\"}"],
    "mode": 1
  }
}
```

Where:

* `exp` - required, expiration time as a unix timestamp. If the token has expired, `vmgateway` rejects the request.
* `vm_access` - required, dict with claim info, minimum form: `{"vm_access": {"tenant_id": {}}}`
* `tenant_id` - optional, for cluster mode, routes requests to the corresponding tenant.
* `extra_labels` - optional, key-value pairs for label filters added to the ingested or selected metrics. Multiple filters are added with the `and` operation. If defined, the `extra_label` from the original request is removed.
* `extra_filters` - optional, [series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) added to the select query requests. Multiple selectors are added with the `or` operation. If defined, the `extra_filter` from the original request is removed.
* `mode` - optional, access mode for the API - read, write, or full. Supported values: 0 - full (default value), 1 - read, 2 - write.
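To inspect the claims of an existing token, its payload segment can be base64-decoded; a minimal shell sketch (the token value is a placeholder, and the loop re-adds the base64 padding that JWTs omit):

```console
TOKEN='eyJhbGciOi...'   # placeholder JWT
# the payload is the 2nd dot-separated field, encoded as unpadded base64url
PAYLOAD=$(echo "$TOKEN" | cut -d. -f2 | tr '_-' '/+')
while [ $(( ${#PAYLOAD} % 4 )) -ne 0 ]; do PAYLOAD="${PAYLOAD}="; done
echo "$PAYLOAD" | base64 -d
```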
## QuickStart

Start the single version of VictoriaMetrics

```console
# single
# start node
./bin/victoria-metrics --selfScrapeInterval=10s
```

Start vmgateway

```console
./bin/vmgateway -eula -enable.auth -read.url http://localhost:8428 --write.url http://localhost:8428
```

Retrieve data from the database

```console
curl 'http://localhost:8431/api/v1/series/count' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ2bV9hY2Nlc3MiOnsidGVuYW50X2lkIjp7fSwicm9sZSI6MX0sImV4cCI6MTkzOTM0NjIxMH0.5WUxEfdcV9hKo4CtQdtuZYOGpGXWwaqM9VuVivMMrVg'
```

A request with an incorrect token or without any token will be rejected:

```console
curl 'http://localhost:8431/api/v1/series/count'

curl 'http://localhost:8431/api/v1/series/count' -H 'Authorization: Bearer incorrect-token'
```
## Rate Limiter

<img alt="vmgateway-rl" src="vmgateway-rate-limiting.jpg">

Limits incoming requests according to the given, pre-configured limits. It supports read and write limiting by tenant.

`vmgateway` needs a datasource for rate limit queries. It can be either the single-node or the cluster version of `victoria-metrics`.
The metrics that you want to rate limit must be scraped from the cluster.

List of supported limit types:

* `queries` - count of API requests made by a tenant to the read API, such as `/api/v1/query`, `/api/v1/series` and others.
* `active_series` - count of currently active series for any given tenant.
* `new_series` - count of created series, aka churn rate.
* `rows_inserted` - count of inserted rows per tenant.

List of supported time windows:

* `minute`
* `hour`

Limits can be specified per tenant or at a global level if you omit `project_id` and `account_id`.

Example of configuration file:

```yaml
limits:
  - type: queries
    value: 1000
    resolution: minute
  - type: queries
    value: 10000
    resolution: hour
  - type: queries
    value: 10
    resolution: minute
    project_id: 5
    account_id: 1
```
## QuickStart

The cluster version of VictoriaMetrics is required for rate limiting.

```console
# start datasource for cluster metrics

cat << EOF > cluster.yaml
scrape_configs:
  - job_name: cluster
    scrape_interval: 5s
    static_configs:
      - targets: ['127.0.0.1:8481','127.0.0.1:8482','127.0.0.1:8480']
EOF

./bin/victoria-metrics --promscrape.config cluster.yaml

# start cluster

# start vmstorage, vmselect and vminsert
./bin/vmstorage -eula
./bin/vmselect -eula -storageNode 127.0.0.1:8401
./bin/vminsert -eula -storageNode 127.0.0.1:8400

# create base rate limiting config:
cat << EOF > limit.yaml
limits:
  - type: queries
    value: 100
  - type: rows_inserted
    value: 100000
  - type: new_series
    value: 1000
  - type: active_series
    value: 100000
  - type: queries
    value: 1
    account_id: 15
EOF

# start gateway with clusterMode
./bin/vmgateway -eula -enable.rateLimit -ratelimit.config limit.yaml -datasource.url http://localhost:8428 -enable.auth -clusterMode -write.url=http://localhost:8480 --read.url=http://localhost:8481

# ingest simple metric to tenant 1:5
curl 'http://localhost:8431/api/v1/import/prometheus' -X POST -d 'foo{bar="baz1"} 123' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MjAxNjIwMDAwMDAsInZtX2FjY2VzcyI6eyJ0ZW5hbnRfaWQiOnsiYWNjb3VudF9pZCI6MTV9fX0.PB1_KXDKPUp-40pxOGk6lt_jt9Yq80PIMpWVJqSForQ'
# read metric from tenant 1:5
curl 'http://localhost:8431/api/v1/labels' -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MjAxNjIwMDAwMDAsInZtX2FjY2VzcyI6eyJ0ZW5hbnRfaWQiOnsiYWNjb3VudF9pZCI6MTV9fX0.PB1_KXDKPUp-40pxOGk6lt_jt9Yq80PIMpWVJqSForQ'

# check rate limit
```
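One way to check the limiter (the last comment above): the config caps tenant 15 at a single read query per minute, so repeating the read request should start failing once the limit is exceeded. A hedged sketch reusing the bearer token from the example; the exact rejection status code may vary:

```console
for i in 1 2 3; do
  curl -s -o /dev/null -w '%{http_code}\n' 'http://localhost:8431/api/v1/labels' \
    -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2MjAxNjIwMDAwMDAsInZtX2FjY2VzcyI6eyJ0ZW5hbnRfaWQiOnsiYWNjb3VudF9pZCI6MTV9fX0.PB1_KXDKPUp-40pxOGk6lt_jt9Yq80PIMpWVJqSForQ'
done
# once the per-minute budget is spent, subsequent requests return a non-2xx status
```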
## JWT signature verification

`vmgateway` supports JWT signature verification.

Supported algorithms are `RS256`, `RS384`, `RS512`, `ES256`, `ES384`, `ES512`, `PS256`, `PS384`, `PS512`.
Tokens with unsupported algorithms will be rejected.

In order to enable JWT signature verification, you need to specify keys for signature verification.
The following flags are used to specify keys:

- `-auth.publicKeyFiles` - allows passing a file path to a file with a public key.
- `-auth.publicKeys` - allows passing a public key directly.

Note that both flags support passing multiple keys and also can be used together.

Example usage:

```console
./bin/vmgateway -eula \
  -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.publicKeyFiles=public_key.pem \
  -auth.publicKeyFiles=public_key2.pem \
  -auth.publicKeys=`-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAu1SU1LfVLPHCozMxH2Mo
4lgOEePzNm0tRgeLezV6ffAt0gunVTLw7onLRnrq0/IzW7yWR7QkrmBL7jTKEn5u
+qKhbwKfBstIs+bMY2Zkp18gnTxKLxoS2tFczGkPLPgizskuemMghRniWaoLcyeh
kd3qqGElvW/VDL5AaWTg0nLVkjRo9z+40RQzuVaE8AkAFmxZzow3x+VJYKdjykkJ
0iT9wCS0DRTXu269V264Vf/3jvredZiKRkgwlL9xNAwxXFg0x/XFw005UWVRIkdg
cKWTjpBP2dPwVZ4WWC+9aGVd+Gyn1o0CLelf4rEjGoXbAAEgAqeGUxrcIlbjXfbc
mwIDAQAB
-----END PUBLIC KEY-----
`
```

This command will result in 3 keys loaded: 2 keys from files and 1 from the command line.
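For local testing, a suitable key pair can be generated with `openssl`; a minimal sketch (the file names are arbitrary, and only the public key is handed to `vmgateway`):

```console
# generate an RSA private key and extract the public key for -auth.publicKeyFiles
openssl genrsa -out private_key.pem 2048
openssl rsa -in private_key.pem -pubout -out public_key.pem

./bin/vmgateway -eula -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.publicKeyFiles=public_key.pem
```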
### Using OpenID discovery endpoint for JWT signature verification

`vmgateway` supports using an OpenID discovery endpoint for JWKS keys discovery.

In order to enable the [OpenID discovery](https://openid.net/specs/openid-connect-discovery-1_0.html) endpoint for JWT signature verification, you need to specify OpenID discovery endpoint URLs by using the `auth.oidcDiscoveryEndpoints` flag.
When `auth.oidcDiscoveryEndpoints` is specified, `vmgateway` will fetch JWKS keys from the specified endpoint and use them for JWT signature verification.

Example usage for tokens issued by Azure Active Directory:

```console
/bin/vmgateway -eula \
  -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.oidcDiscoveryEndpoints=https://login.microsoftonline.com/common/v2.0/.well-known/openid-configuration
```

Example usage for tokens issued by Google:

```console
/bin/vmgateway -eula \
  -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.oidcDiscoveryEndpoints=https://accounts.google.com/.well-known/openid-configuration
```
### Using JWKS endpoint for JWT signature verification

`vmgateway` supports using a JWKS endpoint for JWT signature verification.

In order to enable the JWKS endpoint for JWT signature verification, you need to specify the JWKS endpoint URL by using the `auth.jwksEndpoints` flag.
When `auth.jwksEndpoints` is specified, `vmgateway` will fetch public keys from the specified endpoint and use them for JWT signature verification.

Example usage for tokens issued by Azure Active Directory:

```console
/bin/vmgateway -eula \
  -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.jwksEndpoints=https://login.microsoftonline.com/common/discovery/v2.0/keys
```

Example usage for tokens issued by Google:

```console
/bin/vmgateway -eula \
  -enable.auth \
  -write.url=http://localhost:8480 \
  -read.url=http://localhost:8481 \
  -auth.jwksEndpoints=https://www.googleapis.com/oauth2/v3/certs
```
## Configuration
|
||||
|
||||
The shortlist of configuration flags include the following:
|
||||
|
||||
```console
|
||||
-auth.httpHeader string
|
||||
HTTP header name to look for JWT authorization token (default "Authorization")
|
||||
-auth.jwksEndpoints array
|
||||
JWKS endpoints to fetch keys for JWT tokens signature verification
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-auth.oidcDiscoveryEndpoints array
|
||||
OpenID Connect discovery endpoints to fetch keys for JWT tokens signature verification
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-auth.publicKeyFiles array
|
||||
Path file with public key to verify JWT token signature
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-auth.publicKeys array
|
||||
Public keys to verify JWT token signature
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-clusterMode
|
||||
enable this for the cluster version
|
||||
-datasource.appendTypePrefix
|
||||
Whether to add type prefix to -datasource.url based on the query type. Set to true if sending different query types to the vmselect URL.
|
||||
-datasource.basicAuth.password string
|
||||
Optional basic auth password for -datasource.url
|
||||
-datasource.basicAuth.passwordFile string
|
||||
Optional path to basic auth password to use for -datasource.url
|
||||
-datasource.basicAuth.username string
|
||||
Optional basic auth username for -datasource.url
|
||||
-datasource.bearerToken string
|
||||
Optional bearer auth token to use for -datasource.url.
|
||||
-datasource.bearerTokenFile string
|
||||
Optional path to bearer token file to use for -datasource.url.
|
||||
-datasource.disableKeepAlive
|
||||
Whether to disable long-lived connections to the datasource. If true, disables HTTP keep-alives and will only use the connection to the server for a single HTTP request.
|
||||
-datasource.disableStepParam
|
||||
Whether to disable adding 'step' param to the issued instant queries. This might be useful when using vmalert with datasources that do not support 'step' param for instant queries, like Google Managed Prometheus. It is not recommended to enable this flag if you use vmalert with VictoriaMetrics.
|
||||
-datasource.headers string
|
||||
Optional HTTP extraHeaders to send with each request to the corresponding -datasource.url. For example, -datasource.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -datasource.url. Multiple headers must be delimited by '^^': -datasource.headers='header1:value1^^header2:value2'
|
||||
-datasource.lookback duration
|
||||
Lookback defines how far into the past to look when evaluating queries. For example, if the datasource.lookback=5m then param "time" with value now()-5m will be added to every query.
|
||||
-datasource.maxIdleConnections int
|
||||
Defines the number of idle (keep-alive connections) to each configured datasource. Consider setting this value equal to the value: groups_total * group.concurrency. Too low a value may result in a high number of sockets in TIME_WAIT state. (default 100)
|
||||
-datasource.oauth2.clientID string
|
||||
Optional OAuth2 clientID to use for -datasource.url.
|
||||
-datasource.oauth2.clientSecret string
|
||||
Optional OAuth2 clientSecret to use for -datasource.url.
|
||||
-datasource.oauth2.clientSecretFile string
|
||||
Optional OAuth2 clientSecretFile to use for -datasource.url.
|
||||
-datasource.oauth2.scopes string
|
||||
Optional OAuth2 scopes to use for -datasource.url. Scopes must be delimited by ';'
|
||||
-datasource.oauth2.tokenUrl string
|
||||
Optional OAuth2 tokenURL to use for -datasource.url.
|
||||
-datasource.queryStep duration
|
||||
How far a value can fallback to when evaluating queries. For example, if -datasource.queryStep=15s then param "step" with value "15s" will be added to every query. If set to 0, rule's evaluation interval will be used instead. (default 5m0s)
|
||||
-datasource.queryTimeAlignment
|
||||
Deprecated: please use "eval_alignment" in rule group instead. Whether to align "time" parameter with evaluation interval. Alignment supposed to produce deterministic results despite number of vmalert replicas or time they were started. See more details at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1257 (default true)
|
||||
-datasource.roundDigits int
|
||||
Adds "round_digits" GET param to datasource requests. In VM "round_digits" limits the number of digits after the decimal point in response values.
|
||||
-datasource.showURL
|
||||
Whether to avoid stripping sensitive information such as auth headers or passwords from URLs in log messages or UI and exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-datasource.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -datasource.url. By default, system CA is used
|
||||
-datasource.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -datasource.url
|
||||
-datasource.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -datasource.url
|
||||
-datasource.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -datasource.url
|
||||
-datasource.tlsServerName string
|
||||
Optional TLS server name to use for connections to -datasource.url. By default, the server name from -datasource.url is used
|
||||
-datasource.url string
|
||||
Datasource compatible with Prometheus HTTP API. It can be single node VictoriaMetrics or vmselect URL. Required parameter. E.g. http://127.0.0.1:8428 . See also -remoteRead.disablePathAppend and -datasource.showURL
|
||||
-enable.auth
|
||||
enables auth with jwt token
|
||||
-enable.rateLimit
|
||||
enables rate limiter
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address to listen for http connections. See also -httpListenAddr.useProxyProtocol (default ":8431")
|
||||
-httpListenAddr.useProxyProtocol
|
||||
Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
Lisense key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-ratelimit.config string
|
||||
Path to the configuration file. Accepts a URL address
|
||||
-ratelimit.configCheckInterval duration
|
||||
Interval for config file re-read. A zero value disables periodic re-reading. By default, refreshing is disabled; send SIGHUP to refresh the config.
|
||||
-ratelimit.extraLabels array
|
||||
Additional labels that will be applied to data fetched from the datasource
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-ratelimit.refreshInterval duration
|
||||
Interval for refreshing the rate limit state (default 5s)
|
||||
-read.url string
|
||||
Read access URL. Example: http://vmselect:8481
|
||||
-remoteRead.disablePathAppend
|
||||
Whether to disable automatic appending of '/api/v1/query' path to the configured -datasource.url and -remoteRead.url
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
-write.url string
|
||||
Write access URL. Example: http://vminsert:8480
|
||||
```
|
||||
|
||||
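For instance, a minimal sketch combining the read and write flags documented above (hypothetical addresses; adjust them to your deployment):

```console
./vmgateway \
    -read.url=http://vmselect:8481 \
    -write.url=http://vminsert:8480
```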
## Troubleshooting
|
||||
|
||||
* Access control:
|
||||
* incorrect `jwt` format: try <https://jwt.io/#debugger-io> with your tokens (see also the decoding sketch after this list).
|
||||
* expired token: check the `exp` field.
|
||||
* Rate Limiting:
|
||||
* reduce `scrape_interval` at the datasource to apply limits faster.
|
||||
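As a quick sketch for the `jwt` checks above (assumes `jq` is installed; base64url payloads whose length isn't a multiple of 4 need `=` padding appended before decoding):

```console
# decode the payload (second segment) of a token and inspect the `exp` claim
echo "$TOKEN" | cut -d '.' -f2 | tr '_-' '/+' | base64 -d 2>/dev/null | jq .exp
```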
|
||||
## Limitations
|
||||
|
||||
* Access Control:
|
||||
* `jwt` token signature verification for `HMAC` algorithms is not supported.
|
||||
* Rate Limiting:
|
||||
* limits are applied based on queries to `datasource.url`
|
||||
* only the cluster version can be rate-limited.
|
||||
vmgateway docs can be edited at [docs/vmgateway.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmgateway.md).
|
||||
|
|
Before Width: | Height: | Size: 48 KiB |
|
@ -19,7 +19,7 @@ var (
|
|||
//
|
||||
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
|
||||
func InsertHandler(r io.Reader) error {
|
||||
return stream.Parse(r, insertRows)
|
||||
return stream.Parse(r, false, insertRows)
|
||||
}
|
||||
|
||||
func insertRows(rows []parser.Row) error {
|
||||
|
|
|
@ -1,237 +1,3 @@
|
|||
# vmrestore
|
||||
See vmrestore docs [here](https://docs.victoriametrics.com/vmrestore.html).
|
||||
|
||||
`vmrestore` restores data from backups created by [vmbackup](https://docs.victoriametrics.com/vmbackup.html).
|
||||
|
||||
The restore process can be interrupted at any time. It is automatically resumed from the interruption point
|
||||
when restarting `vmrestore` with the same args.
|
||||
|
||||
## Usage
|
||||
|
||||
VictoriaMetrics must be stopped during the restore process.
|
||||
|
||||
Run the following command to restore backup from the given `-src` into the given `-storageDataPath`:
|
||||
|
||||
```console
|
||||
./vmrestore -src=<storageType>://<path/to/backup> -storageDataPath=<local/path/to/restore>
|
||||
```
|
||||
|
||||
* `<storageType>://<path/to/backup>` is the path to backup made with [vmbackup](https://docs.victoriametrics.com/vmbackup.html).
|
||||
`vmrestore` can restore backups from the following storage types:
|
||||
* [GCS](https://cloud.google.com/storage/). Example: `-src=gs://<bucket>/<path/to/backup>`
|
||||
* [S3](https://aws.amazon.com/s3/). Example: `-src=s3://<bucket>/<path/to/backup>`
|
||||
* [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs/). Example: `-src=azblob://<container>/<path/to/backup>`
|
||||
* Any S3-compatible storage such as [MinIO](https://github.com/minio/minio), [Ceph](https://docs.ceph.com/en/pacific/radosgw/s3/)
|
||||
or [Swift](https://platform.swiftstack.com/docs/admin/middleware/s3_middleware.html). See [these docs](#advanced-usage) for details.
|
||||
* Local filesystem. Example: `-src=fs://</absolute/path/to/backup>`. Note that `vmbackup` prevents storing the backup
|
||||
into the directory pointed to by the `-storageDataPath` command-line flag, since this directory should be managed solely by VictoriaMetrics or `vmstorage`.
|
||||
* `<local/path/to/restore>` is the path to folder where data will be restored. This folder must be passed
|
||||
to VictoriaMetrics in `-storageDataPath` command-line flag after the restore process is complete.
|
||||
|
||||
The original `-storageDataPath` directory may contain old files. They will be replaced with the files from the backup,
|
||||
i.e. the end result would be similar to [rsync --delete](https://askubuntu.com/questions/476041/how-do-i-make-rsync-delete-files-that-have-been-deleted-from-the-source-folder).
|
||||
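For example, a sketch restoring from a hypothetical GCS bucket into the default data directory:

```console
./vmrestore -src=gs://my-backups/victoria-metrics/daily -storageDataPath=victoria-metrics-data
```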
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* If `vmrestore` eats all the network bandwidth, then set `-maxBytesPerSecond` to the desired value (see the sketch after this list).
|
||||
* If `vmrestore` has been interrupted due to a temporary error, then just restart it with the same args. It will resume the restore process.
|
||||
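For example, a sketch capping the restore at roughly 32 MiB/s (pick a limit that fits your link; supported size suffixes are listed in the flag reference below):

```console
./vmrestore -src=s3://<bucket>/<path/to/backup> -storageDataPath=victoria-metrics-data -maxBytesPerSecond=32MiB
```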
|
||||
## Advanced usage
|
||||
|
||||
* Obtaining credentials from a file.
|
||||
|
||||
Add the flag `-credsFilePath=/etc/credentials` with the following content:
|
||||
|
||||
for S3 (AWS, MinIO or other S3-compatible storages):
|
||||
|
||||
```console
|
||||
[default]
|
||||
aws_access_key_id=theaccesskey
|
||||
aws_secret_access_key=thesecretaccesskeyvalue
|
||||
```
|
||||
|
||||
for GCS (Google Cloud Storage):
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "service_account",
|
||||
"project_id": "project-id",
|
||||
"private_key_id": "key-id",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "service-account-email",
|
||||
"client_id": "client-id",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://accounts.google.com/o/oauth2/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account-email"
|
||||
}
|
||||
```
|
||||
|
||||
* Usage with a custom S3 URL endpoint. It is possible to use `vmrestore` with S3-compatible storages such as MinIO, Cloudian and others.
|
||||
You have to add a custom URL endpoint via a flag:
|
||||
|
||||
```console
|
||||
# for minio:
|
||||
-customS3Endpoint=http://localhost:9000
|
||||
|
||||
# for aws gov region
|
||||
-customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com
|
||||
```
|
||||
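Putting it together, a sketch for restoring from a local MinIO instance (hypothetical bucket and paths), reusing the credentials file described above:

```console
./vmrestore -customS3Endpoint=http://localhost:9000 -credsFilePath=/etc/credentials \
    -src=s3://backups/latest -storageDataPath=victoria-metrics-data
```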
|
||||
* Run `vmrestore -help` in order to see all the available options:
|
||||
|
||||
```console
|
||||
-concurrency int
|
||||
The number of concurrent workers. Higher concurrency may reduce restore duration (default 10)
|
||||
-configFilePath string
|
||||
Path to file with S3 configs. Configs are loaded from default location if not set.
|
||||
See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-configProfile string
|
||||
Profile name for S3 configs. If not set, the value of the environment variable will be loaded (AWS_PROFILE or AWS_DEFAULT_PROFILE); if neither is set, DefaultSharedConfigProfile is used
|
||||
-credsFilePath string
|
||||
Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set.
|
||||
See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
-customS3Endpoint string
|
||||
Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
-deleteAllObjectVersions
|
||||
Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled, deleting the file removes only the current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-eula
|
||||
Deprecated, please use -license or -licenseFile flags instead. By specifying this flag, you confirm that you have an enterprise license and accept the ESA https://victoriametrics.com/legal/esa/ . This flag is available only in Enterprise binaries. See https://docs.victoriametrics.com/enterprise.html
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents eviction of recently accessed data from the OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey string
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming http connections are closed after the configured timeout. This may help to spread the incoming load among a cluster of services behind a load balancer. Please note that the real timeout may be bigger by up to 10% as a protection against the thundering herd problem (default 2m0s)
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password string
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr string
|
||||
TCP address for exporting metrics at /metrics page (default ":8421")
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-license string
|
||||
License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed via file specified by -licenseFile command-line flag
|
||||
-license.forceOffline
|
||||
Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support the offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is available only in Enterprise binaries
|
||||
-licenseFile string
|
||||
Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warnings are emitted per second, then the remaining warnings are suppressed. Zero values disable the rate limit
|
||||
-maxBytesPerSecond size
|
||||
The maximum download speed. There is no limit if it is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metricsAuthKey string
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pprofAuthKey string
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-s3ForcePathStyle
|
||||
Whether to use path-style addressing when accessing S3. When set to false, the endpoint is prefixed with the bucket name, i.e. virtual-hosted style is used (default true)
|
||||
-s3StorageClass string
|
||||
The Storage Class applied to objects uploaded to AWS S3. Supported values are: GLACIER, DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, REDUCED_REDUNDANCY, STANDARD, STANDARD_IA.
|
||||
See https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html
|
||||
-skipBackupCompleteCheck
|
||||
Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file
|
||||
-src string
|
||||
Source path with backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup
|
||||
-storageDataPath string
|
||||
Destination path where backup must be restored. VictoriaMetrics must be stopped when restoring from backup. -storageDataPath dir can be non-empty. In this case the contents of -storageDataPath dir is synchronized with -src contents, i.e. it works like 'rsync --delete' (default "victoria-metrics-data")
|
||||
-tls
|
||||
Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
|
||||
-tlsCertFile string
|
||||
Path to file with TLS certificate if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
-tlsKeyFile string
|
||||
Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated
|
||||
-tlsMinVersion string
|
||||
Optional minimum TLS version to use for incoming requests over HTTPS if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
|
||||
## How to build from sources
|
||||
|
||||
It is recommended to use [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) - see `vmutils-*` archives there.
|
||||
|
||||
### Development build
|
||||
|
||||
1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.20.
|
||||
1. Run `make vmrestore` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds the `vmrestore` binary and puts it into the `bin` folder.
|
||||
|
||||
### Production build
|
||||
|
||||
1. [Install docker](https://docs.docker.com/install/).
|
||||
1. Run `make vmrestore-prod` from the root folder of [the repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
It builds the `vmrestore-prod` binary and puts it into the `bin` folder.
|
||||
|
||||
### Building docker images
|
||||
|
||||
Run `make package-vmrestore`. It builds the `victoriametrics/vmrestore:<PKG_TAG>` docker image locally.
|
||||
`<PKG_TAG>` is an auto-generated image tag, which depends on the source code in the repository.
|
||||
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-vmrestore`.
|
||||
|
||||
The base docker image is [alpine](https://hub.docker.com/_/alpine) but it is possible to use any other base image
|
||||
by setting it via the `<ROOT_IMAGE>` environment variable. For example, the following command builds the image on top of the [scratch](https://hub.docker.com/_/scratch) image:
|
||||
|
||||
```console
|
||||
ROOT_IMAGE=scratch make package-vmrestore
|
||||
```
|
||||
vmrestore docs can be edited at [docs/vmrestore.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmrestore.md).
|
||||
|
|
|
@ -94,6 +94,23 @@ var vmuiFileServer = http.FileServer(http.FS(vmuiFiles))
|
|||
|
||||
// RequestHandler handles remote read API requests
|
||||
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
||||
|
||||
// Strip /prometheus and /graphite prefixes in order to provide path compatibility with cluster version
|
||||
//
|
||||
// See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
switch {
|
||||
case strings.HasPrefix(path, "/prometheus/"):
|
||||
path = path[len("/prometheus"):]
|
||||
case strings.HasPrefix(path, "/graphite/"):
|
||||
path = path[len("/graphite"):]
|
||||
}
|
||||
|
||||
if handleStaticAndSimpleRequests(w, r, path) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Handle non-trivial dynamic requests, which may take big amounts of time and resources.
|
||||
startTime := time.Now()
|
||||
defer requestDuration.UpdateDuration(startTime)
|
||||
tracerEnabled := httputils.GetBool(r, "trace")
|
||||
|
@ -152,7 +169,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
}()
|
||||
}
|
||||
|
||||
path := strings.Replace(r.URL.Path, "//", "/", -1)
|
||||
if path == "/internal/resetRollupResultCache" {
|
||||
if !httpserver.CheckAuthFlag(w, r, *resetCacheAuthKey, "resetCacheAuthKey") {
|
||||
return true
|
||||
|
@ -161,50 +177,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Strip /prometheus and /graphite prefixes in order to provide path compatibility with cluster version
|
||||
//
|
||||
// See https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format
|
||||
switch {
|
||||
case strings.HasPrefix(path, "/prometheus/"):
|
||||
path = path[len("/prometheus"):]
|
||||
case strings.HasPrefix(path, "/graphite/"):
|
||||
path = path[len("/graphite"):]
|
||||
}
|
||||
|
||||
// vmui access.
|
||||
if path == "/vmui" || path == "/graph" {
|
||||
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
|
||||
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
|
||||
// is hidden behind vmauth or similar proxy.
|
||||
_ = r.ParseForm()
|
||||
path = strings.TrimPrefix(path, "/")
|
||||
newURL := path + "/?" + r.Form.Encode()
|
||||
httpserver.Redirect(w, newURL)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/graph/") {
|
||||
// This is needed for serving /graph URLs from Prometheus datasource in Grafana.
|
||||
path = strings.Replace(path, "/graph/", "/vmui/", 1)
|
||||
}
|
||||
if path == "/vmui/custom-dashboards" {
|
||||
if err := handleVMUICustomDashboards(w); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/vmui/") {
|
||||
if strings.HasPrefix(path, "/vmui/static/") {
|
||||
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
|
||||
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
|
||||
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
|
||||
w.Header().Set("Cache-Control", "max-age=31536000")
|
||||
}
|
||||
r.URL.Path = path
|
||||
vmuiFileServer.ServeHTTP(w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
if strings.HasPrefix(path, "/api/v1/label/") {
|
||||
s := path[len("/api/v1/label/"):]
|
||||
if strings.HasSuffix(s, "/values") {
|
||||
|
@ -229,45 +201,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/functions") {
|
||||
funcName := path[len("/functions"):]
|
||||
funcName = strings.TrimPrefix(funcName, "/")
|
||||
if funcName == "" {
|
||||
graphiteFunctionsRequests.Inc()
|
||||
if err := graphite.FunctionsHandler(w, r); err != nil {
|
||||
graphiteFunctionsErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
graphiteFunctionDetailsRequests.Inc()
|
||||
if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil {
|
||||
graphiteFunctionDetailsErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
if path == "/vmalert" {
|
||||
// vmalert access via incomplete url without `/` in the end. Redirect to complete url.
|
||||
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
|
||||
// is hidden behind vmauth or similar proxy.
|
||||
httpserver.Redirect(w, "vmalert/")
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/vmalert/") {
|
||||
vmalertRequests.Inc()
|
||||
if len(*vmalertProxyURL) == 0 {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`)
|
||||
return true
|
||||
}
|
||||
proxyVMAlertRequests(w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
switch path {
|
||||
case "/api/v1/query":
|
||||
|
@ -324,20 +257,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/status/active_queries":
|
||||
statusActiveQueriesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
promql.ActiveQueriesHandler(w, r)
|
||||
return true
|
||||
case "/api/v1/status/top_queries":
|
||||
topQueriesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
if err := prometheus.QueryStatsHandler(startTime, w, r); err != nil {
|
||||
topQueriesErrors.Inc()
|
||||
sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err))
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/export":
|
||||
exportRequests.Inc()
|
||||
if err := prometheus.ExportHandler(startTime, w, r); err != nil {
|
||||
|
@ -466,6 +385,113 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
return true
|
||||
}
|
||||
return true
|
||||
case "/api/v1/admin/tsdb/delete_series":
|
||||
if !httpserver.CheckAuthFlag(w, r, *deleteAuthKey, "deleteAuthKey") {
|
||||
return true
|
||||
}
|
||||
deleteRequests.Inc()
|
||||
if err := prometheus.DeleteHandler(startTime, r); err != nil {
|
||||
deleteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path string) bool {
|
||||
// vmui access.
|
||||
if path == "/vmui" || path == "/graph" {
|
||||
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
|
||||
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
|
||||
// is hidden behind vmauth or similar proxy.
|
||||
_ = r.ParseForm()
|
||||
path = strings.TrimPrefix(path, "/")
|
||||
newURL := path + "/?" + r.Form.Encode()
|
||||
httpserver.Redirect(w, newURL)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/graph/") {
|
||||
// This is needed for serving /graph URLs from Prometheus datasource in Grafana.
|
||||
path = strings.Replace(path, "/graph/", "/vmui/", 1)
|
||||
}
|
||||
if path == "/vmui/custom-dashboards" {
|
||||
if err := handleVMUICustomDashboards(w); err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/vmui/") {
|
||||
if strings.HasPrefix(path, "/vmui/static/") {
|
||||
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
|
||||
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
|
||||
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
|
||||
w.Header().Set("Cache-Control", "max-age=31536000")
|
||||
}
|
||||
r.URL.Path = path
|
||||
vmuiFileServer.ServeHTTP(w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
if strings.HasPrefix(path, "/functions") {
|
||||
funcName := path[len("/functions"):]
|
||||
funcName = strings.TrimPrefix(funcName, "/")
|
||||
if funcName == "" {
|
||||
graphiteFunctionsRequests.Inc()
|
||||
if err := graphite.FunctionsHandler(w, r); err != nil {
|
||||
graphiteFunctionsErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
graphiteFunctionDetailsRequests.Inc()
|
||||
if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil {
|
||||
graphiteFunctionDetailsErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
if path == "/vmalert" {
|
||||
// vmalert access via incomplete url without `/` in the end. Redirect to complete url.
|
||||
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
|
||||
// is hidden behind vmauth or similar proxy.
|
||||
httpserver.Redirect(w, "vmalert/")
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(path, "/vmalert/") {
|
||||
vmalertRequests.Inc()
|
||||
if len(*vmalertProxyURL) == 0 {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`)
|
||||
return true
|
||||
}
|
||||
proxyVMAlertRequests(w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
switch path {
|
||||
case "/api/v1/status/active_queries":
|
||||
statusActiveQueriesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
promql.ActiveQueriesHandler(w, r)
|
||||
return true
|
||||
case "/api/v1/status/top_queries":
|
||||
topQueriesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
if err := prometheus.QueryStatsHandler(w, r); err != nil {
|
||||
topQueriesErrors.Inc()
|
||||
sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err))
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "/metric-relabel-debug":
|
||||
promscrapeMetricRelabelDebugRequests.Inc()
|
||||
promscrape.WriteMetricRelabelDebug(w, r)
|
||||
|
@ -511,7 +537,10 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
case "/api/v1/status/buildinfo":
|
||||
buildInfoRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprintf(w, "%s", `{"status":"success","data":{}}`)
|
||||
// prometheus version is used here, which affects what API Grafana uses when retrieving label values.
|
||||
// as new Grafana features are added that are customized for the Prometheus version, maybe the version will need to be increased.
|
||||
// see this issue for more info: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5370
|
||||
fmt.Fprintf(w, "%s", `{"status":"success","data":{"version":"2.24.0"}}`)
|
||||
return true
|
||||
case "/api/v1/query_exemplars":
|
||||
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars
|
||||
|
@ -519,18 +548,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprintf(w, "%s", `{"status":"success","data":[]}`)
|
||||
return true
|
||||
case "/api/v1/admin/tsdb/delete_series":
|
||||
if !httpserver.CheckAuthFlag(w, r, *deleteAuthKey, "deleteAuthKey") {
|
||||
return true
|
||||
}
|
||||
deleteRequests.Inc()
|
||||
if err := prometheus.DeleteHandler(startTime, r); err != nil {
|
||||
deleteErrors.Inc()
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -149,11 +149,11 @@
|
|||
{% if len(mn.Tags) > 0 %}
|
||||
{
|
||||
{% code tags := mn.Tags %}
|
||||
{%z= tags[0].Key %}={%qz= tags[0].Value %}
|
||||
{%z= tags[0].Key %}={%= escapePrometheusLabel(tags[0].Value) %}
|
||||
{% code tags = tags[1:] %}
|
||||
{% for i := range tags %}
|
||||
{% code tag := &tags[i] %}
|
||||
,{%z= tag.Key %}={%qz= tag.Value %}
|
||||
,{%z= tag.Key %}={%= escapePrometheusLabel(tag.Value) %}
|
||||
{% endfor %}
|
||||
}
|
||||
{% endif %}
|
||||
|
@ -173,4 +173,26 @@
|
|||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func escapePrometheusLabel(b []byte) %}
|
||||
"
|
||||
{% for len(b) > 0 %}
|
||||
{% code n := bytes.IndexAny(b, "\\\n\"") %}
|
||||
{% if n < 0 %}
|
||||
{%z= b %}
|
||||
{% break %}
|
||||
{% endif %}
|
||||
{%z= b[:n] %}
|
||||
{% switch b[n] %}
|
||||
{% case '\\' %}
|
||||
\\
|
||||
{% case '\n' %}
|
||||
\n
|
||||
{% case '"' %}
|
||||
\"
|
||||
{% endswitch %}
|
||||
{% code b = b[n+1:] %}
|
||||
{% endfor %}
|
||||
"
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
||||
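For reference, a minimal standalone Go sketch of the escaping implemented by the template above (hypothetical helper name; not part of the diff). Only backslash, newline and double quote are escaped, so characters such as `<` pass through unchanged:

```go
package main

import (
	"bytes"
	"fmt"
	"strings"
)

// escapeLabelValue quotes b the way the template does: bytes are copied as-is,
// and only `\`, `\n` and `"` are rewritten into their escaped forms.
func escapeLabelValue(b []byte) string {
	var sb strings.Builder
	sb.WriteByte('"')
	for len(b) > 0 {
		n := bytes.IndexAny(b, "\\\n\"")
		if n < 0 {
			sb.Write(b)
			break
		}
		sb.Write(b[:n])
		switch b[n] {
		case '\\':
			sb.WriteString(`\\`)
		case '\n':
			sb.WriteString(`\n`)
		case '"':
			sb.WriteString(`\"`)
		}
		b = b[n+1:]
	}
	sb.WriteByte('"')
	return sb.String()
}

func main() {
	fmt.Println(escapeLabelValue([]byte("a<b\"\\c"))) // prints "a<b\"\\c"
}
```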
|
|
|
@ -495,7 +495,7 @@ func streamprometheusMetricName(qw422016 *qt422016.Writer, mn *storage.MetricNam
|
|||
//line app/vmselect/prometheus/export.qtpl:152
|
||||
qw422016.N().S(`=`)
|
||||
//line app/vmselect/prometheus/export.qtpl:152
|
||||
qw422016.N().QZ(tags[0].Value)
|
||||
streamescapePrometheusLabel(qw422016, tags[0].Value)
|
||||
//line app/vmselect/prometheus/export.qtpl:153
|
||||
tags = tags[1:]
|
||||
|
||||
|
@ -511,7 +511,7 @@ func streamprometheusMetricName(qw422016 *qt422016.Writer, mn *storage.MetricNam
|
|||
//line app/vmselect/prometheus/export.qtpl:156
|
||||
qw422016.N().S(`=`)
|
||||
//line app/vmselect/prometheus/export.qtpl:156
|
||||
qw422016.N().QZ(tag.Value)
|
||||
streamescapePrometheusLabel(qw422016, tag.Value)
|
||||
//line app/vmselect/prometheus/export.qtpl:157
|
||||
}
|
||||
//line app/vmselect/prometheus/export.qtpl:157
|
||||
|
@ -599,3 +599,74 @@ func convertValueToSpecialJSON(v float64) string {
|
|||
return qs422016
|
||||
//line app/vmselect/prometheus/export.qtpl:174
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/export.qtpl:176
|
||||
func streamescapePrometheusLabel(qw422016 *qt422016.Writer, b []byte) {
|
||||
//line app/vmselect/prometheus/export.qtpl:176
|
||||
qw422016.N().S(`"`)
|
||||
//line app/vmselect/prometheus/export.qtpl:178
|
||||
for len(b) > 0 {
|
||||
//line app/vmselect/prometheus/export.qtpl:179
|
||||
n := bytes.IndexAny(b, "\\\n\"")
|
||||
|
||||
//line app/vmselect/prometheus/export.qtpl:180
|
||||
if n < 0 {
|
||||
//line app/vmselect/prometheus/export.qtpl:181
|
||||
qw422016.N().Z(b)
|
||||
//line app/vmselect/prometheus/export.qtpl:182
|
||||
break
|
||||
//line app/vmselect/prometheus/export.qtpl:183
|
||||
}
|
||||
//line app/vmselect/prometheus/export.qtpl:184
|
||||
qw422016.N().Z(b[:n])
|
||||
//line app/vmselect/prometheus/export.qtpl:185
|
||||
switch b[n] {
|
||||
//line app/vmselect/prometheus/export.qtpl:186
|
||||
case '\\':
|
||||
//line app/vmselect/prometheus/export.qtpl:186
|
||||
qw422016.N().S(`\\`)
|
||||
//line app/vmselect/prometheus/export.qtpl:188
|
||||
case '\n':
|
||||
//line app/vmselect/prometheus/export.qtpl:188
|
||||
qw422016.N().S(`\n`)
|
||||
//line app/vmselect/prometheus/export.qtpl:190
|
||||
case '"':
|
||||
//line app/vmselect/prometheus/export.qtpl:190
|
||||
qw422016.N().S(`\"`)
|
||||
//line app/vmselect/prometheus/export.qtpl:192
|
||||
}
|
||||
//line app/vmselect/prometheus/export.qtpl:193
|
||||
b = b[n+1:]
|
||||
|
||||
//line app/vmselect/prometheus/export.qtpl:194
|
||||
}
|
||||
//line app/vmselect/prometheus/export.qtpl:194
|
||||
qw422016.N().S(`"`)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
func writeescapePrometheusLabel(qq422016 qtio422016.Writer, b []byte) {
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
streamescapePrometheusLabel(qw422016, b)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
func escapePrometheusLabel(b []byte) string {
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
writeescapePrometheusLabel(qb422016, b)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/export.qtpl:196
|
||||
}
|
||||
|
|
43
app/vmselect/prometheus/federate_test.go
Normal file
|
@ -0,0 +1,43 @@
|
|||
package prometheus
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
func TestFederate(t *testing.T) {
|
||||
f := func(rs *netstorage.Result, expectedResult string) {
|
||||
t.Helper()
|
||||
result := Federate(rs)
|
||||
if result != expectedResult {
|
||||
t.Fatalf("unexpected result; got\n%s\nwant\n%s", result, expectedResult)
|
||||
}
|
||||
}
|
||||
|
||||
f(&netstorage.Result{}, ``)
|
||||
|
||||
f(&netstorage.Result{
|
||||
MetricName: storage.MetricName{
|
||||
MetricGroup: []byte("foo"),
|
||||
Tags: []storage.Tag{
|
||||
{
|
||||
Key: []byte("a"),
|
||||
Value: []byte("b"),
|
||||
},
|
||||
{
|
||||
Key: []byte("qqq"),
|
||||
Value: []byte("\\"),
|
||||
},
|
||||
{
|
||||
Key: []byte("abc"),
|
||||
// Verify that < isn't encoded. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5431
|
||||
Value: []byte("a<b\"\\c"),
|
||||
},
|
||||
},
|
||||
},
|
||||
Values: []float64{1.23},
|
||||
Timestamps: []int64{123},
|
||||
}, `foo{a="b",qqq="\\",abc="a<b\"\\c"} 1.23 123`+"\n")
|
||||
}
|
38
app/vmselect/prometheus/federate_timing_test.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
package prometheus
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
func BenchmarkFederate(b *testing.B) {
|
||||
rs := &netstorage.Result{
|
||||
MetricName: storage.MetricName{
|
||||
MetricGroup: []byte("foo_bar_bazaaaa_total"),
|
||||
Tags: []storage.Tag{
|
||||
{
|
||||
Key: []byte("instance"),
|
||||
Value: []byte("foobarbaz:2344"),
|
||||
},
|
||||
{
|
||||
Key: []byte("job"),
|
||||
Value: []byte("aaabbbccc"),
|
||||
},
|
||||
},
|
||||
},
|
||||
Values: []float64{112.23},
|
||||
Timestamps: []int64{1234567890},
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var bb bytes.Buffer
|
||||
for pb.Next() {
|
||||
bb.Reset()
|
||||
WriteFederate(&bb, rs)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -54,7 +54,8 @@ var (
|
|||
maxSeriesLimit = flag.Int("search.maxSeries", 30e3, "The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage")
|
||||
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from /api/v1/query_range. "+
|
||||
"This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points "+
|
||||
"returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph")
|
||||
"returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph. "+
|
||||
"See also -search.maxResponseSeries")
|
||||
)
|
||||
|
||||
// Default step used if not set.
|
||||
|
@ -1063,9 +1064,7 @@ func getLatencyOffsetMilliseconds(r *http.Request) (int64, error) {
|
|||
}
|
||||
|
||||
// QueryStatsHandler returns query stats at `/api/v1/status/top_queries`
|
||||
func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
defer queryStatsDuration.UpdateDuration(startTime)
|
||||
|
||||
func QueryStatsHandler(w http.ResponseWriter, r *http.Request) error {
|
||||
topN := 20
|
||||
topNStr := r.FormValue("topN")
|
||||
if len(topNStr) > 0 {
|
||||
|
@ -1090,8 +1089,6 @@ func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
|||
return nil
|
||||
}
|
||||
|
||||
var queryStatsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/top_queries"}`)
|
||||
|
||||
// commonParams contains common parameters for all /api/v1/* handlers
|
||||
//
|
||||
// timeout, start, end, match[], extra_label, extra_filters[]
|
||||
|
|
|
@ -929,7 +929,7 @@ func evalRollupFuncWithSubquery(qt *querytracer.Tracer, ec *EvalConfig, funcName
|
|||
}
|
||||
|
||||
ecSQ := copyEvalConfig(ec)
|
||||
ecSQ.Start -= window + maxSilenceInterval + step
|
||||
ecSQ.Start -= window + step + maxSilenceInterval()
|
||||
ecSQ.End += step
|
||||
ecSQ.Step = step
|
||||
ecSQ.MaxPointsPerSeries = *maxPointsSubqueryPerTimeseries
|
||||
|
@ -1672,12 +1672,12 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Fetch the remaining part of the result.
|
||||
// Fetch the result.
|
||||
tfss := searchutils.ToTagFilterss(me.LabelFilterss)
|
||||
tfss = searchutils.JoinTagFilterss(tfss, ec.EnforcedTagFilterss)
|
||||
minTimestamp := ec.Start
|
||||
if needSilenceIntervalForRollupFunc(funcName) {
|
||||
minTimestamp -= maxSilenceInterval
|
||||
minTimestamp -= maxSilenceInterval()
|
||||
}
|
||||
if window > ec.Step {
|
||||
minTimestamp -= window
|
||||
|
@ -1772,10 +1772,22 @@ func getRollupMemoryLimiter() *memoryLimiter {
|
|||
return &rollupMemoryLimiter
|
||||
}
|
||||
|
||||
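// maxSilenceInterval returns the maximum allowed interval without samples,
// derived from minStalenessInterval (the -search.minStalenessInterval flag);
// it falls back to 5 minutes when the flag is unset or non-positive.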
func maxSilenceInterval() int64 {
|
||||
d := minStalenessInterval.Milliseconds()
|
||||
if d <= 0 {
|
||||
d = 5 * 60 * 1000
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func needSilenceIntervalForRollupFunc(funcName string) bool {
|
||||
// All rollup the functions, which do not rely on the previous sample
|
||||
// before the lookbehind window (aka prevValue), do not need silence interval.
|
||||
// All the rollup functions, which do not rely on the previous sample
|
||||
// before the lookbehind window (aka prevValue and realPrevValue), do not need silence interval.
|
||||
switch strings.ToLower(funcName) {
|
||||
case "default_rollup":
|
||||
// The default_rollup implicitly relies on the previous samples in order to fill gaps.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5388
|
||||
return true
|
||||
case
|
||||
"absent_over_time",
|
||||
"avg_over_time",
|
||||
|
@ -1784,7 +1796,6 @@ func needSilenceIntervalForRollupFunc(funcName string) bool {
|
|||
"count_le_over_time",
|
||||
"count_ne_over_time",
|
||||
"count_over_time",
|
||||
"default_rollup",
|
||||
"first_over_time",
|
||||
"histogram_over_time",
|
||||
"hoeffding_bound_lower",
|
||||
|
|
|
@ -20,6 +20,8 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
maxResponseSeries = flag.Int("search.maxResponseSeries", 0, "The maximum number of time series which can be returned from /api/v1/query and /api/v1/query_range . "+
|
||||
"The limit is disabled if it equals to 0. See also -search.maxPointsPerTimeseries and -search.maxUniqueTimeseries")
|
||||
treatDotsAsIsInRegexps = flag.Bool("search.treatDotsAsIsInRegexps", false, "Whether to treat dots as is in regexp label filters used in queries. "+
|
||||
`For example, foo{bar=~"a.b.c"} will be automatically converted to foo{bar=~"a\\.b\\.c"}, i.e. all the dots in regexp filters will be automatically escaped `+
|
||||
`in order to match only dot char instead of matching any char. Dots in ".+", ".*" and ".{n}" regexps aren't escaped. `+
|
||||
|
@ -77,6 +79,10 @@ func Exec(qt *querytracer.Tracer, ec *EvalConfig, q string, isFirstPointOnly boo
|
|||
}
|
||||
maySort := maySortResults(e)
|
||||
result, err := timeseriesToResult(rv, maySort)
|
||||
if *maxResponseSeries > 0 && len(result) > *maxResponseSeries {
|
||||
return nil, fmt.Errorf("the response contains more than -search.maxResponseSeries=%d time series: %d series; either increase -search.maxResponseSeries "+
|
||||
"or change the query in order to return smaller number of series", *maxResponseSeries, len(result))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -853,6 +853,17 @@ func TestExecSuccess(t *testing.T) {
|
|||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run("day_of_year()", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `day_of_year(time()*1e4)`
|
||||
r := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{116, 139, 163, 186, 209, 232},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
resultExpected := []netstorage.Result{r}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run("days_in_month()", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `days_in_month(time()*2e4)`
|
||||
|
|
|
@ -561,9 +561,6 @@ var (
|
|||
inf = math.Inf(1)
|
||||
)
|
||||
|
||||
// The maximum interval without previous rows.
|
||||
const maxSilenceInterval = 5 * 60 * 1000
|
||||
|
||||
type timeseriesMap struct {
|
||||
origin *timeseries
|
||||
h metrics.Histogram
|
||||
|
@ -620,7 +617,7 @@ func (tsm *timeseriesMap) GetOrCreateTimeseries(labelName, labelValue string) *t
|
|||
//
|
||||
// rc.Timestamps are used as timestamps for dstValues.
|
||||
//
|
||||
// timestamps must cover time range [rc.Start - rc.Window - maxSilenceInterval ... rc.End].
|
||||
// It is expected that timestamps cover the time range [rc.Start - rc.Window ... rc.End].
|
||||
//
|
||||
// Do cannot be called from concurrent goroutines.
|
||||
func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []int64) ([]float64, uint64) {
|
||||
|
|
|
@ -42,6 +42,7 @@ var transformFuncs = map[string]transformFunc{
|
|||
"cosh": newTransformFuncOneArg(transformCosh),
|
||||
"day_of_month": newTransformFuncDateTime(transformDayOfMonth),
|
||||
"day_of_week": newTransformFuncDateTime(transformDayOfWeek),
|
||||
"day_of_year": newTransformFuncDateTime(transformDayOfYear),
|
||||
"days_in_month": newTransformFuncDateTime(transformDaysInMonth),
|
||||
"deg": newTransformFuncOneArg(transformDeg),
|
||||
"drop_common_labels": transformDropCommonLabels,
|
||||
|
@ -353,6 +354,10 @@ func newTransformFuncDateTime(f func(t time.Time) int) transformFunc {
|
|||
}
|
||||
}
|
||||
|
||||
func transformDayOfYear(t time.Time) int {
|
||||
return t.YearDay()
|
||||
}
|
||||
|
||||
func transformDayOfMonth(t time.Time) int {
|
||||
return t.Day()
|
||||
}
|
||||
|
@ -2743,14 +2748,15 @@ func copyTimeseriesMetricNames(tss []*timeseries, makeCopy bool) []*timeseries {
|
|||
return rvs
|
||||
}
|
||||
|
||||
// copyTimeseriesShallow returns a copy of arg with shallow copies of MetricNames,
|
||||
// Timestamps and Values.
|
||||
func copyTimeseriesShallow(arg []*timeseries) []*timeseries {
|
||||
rvs := make([]*timeseries, len(arg))
|
||||
for i, src := range arg {
|
||||
var dst timeseries
|
||||
dst.CopyShallow(src)
|
||||
rvs[i] = &dst
|
||||
// copyTimeseriesShallow returns a copy of src with shallow copies of MetricNames, Timestamps and Values.
|
||||
func copyTimeseriesShallow(src []*timeseries) []*timeseries {
|
||||
tss := make([]timeseries, len(src))
|
||||
for i, src := range src {
|
||||
tss[i].CopyShallow(src)
|
||||
}
|
||||
rvs := make([]*timeseries, len(tss))
|
||||
for i := range tss {
|
||||
rvs[i] = &tss[i]
|
||||
}
|
||||
return rvs
|
||||
}
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
{
|
||||
"files": {
|
||||
"main.css": "./static/css/main.349e6522.css",
|
||||
"main.js": "./static/js/main.c93073e5.js",
|
||||
"main.css": "./static/css/main.fb353c1e.css",
|
||||
"main.js": "./static/js/main.5bcddddc.js",
|
||||
"static/js/522.da77e7b3.chunk.js": "./static/js/522.da77e7b3.chunk.js",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.8644fd7c964802dd34a9.md",
|
||||
"static/media/MetricsQL.md": "./static/media/MetricsQL.b64c4dbf91f4fa581621.md",
|
||||
"index.html": "./index.html"
|
||||
},
|
||||
"entrypoints": [
|
||||
"static/css/main.349e6522.css",
|
||||
"static/js/main.c93073e5.js"
|
||||
"static/css/main.fb353c1e.css",
|
||||
"static/js/main.5bcddddc.js"
|
||||
]
|
||||
}
|
|
@ -1 +1 @@
|
|||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.c93073e5.js"></script><link href="./static/css/main.349e6522.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
||||
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.ico"/><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5"/><meta name="theme-color" content="#000000"/><meta name="description" content="UI for VictoriaMetrics"/><link rel="apple-touch-icon" href="./apple-touch-icon.png"/><link rel="icon" type="image/png" sizes="32x32" href="./favicon-32x32.png"><link rel="manifest" href="./manifest.json"/><title>VM UI</title><script src="./dashboards/index.js" type="module"></script><meta name="twitter:card" content="summary_large_image"><meta name="twitter:image" content="./preview.jpg"><meta name="twitter:title" content="UI for VictoriaMetrics"><meta name="twitter:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta name="twitter:site" content="@VictoriaMetrics"><meta property="og:title" content="Metric explorer for VictoriaMetrics"><meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data"><meta property="og:image" content="./preview.jpg"><meta property="og:type" content="website"><script defer="defer" src="./static/js/main.5bcddddc.js"></script><link href="./static/css/main.fb353c1e.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|
1
app/vmselect/vmui/static/css/main.fb353c1e.css
Normal file
2
app/vmselect/vmui/static/js/main.5bcddddc.js
Normal file
|
@ -1029,7 +1029,7 @@ for every point of every time series returned by `q`.
|
|||
|
||||
Metric names are stripped from the resulting series. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
This function is supported by PromQL. This function is supported by PromQL. See also [acosh](#acosh).
|
||||
This function is supported by PromQL. See also [acosh](#acosh).
|
||||
|
||||
#### day_of_month
|
||||
|
||||
|
@ -1049,6 +1049,15 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
|||
|
||||
This function is supported by PromQL.
|
||||
|
||||
#### day_of_year
|
||||
|
||||
`day_of_year(q)` is a [transform function](#transform-functions), which returns the day of year for every point of every time series returned by `q`.
|
||||
It is expected that `q` returns unix timestamps. The returned values are in the range `[1...365]` for non-leap years, and `[1...366]` in leap years.
|
||||
|
||||
Metric names are stripped from the resulting series. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
This function is supported by PromQL.
|
||||
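For example, the following query returns the day of year for the current timestamp at every point on the graph (`time()` returns unix timestamps, satisfying the expectation above):

```metricsql
day_of_year(time())
```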
|
||||
#### days_in_month
|
||||
|
||||
`days_in_month(q)` is a [transform function](#transform-functions), which returns the number of days in the month identified
|
|
@ -1,4 +1,4 @@
|
|||
FROM golang:1.21.4 as build-web-stage
|
||||
FROM golang:1.21.5 as build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
|
@ -6,7 +6,7 @@ COPY web/ /build/
|
|||
RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o web-amd64 github.com/VictoriMetrics/vmui/ && \
|
||||
GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o web-windows github.com/VictoriMetrics/vmui/
|
||||
|
||||
FROM alpine:3.18.2
|
||||
FROM alpine:3.19.0
|
||||
USER root
|
||||
|
||||
COPY --from=build-web-stage /build/web-amd64 /app/web
|
||||
|
|
|
@ -1029,7 +1029,7 @@ for every point of every time series returned by `q`.
|
|||
|
||||
Metric names are stripped from the resulting series. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
This function is supported by PromQL. This function is supported by PromQL. See also [acosh](#acosh).
|
||||
This function is supported by PromQL. See also [acosh](#acosh).
|
||||
|
||||
#### day_of_month
|
||||
|
||||
|
@ -1049,6 +1049,15 @@ Metric names are stripped from the resulting series. Add [keep_metric_names](#ke
|
|||
|
||||
This function is supported by PromQL.
|
||||
|
||||
#### day_of_year
|
||||
|
||||
`day_of_year(q)` is a [transform function](#transform-functions), which returns the day of year for every point of every time series returned by `q`.
|
||||
It is expected that `q` returns unix timestamps. The returned values are in the range `[1...365]` for non-leap years, and `[1...366]` in leap years.
|
||||
|
||||
Metric names are stripped from the resulting series. Add [keep_metric_names](#keep_metric_names) modifier in order to keep metric names.
|
||||
|
||||
This function is supported by PromQL.
|
||||
|
||||
#### days_in_month
|
||||
|
||||
`days_in_month(q)` is a [transform function](#transform-functions), which returns the number of days in the month identified
|
||||
|
|
|
@ -11,7 +11,7 @@ import classNames from "classnames";
|
|||
import useBoolean from "../../../hooks/useBoolean";
|
||||
import useEventListener from "../../../hooks/useEventListener";
|
||||
import Tooltip from "../../Main/Tooltip/Tooltip";
|
||||
import { AUTOCOMPLETE_KEY } from "../../Main/ShortcutKeys/constants/keyList";
|
||||
import { AUTOCOMPLETE_QUICK_KEY } from "../../Main/ShortcutKeys/constants/keyList";
|
||||
|
||||
const AdditionalSettingsControls: FC<{isMobile?: boolean}> = ({ isMobile }) => {
|
||||
const { autocomplete } = useQueryState();
|
||||
|
@ -32,11 +32,16 @@ const AdditionalSettingsControls: FC<{isMobile?: boolean}> = ({ isMobile }) => {
|
|||
queryDispatch({ type: "TOGGLE_AUTOCOMPLETE" });
|
||||
};
|
||||
|
||||
const onChangeQuickAutocomplete = () => {
|
||||
queryDispatch({ type: "SET_AUTOCOMPLETE_QUICK", payload: true });
|
||||
};
|
||||
|
||||
const handleKeyDown = (e: KeyboardEvent) => {
|
||||
const { code, altKey } = e;
|
||||
if (code === "KeyA" && altKey) {
|
||||
/** @see AUTOCOMPLETE_QUICK_KEY */
|
||||
const { code, ctrlKey, altKey } = e;
|
||||
if (code === "Space" && (ctrlKey || altKey)) {
|
||||
e.preventDefault();
|
||||
onChangeAutocomplete();
|
||||
onChangeQuickAutocomplete();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -49,7 +54,7 @@ const AdditionalSettingsControls: FC<{isMobile?: boolean}> = ({ isMobile }) => {
|
|||
"vm-additional-settings_mobile": isMobile
|
||||
})}
|
||||
>
|
||||
<Tooltip title={AUTOCOMPLETE_KEY}>
|
||||
<Tooltip title={<>Quick tip: {AUTOCOMPLETE_QUICK_KEY}</>}>
|
||||
<Switch
|
||||
label={"Autocomplete"}
|
||||
value={autocomplete}
|
||||
|
|