diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1ff416a59..c8a460b06 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,7 +1,13 @@ name: main on: - - push - - pull_request + push: + paths-ignore: + - 'docs/**' + - '**.md' + pull_request: + paths-ignore: + - 'docs/**' + - '**.md' jobs: build: name: Build @@ -24,19 +30,19 @@ jobs: env: GO111MODULE: on run: | - export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14 - make check-all - git diff --exit-code - make test-full - make test-pure - make test-full-386 - make victoria-metrics - make victoria-metrics-pure - make victoria-metrics-arm - make victoria-metrics-arm64 - make vmutils - GOOS=freebsd go build -mod=vendor ./app/victoria-metrics - GOOS=darwin go build -mod=vendor ./app/victoria-metrics + export PATH=$PATH:$(go env GOPATH)/bin # temporary fix. See https://github.com/actions/setup-go/issues/14 + make check-all + git diff --exit-code + make test-full + make test-pure + make test-full-386 + make victoria-metrics + make victoria-metrics-pure + make victoria-metrics-arm + make victoria-metrics-arm64 + make vmutils + GOOS=freebsd go build -mod=vendor ./app/victoria-metrics + GOOS=darwin go build -mod=vendor ./app/victoria-metrics - name: Publish coverage uses: codecov/codecov-action@v1.0.4 with: diff --git a/.github/workflows/wiki.yml b/.github/workflows/wiki.yml new file mode 100644 index 000000000..fb89c103c --- /dev/null +++ b/.github/workflows/wiki.yml @@ -0,0 +1,27 @@ +name: wiki +on: + push: + paths: + - 'doc/*.md' +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: publish + shell: bash + env: + TOKEN: ${{secrets.CI_TOKEN}} + run: | + cd doc + git clone https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.wiki.git wiki + find ./ -name '*.md' -exec cp -prv '{}' 'wiki' ';' + cd wiki + git config --local user.email "info@victoriametrics.com" + git
config --local user.name "Vika" + git add "*.md" + git commit -m "update wiki pages" + remote_repo="https://vika:${TOKEN}@github.com/VictoriaMetrics/VictoriaMetrics.wiki.git" + git push "${remote_repo}" + cd .. + rm -rf wiki \ No newline at end of file diff --git a/.gitignore b/.gitignore index 24ef443a9..12bcf1ea3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/tmp /tags /pkg *.pprof diff --git a/doc/Articles.md b/doc/Articles.md new file mode 100644 index 000000000..6339f771c --- /dev/null +++ b/doc/Articles.md @@ -0,0 +1,19 @@ +* [Open-sourcing VictoriaMetrics](https://medium.com/@valyala/open-sourcing-victoriametrics-f31e34485c2b) +* [How we created VictoriaMetrics](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac) +* [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 40K unique time series](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) +* [VictoriaMetrics vs TimescaleDB vs InfluxDB benchmarks on 400K, 4M and 40M unique time series](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) +* [Insert benchmarks for VictoriaMetrics vs InfluxDB on high-cardinality data](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) +* [Measuring vertical scalability for time series databases in Google Cloud](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae) +* [How VictoriaMetrics creates instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) +* [Prometheus Subqueries in VictoriaMetrics](https://medium.com/@valyala/prometheus-subqueries-in-victoriametrics-9b1492b720b3) +* [Why irate from Prometheus doesn't capture
spikes](https://medium.com/@valyala/why-irate-from-prometheus-doesnt-capture-spikes-45f9896d7832) +* [Why mmap'ed files in Go may hurt performance](https://medium.com/@valyala/mmap-in-go-considered-harmful-d92a25cb161d) +* [WAL Usage Looks Broken in Modern TSDBs](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) +* [Analyzing Prometheus data with external tools](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) +* [Stripping dependency bloat in VictoriaMetrics Docker image](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) +* [PromQL tutorial for beginners](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) +* [Achieving better compression for time series data than Gorilla](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932) +* [Comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683) +* [Speeding up backups for big time series databases](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) +* [Evaluation performance and correctness: VictoriaMetrics response](https://medium.com/@valyala/evaluating-performance-and-correctness-victoriametrics-response-e27315627e87) +* [Improving histogram usability for Prometheus and Grafana](https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) diff --git a/doc/Cluster-VictoriaMetrics.md b/doc/Cluster-VictoriaMetrics.md new file mode 100644 index 000000000..fc4d84c93 --- /dev/null +++ b/doc/Cluster-VictoriaMetrics.md @@ -0,0 +1,326 @@ +# Cluster version of VictoriaMetrics + +VictoriaMetrics is fast, cost-effective and scalable time series database. It can be used as a long-term remote storage for Prometheus. 
+ +It is recommended using [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics) instead of cluster version +for ingestion rates lower than 10 million of data points per second. +Single-node version [scales perfectly](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae) +with the number of CPU cores, RAM and available storage space. +Single-node version is easier to configure and operate comparing to cluster version, so think twice before sticking to cluster version. + +Join [our Slack](http://slack.victoriametrics.com/) or [contact us](mailto:info@victoriametrics.com) with consulting and support questions. + + +## Prominent features + +- Supports all the features of [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics). +- Performance and capacity scales horizontally. +- Supports multiple independent namespaces for time series data (aka multi-tenancy). + + +## Architecture overview + +VictoriaMetrics cluster consists of the following services: + +- `vmstorage` - stores the data +- `vminsert` - proxies the ingested data to `vmstorage` shards using consistent hashing +- `vmselect` - performs incoming queries using the data from `vmstorage` + +Each service may scale independently and may run on the most suitable hardware. + + + + +## Binaries + +Compiled binaries for cluster version are available in the `assets` section of [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases). +See archives containing `cluster` word. 
+ +Docker images for cluster version are available here: + +- `vminsert` - https://hub.docker.com/r/victoriametrics/vminsert/tags +- `vmselect` - https://hub.docker.com/r/victoriametrics/vmselect/tags +- `vmstorage` - https://hub.docker.com/r/victoriametrics/vmstorage/tags + + +## Building from sources + +Source code for cluster version is available at [cluster branch](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). + + +### Development Builds + +1. [Install go](https://golang.org/doc/install). The minimum supported version is Go 1.12. +2. Run `make` from the repository root. It should build `vmstorage`, `vmselect` + and `vminsert` binaries and put them into the `bin` folder. + + +### Production builds + +There is no need in installing Go on a host system since binaries are built +inside [the official docker container for Go](https://hub.docker.com/_/golang). +This makes reproducible builds. +So [install docker](https://docs.docker.com/install/) and run the following command: + +``` +make vminsert-prod vmselect-prod vmstorage-prod +``` + +Production binaries are built into statically linked binaries for `GOARCH=amd64`, `GOOS=linux`. +They are put into `bin` folder with `-prod` suffixes: +``` +$ make vminsert-prod vmselect-prod vmstorage-prod +$ ls -1 bin +vminsert-prod +vmselect-prod +vmstorage-prod +``` + +### Building docker images + +Run `make package`. It will build the following docker images locally: + +* `victoriametrics/vminsert:<PKG_TAG>` +* `victoriametrics/vmselect:<PKG_TAG>` +* `victoriametrics/vmstorage:<PKG_TAG>` + +`<PKG_TAG>` is auto-generated image tag, which depends on source code in the repository. +The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.
+ + + +## Operation + +### Cluster setup + +A minimal cluster must contain the following nodes: + +* a single `vmstorage` node with `-retentionPeriod` and `-storageDataPath` flags +* a single `vminsert` node with `-storageNode=<vmstorage_host>:8400` +* a single `vmselect` node with `-storageNode=<vmstorage_host>:8401` + +It is recommended to run at least two nodes for each service +for high availability purposes. + +An http load balancer must be put in front of `vminsert` and `vmselect` nodes: +- requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes. +- requests starting with `/select` must be routed to port `8481` on `vmselect` nodes. + +Ports may be altered by setting `-httpListenAddr` on the corresponding nodes. + +It is recommended setting up [monitoring](#monitoring) for the cluster. + + +### Monitoring + +All the cluster components expose various metrics in Prometheus-compatible format at `/metrics` page on the TCP port set in `-httpListenAddr` command-line flag. +By default the following TCP ports are used: +- `vminsert` - 8480 +- `vmselect` - 8481 +- `vmstorage` - 8482 + +It is recommended setting up Prometheus to scrape `/metrics` pages from all the cluster components, so they can be monitored and analyzed +with [the official Grafana dashboard for VictoriaMetrics cluster](https://grafana.com/grafana/dashboards/11176).
+ + +### URL format + +* URLs for data ingestion: `http://<vminsert>:8480/insert/<accountID>/<suffix>`, where: + - `<accountID>` is an arbitrary number identifying namespace for data ingestion (aka tenant) + - `<suffix>` may have the following values: + - `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) + - `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/) + +* URLs for querying: `http://<vmselect>:8481/select/<accountID>/prometheus/<suffix>`, where: + - `<accountID>` is an arbitrary number identifying data namespace for the query (aka tenant) + - `<suffix>` may have the following values: + - `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries) + - `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) + - `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers) + - `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) + - `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according [to API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) + - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/) + - `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details + +* URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`. + Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series.
It shouldn't + be used on a regular basis, since it carries non-zero overhead. + +* `vmstorage` nodes provide the following HTTP endpoints on `8482` port: + - `/snapshot/create` - create [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282), + which can be used for backups in background. Snapshots are created in `<-storageDataPath>/snapshots` folder, where `<-storageDataPath>` is the corresponding + command-line flag value. + - `/snapshot/list` - list available snapshots. + - `/snapshot/delete?snapshot=<snapshot_name>` - delete the given snapshot. + - `/snapshot/delete_all` - delete all the snapshots. + + Snapshots may be created independently on each `vmstorage` node. There is no need in synchronizing snapshots' creation + across `vmstorage` nodes. + + +### Cluster resizing and scalability. + +Cluster performance and capacity scales with adding new nodes. + +* `vminsert` and `vmselect` nodes are stateless and may be added / removed at any time. + Do not forget updating the list of these nodes on http load balancer. + Adding more `vminsert` nodes scales data ingestion rate. See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/175#issuecomment-536925841) + about ingestion rate scalability. + Adding more `vmselect` nodes scales select queries rate. +* `vmstorage` nodes own the ingested data, so they cannot be removed without data loss. + Adding more `vmstorage` nodes scales cluster capacity. + +Steps to add `vmstorage` node: + +1. Start new `vmstorage` node with the same `-retentionPeriod` as existing nodes in the cluster. +2. Gradually restart all the `vmselect` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8401`. +3. Gradually restart all the `vminsert` nodes with new `-storageNode` arg containing `<new_vmstorage_host>:8400`. + + +### Cluster availability + +* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
+* The cluster remains available if at least a single `vmstorage` node exists: + + - `vminsert` re-routes incoming data from unavailable `vmstorage` nodes to healthy `vmstorage` nodes + - `vmselect` continues serving partial responses if at least a single `vmstorage` node is available. + + +### Updating / reconfiguring cluster nodes + +All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown. +Send `SIGINT` signal to the corresponding process, wait until it finishes and then start new version +with new configs. + +Cluster should remain in working state if at least a single node of each type remains available during +the update process. See [cluster availability](#cluster-availability) section for details. + + +### Capacity planning + +Each instance type - `vminsert`, `vmselect` and `vmstorage` - can run on the most suitable hardware. + +#### vminsert + +* The recommended total number of vCPU cores for all the `vminsert` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`. +* The recommended number of vCPU cores per each `vminsert` instance should equal to the number of `vmstorage` instances in the cluster. +* The amount of RAM per each `vminsert` instance should be 1GB or more. RAM is used as a buffer for spikes in ingestion rate. +* Sometimes `-rpc.disableCompression` command-line flag on `vminsert` instances could increase ingestion capacity at the cost + of higher network bandwidth usage between `vminsert` and `vmstorage`. + +#### vmstorage + +* The recommended total number of vCPU cores for all the `vmstorage` instances can be calculated from the ingestion rate: `vCPUs = ingestion_rate / 150K`. +* The recommended total amount of RAM for all the `vmstorage` instances can be calculated from the number of active time series: `RAM = active_time_series * 1KB`. 
+ Time series is active if it received at least a single data point during the last hour or if it has been queried during the last hour. +* The recommended total amount of storage space for all the `vmstorage` instances can be calculated + from the ingestion rate and retention: `storage_space = ingestion_rate * retention_seconds`. + +#### vmselect + +The recommended hardware for `vmselect` instances highly depends on the type of queries. Lightweight queries over small number of time series usually require +small number of vCPU cores and small amount of RAM on `vmselect`, while heavy queries over big number of time series (>10K) usually require +bigger number of vCPU cores and bigger amounts of RAM. + + +### Helm + +Helm chart simplifies managing cluster version of VictoriaMetrics in Kubernetes. +It is available in the [helm-charts](https://github.com/VictoriaMetrics/helm-charts) repository. + +Upgrade follows `Cluster resizing procedure` under the hood. + + +### Replication and data safety + +VictoriaMetrics offloads replication to the underlying storage pointed by `-storageDataPath`. +It is recommended storing data on [Google Compute Engine persistent disks](https://cloud.google.com/compute/docs/disks/#pdspecs), +since they are protected from data loss and data corruption. They also provide consistently high performance +and [may be resized](https://cloud.google.com/compute/docs/disks/add-persistent-disk) without downtime. +HDD-based persistent disks should be enough for the majority of use cases. + +It is recommended using durable replicated persistent volumes in Kubernetes. + +Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883). 
+ + +### Backups + +It is recommended performing periodical backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) +for protecting from user errors such as accidental data deletion. + +The following steps must be performed for each `vmstorage` node for creating a backup: + +1. Create an instant snapshot by navigating to `/snapshot/create` HTTP handler. It will create snapshot and return its name. +2. Archive the created snapshot from `<-storageDataPath>/snapshots/<snapshot_name>` folder using [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/app/vmbackup/README.md). + The archival process doesn't interfere with `vmstorage` work, so it may be performed at any suitable time. +3. Delete unused snapshots via `/snapshot/delete?snapshot=<snapshot_name>` or `/snapshot/delete_all` in order to free up occupied storage space. + +There is no need in synchronizing backups among all the `vmstorage` nodes. + +Restoring from backup: + +1. Stop `vmstorage` node with `kill -INT`. +2. Restore data from backup using [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/cluster/app/vmrestore/README.md) into `-storageDataPath` directory. +3. Start `vmstorage` node. + + +## Community and contributions + +We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle): + +- Prefer simple code and architecture. +- Avoid complex abstractions. +- Avoid magic code and fancy algorithms. +- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d). +- Minimize the number of moving parts in the distributed system. +- Avoid automated decisions, which may hurt cluster availability, consistency or performance. + +Adhering to the `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
+ +Due to `KISS`, cluster version of VictoriaMetrics lacks the following "features" popular in distributed computing world: + +- Fragile gossip protocols. See [failed attempt in Thanos](https://github.com/improbable-eng/thanos/blob/030bc345c12c446962225221795f4973848caab5/docs/proposals/completed/201809_gossip-removal.md). +- Hard-to-understand-and-implement-properly [Paxos protocols](https://www.quora.com/In-distributed-systems-what-is-a-simple-explanation-of-the-Paxos-algorithm). +- Complex replication schemes, which may go nuts in unforeseen edge cases. The replication is offloaded to the underlying durable replicated storage + such as [persistent disks in Google Compute Engine](https://cloud.google.com/compute/docs/disks/#pdspecs). +- Automatic data reshuffling between storage nodes, which may hurt cluster performance and availability. +- Automatic cluster resizing, which may cost you a lot of money if improperly configured. +- Automatic discovering and addition of new nodes in the cluster, which may mix data between dev and prod clusters :) +- Automatic leader election, which may result in split brain disaster on network errors. + + +## Reporting bugs + +Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues). + + +## Victoria Metrics Logo + +[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version). + +Files included in each folder: + +* 2 JPEG Preview files +* 2 PNG Preview files with transparent background +* 2 EPS Adobe Illustrator EPS10 files + + +### Logo Usage Guidelines + +#### Font used: + +* Lato Black +* Lato Regular + +#### Color Palette: + +* HEX [#110f0f](https://www.color-hex.com/color/110f0f) +* HEX [#ffffff](https://www.color-hex.com/color/ffffff) + +### We kindly ask: + +- Please don't use any other font instead of suggested. +- There should be sufficient clear space around the logo.
+- Do not change spacing, alignment, or relative locations of the design elements. +- Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions. diff --git a/doc/ExtendedPromQL.md b/doc/ExtendedPromQL.md new file mode 100644 index 000000000..8cb998ae4 --- /dev/null +++ b/doc/ExtendedPromQL.md @@ -0,0 +1,61 @@ +VictoriaMetrics supports [standard PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) +including [subqueries](https://prometheus.io/blog/2019/01/28/subquery-support/). +Additionally it supports useful extensions mentioned below. +Try these extensions on [an editable Grafana dashboard](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo). + +- [`WITH` templates](https://play.victoriametrics.com/promql/expand-with-exprs). This feature simplifies writing and managing complex queries. Go to [`WITH` templates playground](https://victoriametrics.com/promql/expand-with-exprs) and try it. +- Metric names and metric labels may contain escaped chars. For instance, `foo\-bar{baz\=aa="b"}` is valid expression. It returns time series with name `foo-bar` containing label `baz=aa` with value `b`. Additionally, `\xXX` escape sequence is supported, where `XX` is hexadecimal representation of escaped char. +- `offset`, range duration and step value for range vector may refer to the current step aka `$__interval` value from Grafana. + For instance, `rate(metric[10i] offset 5i)` would return per-second rate over a range covering 10 previous steps with the offset of 5 steps. +- `default` binary operator. `q1 default q2` substitutes `NaN` values from `q1` with the corresponding values from `q2`. +- `if` binary operator. `q1 if q2` removes values from `q1` for `NaN` values from `q2`. +- `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for non-`NaN` values from `q2`. +- `offset` may be put anywhere in the query.
For instance, `sum(foo) offset 24h`. +- Trailing commas on all the lists are allowed - label filters, function args and with expressions. For instance, the following queries are valid: `m{foo="bar",}`, `f(a, b,)`, `WITH (x=y,) x`. This simplifies maintenance of multi-line queries. +- String literals may be concatenated. This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`. +- Range duration in functions such as [rate](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate()) may be omitted. VictoriaMetrics automatically selects range duration depending on the current step used for building the graph. For instance, the following query is valid in VictoriaMetrics: `rate(node_network_receive_bytes_total)`. +- [Range duration](https://prometheus.io/docs/prometheus/latest/querying/basics/#range-vector-selectors) and [offset](https://prometheus.io/docs/prometheus/latest/querying/basics/#offset-modifier) may be fractional. For instance, `rate(node_network_receive_bytes_total[1.5m] offset 0.5d)`. +- Comments starting with `#` and ending with newline. For instance, `up # this is a comment for 'up' metric`. +- Rollup functions - `rollup(m[d])`, `rollup_rate(m[d])`, `rollup_deriv(m[d])`, `rollup_increase(m[d])`, `rollup_delta(m[d])` - return `min`, `max` and `avg` + values for all the `m` data points over `d` duration. +- `rollup_candlestick(m[d])` - returns `open`, `close`, `low` and `high` values (OHLC) for all the `m` data points over `d` duration. This function is useful for financial applications. +- `union(q1, ... qN)` function for building multiple graphs for `q1`, ... `qN` subqueries with a single query. The `union` function name may be skipped - + the following queries are equivalent: `union(q1, q2)` and `(q1, q2)`. +- `ru(freeResources, maxResources)` function for returning resource utilization percentage in the range `0% - 100%`. 
For instance, `ru(node_memory_MemFree_bytes, node_memory_MemTotal_bytes)` returns memory utilization over [node_exporter](https://github.com/prometheus/node_exporter) metrics. +- `ttf(slowlyChangingFreeResources)` function for returning the time in seconds when the given `slowlyChangingFreeResources` expression reaches zero. For instance, `ttf(node_filesystem_avail_byte)` returns the time to storage space exhaustion. This function may be useful for capacity planning. +- Functions for label manipulation: + - `alias(q, name)` for setting metric name across all the time series `q`. + - `label_set(q, label1, value1, ... labelN, valueN)` for setting the given values for the given labels on `q`. + - `label_del(q, label1, ... labelN)` for deleting the given labels from `q`. + - `label_keep(q, label1, ... labelN)` for deleting all the labels except the given labels from `q`. + - `label_copy(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for copying label values from `src_*` to `dst_*`. + - `label_move(q, src_label1, dst_label1, ... src_labelN, dst_labelN)` for moving label values from `src_*` to `dst_*`. + - `label_transform(q, label, regexp, replacement)` for replacing all the `regexp` occurrences with `replacement` in the `label` values from `q`. + - `label_value(q, label)` - returns numeric values for the given `label` from `q`. +- `step()` function for returning the step in seconds used in the query. +- `start()` and `end()` functions for returning the start and end timestamps of the `[start ... end]` range used in the query. +- `integrate(m[d])` for returning integral over the given duration `d` for the given metric `m`. +- `ideriv(m)` - for calculating `instant` derivative for `m`. +- `deriv_fast(m[d])` - for calculating `fast` derivative for `m` based on the first and the last points from duration `d`.
+- `running_` functions - `running_sum`, `running_min`, `running_max`, `running_avg` - for calculating [running values](https://en.wikipedia.org/wiki/Running_total) on the selected time range. +- `range_` functions - `range_sum`, `range_min`, `range_max`, `range_avg`, `range_first`, `range_last`, `range_median`, `range_quantile` - for calculating global value over the selected time range. +- `smooth_exponential(q, sf)` - smooths `q` using [exponential moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) with the given smooth factor `sf`. +- `remove_resets(q)` - removes counter resets from `q`. +- `lag(q[d])` - returns lag between the current timestamp and the timestamp from the previous data point in `q` over `d`. +- `lifetime(q[d])` - returns lifetime of `q` over `d` in seconds. It is expected that `d` exceeds the lifetime of `q`. +- `scrape_interval(q[d])` - returns the average interval in seconds between data points of `q` over `d` aka `scrape interval`. +- Trigonometric functions - `sin(q)`, `cos(q)`, `asin(q)`, `acos(q)` and `pi()`. +- `median_over_time(m[d])` - calculates median values for `m` over `d` time window. Shorthand to `quantile_over_time(0.5, m[d])`. +- `median(q)` - median aggregate. Shorthand to `quantile(0.5, q)`. +- `limitk(k, q)` - limits the number of time series returned from `q` to `k`. +- `keep_last_value(q)` - fills missing data (gaps) in `q` with the previous value. +- `distinct_over_time(m[d])` - returns distinct number of values for `m` data points over `d` duration. +- `distinct(q)` - returns a time series with the number of unique values for each timestamp in `q`. +- `sum2_over_time(m[d])` - returns sum of squares for all the `m` values over `d` duration. +- `sum2(q)` - returns a time series with sum of square values for each timestamp in `q`. +- `geomean_over_time(m[d])` - returns [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for all the `m` values over `d` duration.
+- `geomean(q)` - returns a time series with [geomean](https://en.wikipedia.org/wiki/Geometric_mean) value for each timestamp in `q`. +- `rand()`, `rand_normal()` and `rand_exponential()` functions - for generating pseudo-random series with even, normal and exponential distribution. +- `increases_over_time(m[d])` and `decreases_over_time(m[d])` - returns the number of `m` increases or decreases over the given duration `d`. +- `prometheus_buckets(q)` - converts [VictoriaMetrics histogram](https://godoc.org/github.com/VictoriaMetrics/metrics#Histogram) buckets to Prometheus buckets with `le` labels. +- `histogram(q)` - calculates aggregate histogram over `q` time series for each point on the graph. diff --git a/doc/FAQ.md b/doc/FAQ.md new file mode 100644 index 000000000..6636dfa9d --- /dev/null +++ b/doc/FAQ.md @@ -0,0 +1,158 @@ +### What is the main purpose of VictoriaMetrics? + +To provide the best long-term [remote storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) solution for [Prometheus](https://prometheus.io/). + + +### Which features does VictoriaMetrics have? + +* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana. + Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL). +* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) + and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). + [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). 
+* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality). +* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) + may be crammed into a limited storage comparing to TimescaleDB. +* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b). +* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB. + See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae) + and [comparing Thanos to VictoriaMetrics](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683). +* Easy operation: + * VictoriaMetrics consists of a single executable without external dependencies. + * All the configuration is done via explicit command-line flags with reasonable defaults. + * All the data is stored in a single directory pointed by `-storageDataPath` flag. + * Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). 
+* Supports metrics' ingestion and backfilling via the following protocols: + * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) + * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/) + * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon) + if `-graphiteListenAddr` is set. + * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set. +* Ideally works with big amounts of time series data from IoT sensors, connected car sensors and industrial sensors. +* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). + + +### Which clients do you target? + +The following Prometheus users may be interested in VictoriaMetrics: +- Users who don't want to bother with Prometheus' local storage operational burden - backups, replication, capacity planning, scalability, etc. +- Users with multiple Prometheus instances who want to perform arbitrary queries over all the metrics collected by their Prometheus instances (aka `global querying view`). +- Users who want to reduce costs for storing huge amounts of time series data. + + +### How to start using VictoriaMetrics? + +Start with [single-node version](Single-server-VictoriaMetrics). It is easy to configure and operate. It should fit the majority of use cases. + + +### Is it safe to enable [remote write storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage) in Prometheus? + +Yes. Prometheus continues writing data to local storage after enabling remote storage write, so all the existing local storage data +and new data is available for querying via Prometheus as usual. 
+ + +### How does VictoriaMetrics compare to other clustered TSDBs on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex), etc.? + +VictoriaMetrics is simpler, faster, more cost-effective and it provides [useful extensions for PromQL](ExtendedPromQL). The simplicity is twofold: +- It is simpler to configure and operate. There is no need to configure third-party [sidecars](https://github.com/improbable-eng/thanos/blob/master/docs/components/sidecar.md) + or fight with [gossip protocol](https://github.com/improbable-eng/thanos/blob/master/docs/proposals/completed/201809_gossip-removal.md). +- VictoriaMetrics has a simpler architecture, which means fewer bugs and more useful features in the long run compared to competing TSDBs. + +See [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683). + + +### How does VictoriaMetrics compare to [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/)? + +VictoriaMetrics requires [10x less RAM](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and it [works faster](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). +It is easier to configure and operate. It provides a [better query language](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) than InfluxQL or Flux. + + +### How does VictoriaMetrics compare to [TimescaleDB](https://www.timescale.com/)? + +TimescaleDB insists on using SQL as a query language. While SQL is more powerful than PromQL, this power is rarely required during typical TSDB usage. Real-world queries usually [look clearer and simpler when written in PromQL than in SQL](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085). 
+Additionally, VictoriaMetrics requires [up to 70x less storage space comparing to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data. + + +### Does VictoriaMetrics use Prometheus technologies like other clustered TSDBs built on top of Prometheus such as [M3 from Uber](https://eng.uber.com/m3/), [Thanos](https://github.com/improbable-eng/thanos), [Cortex](https://github.com/cortexproject/cortex)? + +No. VictoriaMetrics core is written in Go from scratch by [fasthttp](https://github.com/valyala/fasthttp) [author](https://github.com/valyala). +The architecture is [optimized for storing and querying large amounts of time series data with high cardinality](https://medium.com/devopslinks/victoriametrics-creating-the-best-remote-storage-for-prometheus-5d92d66787ac). VictoriaMetrics storage uses [certain ideas from ClickHouse](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). Special thanks to [Alexey Milovidov](https://github.com/alexey-milovidov). + + +### Are there performance comparisons with other solutions? + +Yes: + +* [Measuring vertical scalability for time series databases: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). 
+* [Measuring insert performance on high-cardinality time series: VictoriaMetrics vs InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) +* [TSBS benchmark on high-cardinality time series: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) +* [Standard TSBS benchmark: VictoriaMetrics vs InfluxDB vs TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) + + +### What is the pricing for VictoriaMetrics? + +The following versions are open source and free: +* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics). +* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). + +We provide commercial support for both versions. [Contact us](mailto:info@victoriametrics.com) for the pricing. + +The following versions are commercial: +* Managed cluster in the Cloud. +* SaaS version. + +[Contact us](mailto:info@victoriametrics.com) for the pricing. + + +### Why VictoriaMetrics doesn't support [Prometheus remote read API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#%3Cremote_read%3E)? + +Remote read API requires transferring all the raw data for all the requested metrics over the given time range. For instance, +if a query covers 1000 metrics with 10K values each, then the remote read API had to return `1000*10K`=10M metric values to Prometheus. +This is slow and expensive. +Prometheus remote read API isn't intended for querying foreign data aka `global query view`. See [this issue](https://github.com/prometheus/prometheus/issues/4456) for details. 
+ +So just query VictoriaMetrics directly via [Prometheus Querying API](https://prometheus.io/docs/prometheus/latest/querying/api/) +or via [Prometheus datasource in Grafana](http://docs.grafana.org/features/datasources/prometheus/). + + +### Does VictoriaMetrics deduplicate data from Prometheus instances scraping the same targets (aka `HA pairs`)? + +Data from all the Prometheus instances is saved in VictoriaMetrics without deduplication. + +The deduplication for Prometheus HA pair may be easily implemented on top of VictoriaMetrics with the following steps: + +1) Run multiple VictoriaMetrics instances in multiple availability zones (datacenters). +2) Configure each Prometheus from each HA pair to write data to VictoriaMetrics in a distinct availability zone. +3) Put [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics instances. +4) Send queries to Promxy - it will deduplicate data from VictoriaMetrics instances behind it. + + +### Where is the source code of VictoriaMetrics? + +Source code for the following versions is available in the following places: +* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics). +* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). + + +### Does VictoriaMetrics fit for data from IoT sensors and industrial sensors? + +VictoriaMetrics is able to handle data from hundreds of millions of IoT sensors and industrial sensors. +It supports [high cardinality data](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b), +perfectly [scales up on a single node](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae) +and scales horizontally to multiple nodes. + + +### Where can I ask questions about VictoriaMetrics? + +See [VictoriaMetrics-users group](https://groups.google.com/forum/#!forum/victorametrics-users). 
+ + +### Where can I file bugs and feature requests regarding VictoriaMetrics? + +File bugs and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues). + + +### Are you looking for investors? + +Yes. [Mail us](mailto:info@victoriametrics.com) if you are interested. diff --git a/doc/Home.md b/doc/Home.md new file mode 100644 index 000000000..1489822a1 --- /dev/null +++ b/doc/Home.md @@ -0,0 +1,10 @@ +## VictoriaMetrics docs + +* [Quick start](Quick-Start) +* [`WITH` templates playground](https://play.victoriametrics.com/promql/expand-with-exprs) +* [Grafana playground](http://play-grafana.victoriametrics.com:3000/d/4ome8yJmz/node-exporter-on-victoriametrics-demo) +* [Extended PromQL](ExtendedPromQL) +* [FAQ](FAQ) +* [Single-node version](Single-server-VictoriaMetrics) +* [Cluster version](Cluster-VictoriaMetrics) +* [Articles](Articles) diff --git a/doc/Quick-Start.md b/doc/Quick-Start.md new file mode 100644 index 000000000..86d1ae931 --- /dev/null +++ b/doc/Quick-Start.md @@ -0,0 +1,25 @@ +1. Download the latest VictoriaMetrics release from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), + from [Docker hub](https://hub.docker.com/r/valyala/victoria-metrics/) + or [build it from sources](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#how-to-build-from-sources). + +2. Run the binary or Docker image with the desired command-line flags. Pass `-help` in order to see description for all the available flags + and their default values. Default flag values should fit the majority of cases. The minimum required flags to configure are: + + * `-storageDataPath` - path to directory where VictoriaMetrics stores all the data. + * `-retentionPeriod` - data retention in months. 
+ + For instance: + + `./victoria-metrics-prod -storageDataPath=/var/lib/victoria-metrics-data -retentionPeriod=3` + + See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) in order to configure VictoriaMetrics as OS service. + It is recommended setting up [VictoriaMetrics monitoring](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#monitoring). + +3. Configure all the Prometheus instances to write data to VictoriaMetrics. + See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#prometheus-setup). + +4. Configure Grafana to query VictoriaMetrics instead of Prometheus. + See [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Single-server-VictoriaMetrics#grafana-setup). + + +There is also [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) and [SaaS playground](https://play.victoriametrics.com/signIn). diff --git a/doc/Release-Guide.md b/doc/Release-Guide.md new file mode 100644 index 000000000..c1fdb2d4e --- /dev/null +++ b/doc/Release-Guide.md @@ -0,0 +1,31 @@ +## Release version and Docker images + +1. Create release tag with `git tag v1.xx.y`. +2. Run `make release` for creating `*.tar.gz` release archive with the corresponding `_checksums.txt` inside `bin` directory. +3. Run `make publish` for creating and publishing Docker images. +4. Push release tag to https://github.com/VictoriaMetrics/VictoriaMetrics : `git push origin v1.xx.y`. +5. Go to https://github.com/VictoriaMetrics/VictoriaMetrics/releases , create new release from the pushed tag on step 4 + and upload `*.tar.gz` archive with the corresponding `_checksums.txt` from step 2. + + +## Helm Charts + +The helm chart repository [https://github.com/VictoriaMetrics/helm-charts/](https://github.com/VictoriaMetrics/helm-charts/) + + +### Bump the version of images. +In that case, don't need to bump the helm chart version + +1. 
Need to update [`values.yaml`](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-cluster/values.yaml), bump version for `vmselect`, `vminsert` and `vmstorage` +2. Specify the correct version in [`Chart.yaml`](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-cluster/Chart.yaml) +3. Update version [README.md](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-cluster/README.md), specify the new version in the documentation +4. Push changes to master. `master` is a source of truth +5. Rebase `master` into `gh-pages` branch +6. Run `make package` which creates or updates zip file with the packed chart +7. Run `make merge`. It creates or updates metadata for charts in index.yaml +8. Push the changes to `gh-pages` branch + +### Updating the chart. +1. Update chart version in [`Chart.yaml`](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-cluster/Chart.yaml) +2. Update [README.md](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-cluster/README.md) file, reflect changes in the documentation. +3. Repeat the procedure from step _4_ previous section. diff --git a/doc/Single-server-VictoriaMetrics.md b/doc/Single-server-VictoriaMetrics.md new file mode 100644 index 000000000..564d0a683 --- /dev/null +++ b/doc/Single-server-VictoriaMetrics.md @@ -0,0 +1,817 @@ +## Single-node VictoriaMetrics + +VictoriaMetrics is fast, cost-effective and scalable time-series database. It can be used as long-term remote storage for Prometheus. +It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases), +[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) and +in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics). + +Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). 
+ + +## Prominent features + +* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana. + Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL). +* Supports global query view. Multiple Prometheus instances may write data into VictoriaMetrics. Later this data may be used in a single query. +* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b) + and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4). + [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). +* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality). +* Optimized for time series with high churn rate. Think about [prometheus-operator](https://github.com/coreos/prometheus-operator) metrics from frequent deployments in Kubernetes. +* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) + may be crammed into limited storage comparing to TimescaleDB. +* Optimized for storage with high-latency IO and low IOPS (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b). 
+* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB. + See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae) + and [comparing Thanos to VictoriaMetrics cluster](https://medium.com/@valyala/comparing-thanos-to-victoriametrics-cluster-b193bea1683). +* Easy operation: + * VictoriaMetrics consists of a single [small executable](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d) without external dependencies. + * All the configuration is done via explicit command-line flags with reasonable defaults. + * All the data is stored in a single directory pointed by `-storageDataPath` flag. + * Easy and fast backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) + to S3 or GCS with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md) / [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md). + See [this article](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883) for more details. +* Storage is protected from corruption on unclean shutdown (i.e. OOM, hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). 
+* Supports metrics' ingestion and [backfilling](#backfilling) via the following protocols: + * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write) + * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/) + * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon) + if `-graphiteListenAddr` is set. + * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set. + * [HTTP OpenTSDB /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) if `-opentsdbHTTPListenAddr` is set. +* Ideally works with big amounts of time series data from Kubernetes, IoT sensors, connected cars, industrial telemetry, financial data and various Enterprise workloads. +* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster). 
+ + +## Operation + + +### Table of contents + + - [How to start VictoriaMetrics](#how-to-start-victoriametrics) + - [Prometheus setup](#prometheus-setup) + - [Grafana setup](#grafana-setup) + - [How to upgrade VictoriaMetrics?](#how-to-upgrade-victoriametrics) + - [How to apply new config to VictoriaMetrics?](#how-to-apply-new-config-to-victoriametrics) + - [How to send data from InfluxDB-compatible agents such as Telegraf?](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf) + - [How to send data from Graphite-compatible agents such as StatsD?](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd) + - [Querying Graphite data](#querying-graphite-data) + - [How to send data from OpenTSDB-compatible agents?](#how-to-send-data-from-opentsdb-compatible-agents) + - [How to build from sources](#how-to-build-from-sources) + - [Development build](#development-build) + - [Production build](#production-build) + - [ARM build](#arm-build) + - [Pure Go build (CGO_ENABLED=0)](#pure-go-build-cgo_enabled0) + - [Building docker images](#building-docker-images) + - [Start with docker-compose](#start-with-docker-compose) + - [Setting up service](#setting-up-service) + - [Third-party contributions](#third-party-contributions) + - [How to work with snapshots?](#how-to-work-with-snapshots) + - [How to delete time series?](#how-to-delete-time-series) + - [How to export time series?](#how-to-export-time-series) + - [Federation](#federation) + - [Capacity planning](#capacity-planning) + - [High availability](#high-availability) + - [Multiple retentions](#multiple-retentions) + - [Downsampling](#downsampling) + - [Multi-tenancy](#multi-tenancy) + - [Scalability and cluster version](#scalability-and-cluster-version) + - [Alerting](#alerting) + - [Security](#security) + - [Tuning](#tuning) + - [Monitoring](#monitoring) + - [Troubleshooting](#troubleshooting) + - [Backfilling](#backfilling) + - [Profiling](#profiling) +- [Integrations](#integrations) +- 
[Roadmap](#roadmap) +- [Contacts](#contacts) +- [Community and contributions](#community-and-contributions) +- [Reporting bugs](#reporting-bugs) +- [Victoria Metrics Logo](#victoria-metrics-logo) + - [Logo Usage Guidelines](#logo-usage-guidelines) + - [Font used:](#font-used) + - [Color Palette:](#color-palette) + - [We kindly ask:](#we-kindly-ask) + + +### How to start VictoriaMetrics + +Just start VictoriaMetrics [executable](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) +or [docker image](https://hub.docker.com/r/victoriametrics/victoria-metrics/) with the desired command-line flags. + +The following command-line flags are used the most: + +* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory. Default path is `victoria-metrics-data` in current working directory. +* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted. Default period is 1 month. +* `-httpListenAddr` - TCP address to listen to for http requests. By default, it listens on port `8428` on all the network interfaces. +* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default, it is disabled. +* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data over telnet protocol. By default, it is disabled. +* `-opentsdbHTTPListenAddr` - TCP address to listen to for HTTP OpenTSDB data over `/api/put`. By default, it is disabled. + +Pass `-help` to see all the available flags with description and default values. + +It is recommended setting up [monitoring](#monitoring) for VictoriaMetrics. + + +### Prometheus setup + +Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`): + +```yml +remote_write: + - url: http://<victoriametrics-addr>:8428/api/v1/write + queue_config: + max_samples_per_send: 10000 + max_shards: 30 +``` + +Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics. 
+Then apply the new config via the following command: + +``` +kill -HUP `pidof prometheus` +``` + +Prometheus writes incoming data to local storage and replicates it to remote storage in parallel. +This means the data remains available in local storage for `--storage.tsdb.retention.time` duration +even if remote storage is unavailable. + +If you plan to send data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into `global` section +of [Prometheus config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file): + +```yml +global: + external_labels: + datacenter: dc-123 +``` + +This instructs Prometheus to add `datacenter=dc-123` label to each time series sent to remote storage. +The label name may be arbitrary - `datacenter` is just an example. The label value must be unique +across Prometheus instances, so those time series may be filtered and grouped by this label. + + +It is recommended upgrading Prometheus to [v2.12.0](https://github.com/prometheus/prometheus/releases) or newer, +since the previous versions may have issues with `remote_write`. + + +### Grafana setup + +Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following URL: + +``` +http://<victoriametrics-addr>:8428 +``` + +Substitute `<victoriametrics-addr>` with the hostname or IP address of VictoriaMetrics. + +Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/). +VictoriaMetrics supports native PromQL and [extends it with useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL). + + +### How to upgrade VictoriaMetrics? + +It is safe upgrading VictoriaMetrics to new versions unless [release notes](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) +say otherwise. 
It is recommended performing regular upgrades to the latest version, +since it may contain important bug fixes, performance optimizations or new features. + +Perform the following steps during the upgrade: + +1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it. +2) Wait until the process stops. This can take a few seconds. +3) Start the upgraded VictoriaMetrics. + +Prometheus doesn't drop data during VictoriaMetrics restart. +See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. + + +### How to apply new config to VictoriaMetrics? + +VictoriaMetrics must be restarted for applying new config: + +1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it. +2) Wait until the process stops. This can take a few seconds. +3) Start VictoriaMetrics with the new config. + +Prometheus doesn't drop data during VictoriaMetrics restart. +See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. + + +### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)? + +Just use `http://<victoriametrics-addr>:8428` url instead of InfluxDB url in agents' configs. +For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB: + +``` +[[outputs.influxdb]] + urls = ["http://<victoriametrics-addr>:8428"] +``` + +Do not forget to substitute `<victoriametrics-addr>` with the real address where VictoriaMetrics runs. + +VictoriaMetrics maps Influx data using the following rules: +* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value + unless `db` tag exists in the Influx line. +* Field names are mapped to time series names prefixed with `{measurement}{separator}` value, + where `{separator}` equals `_` by default. 
It can be changed with `-influxMeasurementFieldSeparator` command-line flag. + See also `-influxSkipSingleField` command-line flag. +* Field values are mapped to time series values. +* Tags are mapped to Prometheus labels as-is. + +For example, the following Influx line: + +``` +foo,tag1=value1,tag2=value2 field1=12,field2=40 +``` + +is converted into the following Prometheus data points: + +``` +foo_field1{tag1="value1", tag2="value2"} 12 +foo_field2{tag1="value1", tag2="value2"} 40 +``` + +Example for writing data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/) +to local VictoriaMetrics using `curl`: + +``` +curl -d 'measurement,tag1=value1,tag2=value2 field1=123,field2=1.23' -X POST 'http://localhost:8428/write' +``` + +An arbitrary number of lines delimited by '\n' may be sent in a single request. +After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint: + +``` +curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}' +``` + +The `/api/v1/export` endpoint should return the following response: + +``` +{"metric":{"__name__":"measurement_field1","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560272508147]} +{"metric":{"__name__":"measurement_field2","tag1":"value1","tag2":"value2"},"values":[1.23],"timestamps":[1560272508147]} +``` + +Note that Influx line protocol expects [timestamps in *nanoseconds* by default](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/#timestamp), +while VictoriaMetrics stores them with *milliseconds* precision. + + +### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)? + +1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. 
For instance, +the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`: + +``` +/path/to/victoria-metrics-prod -graphiteListenAddr=:2003 +``` + +2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost` +to the VictoriaMetrics host in `StatsD` configs. + + +Example for writing data with Graphite plaintext protocol to local VictoriaMetrics using `nc`: + +``` +echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 +``` + +VictoriaMetrics sets the current time if the timestamp is omitted. +An arbitrary number of lines delimited by `\n` may be sent in one go. +After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint: + +``` +curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}' +``` + +The `/api/v1/export` endpoint should return the following response: + +``` +{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277406000]} +``` + + +### Querying Graphite data + +Data sent to VictoriaMetrics via `Graphite plaintext protocol` may be read either via +[Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/) +or via [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml). + + + +### How to send data from OpenTSDB-compatible agents? + +VictoriaMetrics supports [telnet put protocol](http://opentsdb.net/docs/build/html/api_telnet/put.html) +and [HTTP /api/put requests](http://opentsdb.net/docs/build/html/api_http/put.html) for ingesting OpenTSDB data. + +#### Sending data via `telnet put` protocol + +1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. 
For instance, +the following command enables OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`: + +``` +/path/to/victoria-metrics-prod -opentsdbListenAddr=:4242 +``` + +2) Send data to the given address from OpenTSDB-compatible agents. + + +Example for writing data with OpenTSDB protocol to local VictoriaMetrics using `nc`: + +``` +echo "put foo.bar.baz `date +%s` 123 tag1=value1 tag2=value2" | nc -N localhost 4242 +``` + +An arbitrary number of lines delimited by `\n` may be sent in one go. +After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint: + +``` +curl -G 'http://localhost:8428/api/v1/export' -d 'match={__name__!=""}' +``` + +The `/api/v1/export` endpoint should return the following response: + +``` +{"metric":{"__name__":"foo.bar.baz","tag1":"value1","tag2":"value2"},"values":[123],"timestamps":[1560277292000]} +``` + + +#### Sending OpenTSDB data via HTTP `/api/put` requests + +1) Enable HTTP server for OpenTSDB `/api/put` requests by setting `-opentsdbHTTPListenAddr` command line flag. For instance, +the following command enables OpenTSDB HTTP server on port `4242`: + +``` +/path/to/victoria-metrics-prod -opentsdbHTTPListenAddr=:4242 +``` + +2) Send data to the given address from OpenTSDB-compatible agents. 
+ +Example for writing a single data point: + +``` +curl -H 'Content-Type: application/json' -d '{"metric":"x.y.z","value":45.34,"tags":{"t1":"v1","t2":"v2"}}' http://localhost:4242/api/put +``` + +Example for writing multiple data points in a single request: + +``` +curl -H 'Content-Type: application/json' -d '[{"metric":"foo","value":45.34},{"metric":"bar","value":43}]' http://localhost:4242/api/put +``` + +After that the data may be read via [/api/v1/export](#how-to-export-time-series) endpoint: + +``` +curl -G 'http://localhost:8428/api/v1/export' -d 'match[]=x.y.z' -d 'match[]=foo' -d 'match[]=bar' +``` + +The `/api/v1/export` endpoint should return the following response: + +``` +{"metric":{"__name__":"foo"},"values":[45.34],"timestamps":[1566464846000]} +{"metric":{"__name__":"bar"},"values":[43],"timestamps":[1566464846000]} +{"metric":{"__name__":"x.y.z","t1":"v1","t2":"v2"},"values":[45.34],"timestamps":[1566464763000]} +``` + + +### How to build from sources + +We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or +[docker images](https://hub.docker.com/r/victoriametrics/victoria-metrics/) instead of building VictoriaMetrics +from sources. Building from sources is reasonable when developing additional features specific +to your needs. + + +#### Development build + +1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12. +2. Run `make victoria-metrics` from the root folder of the repository. + It builds `victoria-metrics` binary and puts it into the `bin` folder. + +#### Production build + +1. [Install docker](https://docs.docker.com/install/). +2. Run `make victoria-metrics-prod` from the root folder of the repository. + It builds `victoria-metrics-prod` binary and puts it into the `bin` folder. + +#### ARM build + +ARM build may run on Raspberry Pi or on [energy-efficient ARM servers](https://blog.cloudflare.com/arm-takes-wing/). + +#### Development ARM build + +1. 
[Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12. +2. Run `make victoria-metrics-arm` or `make victoria-metrics-arm64` from the root folder of the repository. + It builds `victoria-metrics-arm` or `victoria-metrics-arm64` binary respectively and puts it into the `bin` folder. + +#### Production ARM build + +1. [Install docker](https://docs.docker.com/install/). +2. Run `make victoria-metrics-arm-prod` or `make victoria-metrics-arm64-prod` from the root folder of the repository. + It builds `victoria-metrics-arm-prod` or `victoria-metrics-arm64-prod` binary respectively and puts it into the `bin` folder. + +#### Pure Go build (CGO_ENABLED=0) + +`Pure Go` mode builds only Go code without [cgo](https://golang.org/cmd/cgo/) dependencies. +This is an experimental mode, which may result in a lower compression ratio and slower decompression performance. +Use it with caution! + +1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12. +2. Run `make victoria-metrics-pure` from the root folder of the repository. + It builds `victoria-metrics-pure` binary and puts it into the `bin` folder. + +#### Building docker images + +Run `make package-victoria-metrics`. It builds `victoriametrics/victoria-metrics:<PKG_TAG>` docker image locally. +`<PKG_TAG>` is an auto-generated image tag, which depends on source code in the repository. +The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package-victoria-metrics`. + + +### Start with docker-compose + +[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/docker-compose.yml) +helps to spin up VictoriaMetrics, Prometheus and Grafana with one command. +More details may be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#folder-contains-basic-images-and-tools-for-building-and-running-victoria-metrics-in-docker).
+ + +### Setting up service + +Read [these instructions](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/43) on how to set up VictoriaMetrics as a service in your OS. + + +### Third-party contributions + +* [Unofficial yum repository](https://copr.fedorainfracloud.org/coprs/antonpatsev/VictoriaMetrics/) ([source code](https://github.com/patsevanton/victoriametrics-rpm)) + + +### How to work with snapshots? + +VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) +for all the data stored under `-storageDataPath` directory. +Navigate to `http://<victoriametrics-addr>:8428/snapshot/create` in order to create an instant snapshot. +The page will return the following JSON response: + +``` +{"status":"ok","snapshot":"<snapshot-name>"} +``` + +Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>` +is the command-line flag value. Snapshots can be archived to backup storage at any time +with [vmbackup](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmbackup/README.md). + +The `http://<victoriametrics-addr>:8428/snapshot/list` page contains the list of available snapshots. + +Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete?snapshot=<snapshot-name>` in order +to delete `<snapshot-name>` snapshot. + +Navigate to `http://<victoriametrics-addr>:8428/snapshot/delete_all` in order to delete all the snapshots. + +Steps for restoring from a snapshot: +1. Stop VictoriaMetrics with `kill -INT`. +2. Restore snapshot contents from backup with [vmrestore](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmrestore/README.md) + to the directory pointed by `-storageDataPath`. +3. Start VictoriaMetrics. + + +### How to delete time series? + +Send a request to `http://<victoriametrics-addr>:8428/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`, +where `<timeseries_selector_for_delete>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) +for metrics to delete.
After that all the time series matching the given selector are deleted. Storage space for +the deleted time series isn't freed instantly - it is freed during subsequent merges of data files. + +It is recommended to verify which metrics will be deleted with a call to `http://<victoriametrics-addr>:8428/api/v1/series?match[]=<timeseries_selector_for_delete>` +before actually deleting the metrics. + + +### How to export time series? + +Send a request to `http://<victoriametrics-addr>:8428/api/v1/export?match[]=<timeseries_selector_for_export>`, +where `<timeseries_selector_for_export>` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) +for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON). +Each JSON line would contain data for a single time series. An example output: + +``` +{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]} +{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]} +``` + +Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either +unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. + + +### Federation + +VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/) +at `http://<victoriametrics-addr>:8428/federate?match[]=<timeseries_selector_for_federation>`. + +Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval. +`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. By default, the last point +on the interval `[now - max_lookback ... now]` is scraped for each time series.
The default value for `max_lookback` is `5m` (5 minutes), but it can be overridden. +For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation +with scrape intervals exceeding `5m`. + + +### Capacity planning + +A rough estimation of the required resources for ingestion path: + +* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series. + Time series is considered active if new data points have been added to it recently or if it has been recently queried. + The number of active time series may be obtained from `vm_cache_entries{type="storage/hour_metric_ids"}` metric + exported on the `/metrics` page. + VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited by `-memory.allowedPercent` flag. + +* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing + the insert stream of 1M data points per second. The ingestion rate may be lower for high cardinality data or for time series with high number of labels. + See [this article](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) for details. + If you see lower numbers per CPU core, then it is likely active time series info doesn't fit caches, + so you need more RAM for lowering CPU usage. + +* Storage space: less than a byte per data point on average. So, ~260GB is required for storing a month-long insert stream + of 100K data points per second. + The actual storage size heavily depends on data randomness (entropy). Higher randomness means higher storage size requirements. + Read [this article](https://medium.com/faun/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932) + for details. + +* Network usage: outbound traffic is negligible.
Ingress traffic is ~100 bytes per ingested data point via + [Prometheus remote_write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write). + The actual ingress bandwidth usage depends on the average number of labels per ingested metric and the average size + of label values. The higher number of per-metric labels and longer label values mean the higher ingress bandwidth. + + +The required resources for query path: + +* RAM size: depends on the number of time series to scan in each query and the `step` + argument passed to [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries). + The higher number of scanned time series and lower `step` argument results in the higher RAM usage. + +* CPU cores: a CPU core per 30 million scanned data points per second. + +* Network usage: depends on the frequency and the type of incoming requests. Typical Grafana dashboards usually + require negligible network bandwidth. + + +### High availability + +1) Install multiple VictoriaMetrics instances in distinct datacenters (availability zones). +2) Add addresses of these instances to `remote_write` section in Prometheus config: + +```yml +remote_write: + - url: http://<victoriametrics-addr-1>:8428/api/v1/write + queue_config: + max_samples_per_send: 10000 + # ... + - url: http://<victoriametrics-addr-2>:8428/api/v1/write + queue_config: + max_samples_per_send: 10000 +``` + +3) Apply the updated config: + +``` +kill -HUP `pidof prometheus` +``` + +4) Now Prometheus should write data into all the configured `remote_write` urls in parallel. +5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas. +6) Set up Prometheus datasource in Grafana that points to Promxy. + + +If you have Prometheus HA pairs with replicas `r1` and `r2` in each pair, then configure each `r1` +to write data to `victoriametrics-addr-1`, while each `r2` should write data to `victoriametrics-addr-2`.
+ + +### Multiple retentions + +Just start multiple VictoriaMetrics instances with distinct values for the following flags: + +* `-retentionPeriod` +* `-storageDataPath`, so the data for each retention period is saved in a separate directory +* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention + + +### Downsampling + +There is no downsampling support at the moment, but: +- VictoriaMetrics is optimized for querying big amounts of raw data. See benchmark results for heavy queries + in [this article](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). +- VictoriaMetrics has good compression for on-disk data. See [this article](https://medium.com/@valyala/victoriametrics-achieving-better-compression-for-time-series-data-than-gorilla-317bc1f95932) + for details. + +These properties reduce the need in downsampling. We plan to implement downsampling in the future. +See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) for details. + + +### Multi-tenancy + +Single-node VictoriaMetrics doesn't support multi-tenancy. Use [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) instead. + + +### Scalability and cluster version + +Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU. +This means that a single-node VictoriaMetrics may scale vertically and substitute a moderately sized cluster built with competing solutions +such as Thanos, Uber M3, InfluxDB or TimescaleDB. See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae). 
+ +So try single-node VictoriaMetrics at first and then [switch to cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) if you still need +horizontally scalable long-term remote storage for really large Prometheus deployments. +[Contact us](mailto:info@victoriametrics.com) for paid support. + + +### Alerting + +VictoriaMetrics doesn't support rule evaluation and alerting yet, so these actions must be performed either +on [Prometheus side](https://prometheus.io/docs/alerting/overview/) or on [Grafana side](https://grafana.com/docs/alerting/rules/). + + +### Security + +Do not forget to protect sensitive endpoints in VictoriaMetrics when exposing it to untrusted networks such as the internet. +Consider setting the following command-line flags: + +* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS. +* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints + with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication). +* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series). +* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots). + +Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats. +For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=<internal_iface_ip>:2003`. + + +### Tuning + +* There is no need in VictoriaMetrics tuning since it uses reasonable defaults for command-line flags, + which are automatically adjusted for the available CPU and RAM resources. +* There is no need in Operating System tuning since VictoriaMetrics is optimized for default OS settings.
+ The only option is increasing the limit on [the number of open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a), + so Prometheus instances could establish more connections to VictoriaMetrics. +* The recommended filesystem is `ext4`, the recommended persistent storage is [persistent HDD-based disk on GCP](https://cloud.google.com/compute/docs/disks/#pdspecs), + since it is protected from hardware failures via internal replication and it can be [resized on the fly](https://cloud.google.com/compute/docs/disks/add-persistent-disk#resize_pd). + If you plan storing more than 1TB of data on `ext4` partition or plan extending it to more than 16TB, + then the following options are recommended to pass to `mkfs.ext4`: + +``` +mkfs.ext4 ... -O 64bit,huge_file,extent -T huge +``` + + +### Monitoring + +VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page. +Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics. +There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229). + +The most interesting metrics are: + +* `vm_cache_entries{type="storage/hour_metric_ids"}` - the number of time series with new data points during the last hour + aka active time series. +* `rate(vm_new_timeseries_created_total[5m])` - time series churn rate. +* `vm_rows{type="indexdb"}` - the number of rows in inverted index. High value for this number usually means high churn rate for time series. +* Sum of `vm_rows{type="storage/big"}` and `vm_rows{type="storage/small"}` - total number of `(timestamp, value)` data points + in the database. +* Sum of all the `vm_cache_size_bytes` metrics - the total size of all the caches in the database. +* `vm_allowed_memory_bytes` - the maximum allowed size for caches in the database.
It is calculated as `system_memory * <-memory.allowedPercent> / 100`, + where `system_memory` is the amount of system memory and `-memory.allowedPercent` is the corresponding flag value. +* `vm_rows_inserted_total` - the total number of inserted rows since VictoriaMetrics start. + + +### Troubleshooting + +* It is recommended to use default command-line flag values (i.e. don't set them explicitly) until the need + in tweaking these flag values arises. + +* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second, + then it is likely you have too many active time series for the current amount of RAM. + It is recommended to increase the amount of RAM on the node with VictoriaMetrics in order to improve + ingestion performance. + Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this + option, since too big value for `-memory.allowedPercent` may result in high I/O usage. + +* VictoriaMetrics requires free disk space for [merging data files to bigger ones](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). + It may slow down when there is not enough free space left. So make sure `-storageDataPath` directory + has at least 20% of free space compared to disk size. + +* If VictoriaMetrics doesn't work because certain parts are corrupted due to disk errors, + then just remove directories with broken parts. This will recover VictoriaMetrics at the cost + of losing the data stored in the broken parts. In the future, `vmrecover` tool will be created + for automatic recovering from such errors. + + +### Backfilling + +Make sure that configured `-retentionPeriod` covers timestamps for the backfilled data.
+ +It is recommended to disable query cache with `-search.disableCache` command-line flag when writing +historical data with timestamps from the past, since the cache assumes that the data is written with +the current timestamps. Query cache can be enabled after the backfilling is complete. + + +### Profiling + +VictoriaMetrics provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs): + +- Memory profile. It can be collected with the following command: +``` +curl -s http://<victoria-metrics-host>:8428/debug/pprof/heap > mem.pprof +``` + +- CPU profile. It can be collected with the following command: +``` +curl -s http://<victoria-metrics-host>:8428/debug/pprof/profile > cpu.pprof +``` + +The command for collecting CPU profile waits for 30 seconds before returning. + +The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof). + + +## Integrations + +* [netdata](https://github.com/netdata/netdata) can push data into VictoriaMetrics via `Prometheus remote_write API`. + See [these docs](https://github.com/netdata/netdata#integrations). +* [go-graphite/carbonapi](https://github.com/go-graphite/carbonapi) can use VictoriaMetrics as time series backend. + See [this example](https://github.com/go-graphite/carbonapi/blob/master/cmd/carbonapi/carbonapi.example.prometheus.yaml). +* [Ansible role for installing VictoriaMetrics](https://github.com/dreamteam-gg/ansible-victoriametrics-role).
+ + +## Roadmap + +- [ ] Replication [#118](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/118) +- [ ] Support of Object Storages (GCS, S3, Azure Storage) [#38](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/38) +- [ ] Data downsampling [#36](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/36) +- [ ] Alert Manager Integration [#119](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/119) +- [ ] CLI tool for data migration, re-balancing and adding/removing nodes [#103](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/103) + + +The discussion happens [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/129). Feel free to comment on any item or add your own. + + +## Contacts + +Contact us with any questions regarding VictoriaMetrics at [info@victoriametrics.com](mailto:info@victoriametrics.com). + + +## Community and contributions + +Feel free to ask any questions regarding VictoriaMetrics: + +- [slack](http://slack.victoriametrics.com/) +- [telegram-en](https://t.me/VictoriaMetrics_en) +- [telegram-ru](https://t.me/VictoriaMetrics_ru1) +- [google groups](https://groups.google.com/forum/#!forum/victorametrics-users) + + +If you like VictoriaMetrics and want to contribute, then we need the following: + +- Filing issues and feature requests [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues). +- Spreading the word about VictoriaMetrics: conference talks, articles, comments, experience sharing with colleagues. +- Updating documentation. + +We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle): + +- Prefer simple code and architecture. +- Avoid complex abstractions. +- Avoid magic code and fancy algorithms. +- Avoid [big external dependencies](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d). +- Minimize the number of moving parts in the distributed system.
+- Avoid automated decisions, which may hurt cluster availability, consistency or performance. + +Adhering to the `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people. + + +## Reporting bugs + +Report bugs and propose new features [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues). + + +## Victoria Metrics Logo + +[Zip](VM_logo.zip) contains three folders with different image orientation (main color and inverted version). + +Files included in each folder: + +* 2 JPEG Preview files +* 2 PNG Preview files with transparent background +* 2 EPS Adobe Illustrator EPS10 files + + +### Logo Usage Guidelines + +#### Font used: + +* Lato Black +* Lato Regular + +#### Color Palette: + +* HEX [#110f0f](https://www.color-hex.com/color/110f0f) +* HEX [#ffffff](https://www.color-hex.com/color/ffffff) + +### We kindly ask: + +- Please don't use any font other than the suggested ones. +- There should be sufficient clear space around the logo. +- Do not change spacing, alignment, or relative locations of the design elements. +- Do not change the proportions of any of the design elements or the design itself. You may resize as needed but must retain all proportions.