diff --git a/.gitignore b/.gitignore
index b5246b398..0b323d148 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,10 @@
/victoria-metrics-data
/vmstorage-data
/vmselect-cache
+.DS_Store
+
+
+### terraform
+terraform.tfstate
+terraform.tfstate.*
+.terraform/
diff --git a/Makefile b/Makefile
index 9d258dfa6..d3a614cb0 100644
--- a/Makefile
+++ b/Makefile
@@ -11,16 +11,26 @@ endif
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(shell date -u +'%Y%m%d-%H%M%S')-$(BUILDINFO_TAG)'
all: \
- victoria-metrics-prod
+ vminsert \
+ vmselect \
+ vmstorage
include app/*/Makefile
include deployment/*/Makefile
+include deployment/*/helm/Makefile
clean:
rm -rf bin/*
-release: victoria-metrics-prod
- cd bin && tar czf victoria-metrics-$(PKG_TAG).tar.gz victoria-metrics-prod
+publish: \
+ publish-vmstorage \
+ publish-vmselect \
+ publish-vminsert
+
+package: \
+ package-vmstorage \
+ package-vmselect \
+ package-vminsert
fmt:
go fmt $(PKG_PREFIX)/lib/...
@@ -57,6 +67,9 @@ vendor-update:
go mod tidy
go mod vendor
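+# Builds bin/$(APP_NAME) locally without docker, e.g. `APP_NAME=vminsert make app-local`.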
+app-local:
+ GO111MODULE=on go build $(RACE) -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/$(APP_NAME)$(RACE) $(PKG_PREFIX)/app/$(APP_NAME)
+
quicktemplate-gen: install-qtc
qtc
diff --git a/README.md b/README.md
index 33a62a5f6..6738de51d 100644
--- a/README.md
+++ b/README.md
@@ -1,386 +1,170 @@
-## Single-node VictoriaMetrics
+# Cluster version of VictoriaMetrics
-[![Latest Release](https://img.shields.io/github/release/VictoriaMetrics/VictoriaMetrics.svg?style=flat-square)](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
+VictoriaMetrics is a fast and cost-effective long-term remote storage for Prometheus.
-VictoriaMetrics is a long-term remote storage for Prometheus.
-It is available in [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases),
-[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) and
-in [source code](https://github.com/VictoriaMetrics/VictoriaMetrics).
-
-Cluster version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
+The single-node version is available [here](https://github.com/VictoriaMetrics/VictoriaMetrics).
## Prominent features
-* Supports [Prometheus querying API](https://prometheus.io/docs/prometheus/latest/querying/api/), so it can be used as Prometheus drop-in replacement in Grafana.
- Additionally, VictoriaMetrics extends PromQL with opt-in [useful features](https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/ExtendedPromQL).
-* High performance and good scalability for both [inserts](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b)
- and [selects](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4).
- [Outperforms InfluxDB and TimescaleDB by up to 20x](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-* [Uses 10x less RAM than InfluxDB](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) when working with millions of unique time series (aka high cardinality).
-* High data compression, so [up to 70x more data points](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4)
- may be crammed into a limited storage comparing to TimescaleDB.
-* Optimized for storage with high-latency IO and low iops (HDD and network storage in AWS, Google Cloud, Microsoft Azure, etc). See [graphs from these benchmarks](https://medium.com/@valyala/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b).
-* A single-node VictoriaMetrics may substitute moderately sized clusters built with competing solutions such as Thanos, Uber M3, Cortex, InfluxDB or TimescaleDB.
- See [vertical scalability benchmarks](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
-* Easy operation:
- * VictoriaMetrics consists of a single executable without external dependencies.
- * All the configuration is done via explicit command-line flags with reasonable defaults.
- * All the data is stored in a single directory pointed by `-storageDataPath` flag.
- * Easy backups from [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Storage is protected from corruption on unclean shutdown (i.e. hardware reset or `kill -9`) thanks to [the storage architecture](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
-* Supports metrics' ingestion and backfilling via the following protocols:
- * [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
- * [InfluxDB line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
- * [Graphite plaintext protocol](https://graphite.readthedocs.io/en/latest/feeding-carbon.html) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon)
- if `-graphiteListenAddr` is set.
- * [OpenTSDB put message](http://opentsdb.net/docs/build/html/api_telnet/put.html) if `-opentsdbListenAddr` is set.
-* Ideally works with big amounts of time series data from IoT sensors, connected car sensors and industrial sensors.
-* Has open source [cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
+- Supports all the features of the [single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics).
+- Scales horizontally to multiple nodes.
+- Supports multiple independent namespaces for time series data (aka multi-tenancy).
-## Operation
+## Architecture overview
+
+VictoriaMetrics cluster consists of the following services:
+
+- `vmstorage` - stores the data
+- `vminsert` - proxies the ingested data to `vmstorage`
+- `vmselect` - performs incoming queries using the data from `vmstorage`
+
+Each service may scale independently and may run on the most suitable hardware.
-### Table of contents
+## Building from sources
-* [How to build from sources](#how-to-build-from-sources)
-* [How to start VictoriaMetrics](#how-to-start-victoriametrics)
-* [Prometheus setup](#prometheus-setup)
-* [Grafana setup](#grafana-setup)
-* [How to send data from InfluxDB-compatible agents such as Telegraf](#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf)
-* [How to send data from Graphite-compatible agents such as StatsD](#how-to-send-data-from-graphite-compatible-agents-such-as-statsd)
-* [How to send data from OpenTSDB-compatible agents](#how-to-send-data-from-opentsdb-compatible-agents)
-* [How to apply new config / ugrade VictoriaMetrics](#how-to-apply-new-config--upgrade-victoriametrics)
-* [How to work with snapshots](#how-to-work-with-snapshots)
-* [How to delete time series](#how-to-delete-time-series)
-* [How to export time series](#how-to-export-time-series)
-* [Federation](#federation)
-* [Capacity planning](#capacity-planning)
-* [High Availability](#high-availability)
-* [Multiple retentions](#multiple-retentions)
-* [Scalability and cluster version](#scalability-and-cluster-version)
-* [Security](#security)
-* [Tuning](#tuning)
-* [Monitoring](#monitoring)
-* [Troubleshooting](#troubleshooting)
-* [Community and contributions](#community-and-contributions)
-* [Reporting bugs](#reporting-bugs)
+### Development builds
+
+1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
+2. Run `make` from the repository root. It should build `vmstorage`, `vmselect`
+ and `vminsert` binaries and put them into the `bin` folder.
-### How to build from sources
+### Production builds
-We recommend using either [binary releases](https://github.com/VictoriaMetrics/VictoriaMetrics/releases) or
-[docker images](https://hub.docker.com/r/valyala/victoria-metrics/) instead of building VictoriaMetrics
-from sources. Building from sources is reasonable when developing an additional features specific
-to your needs.
+There is no need to install Go on the host system, since binaries are built
+inside [the official docker container for Go](https://hub.docker.com/_/golang).
+This allows for reproducible builds.
+So [install docker](https://docs.docker.com/install/) and run the following command:
+```
+make vminsert-prod vmselect-prod vmstorage-prod
+```
-#### Development build
+Production builds are statically linked binaries for `GOARCH=amd64`, `GOOS=linux`.
+They are put into the `bin` folder with `-prod` suffixes:
+```
+$ make vminsert-prod vmselect-prod vmstorage-prod
+$ ls -1 bin
+vminsert-prod
+vmselect-prod
+vmstorage-prod
+```
-1. [Install Go](https://golang.org/doc/install). The minimum supported version is Go 1.12.
-2. Run `go build ./app/victoria-metrics` from the root folder of the repository.
- It will build `victoria-metrics` binary in the root folder of the repository.
+### Building docker images
-#### Production build
+Run `make package`. It will build the following docker images locally:
-1. [Install docker](https://docs.docker.com/install/).
-2. Run `make victoria-metrics-prod` from the root folder of the respository.
- It will build `victoria-metrics-prod` binary and put it into the `bin` folder.
+* `valyala/vminsert:<PKG_TAG>`
+* `valyala/vmselect:<PKG_TAG>`
+* `valyala/vmstorage:<PKG_TAG>`
-#### Building docker images
-
-Run `make package-victoria-metrics`. It will build `valyala/victoria-metrics:` docker image locally.
`<PKG_TAG>` is an auto-generated image tag, which depends on source code in the repository.
The `<PKG_TAG>` may be manually set via `PKG_TAG=foobar make package`.
-### How to start VictoriaMetrics
+## Operation
-Just start VictoriaMetrics executable or docker image with the desired command-line flags.
+### Cluster setup
-The following command line flags are used the most:
+A minimal cluster must contain the following nodes:
-* `-storageDataPath` - path to data directory. VictoriaMetrics stores all the data in this directory.
-* `-retentionPeriod` - retention period in months for the data. Older data is automatically deleted.
-* `-httpListenAddr` - TCP address to listen to for http requests. By default it listens port `8428` on all the network interfaces.
-* `-graphiteListenAddr` - TCP and UDP address to listen to for Graphite data. By default it is disabled.
-* `-opentsdbListenAddr` - TCP and UDP address to listen to for OpenTSDB data. By default it is disabled.
+* a single `vmstorage` node with `-retentionPeriod` and `-storageDataPath` flags
+* a single `vminsert` node with `-storageNode=<vmstorage_host>:8400`
+* a single `vmselect` node with `-storageNode=<vmstorage_host>:8401`
-Pass `-help` to see all the available flags with description and default values.
+It is recommended to run at least two nodes for each service
+for high availability purposes.
+An HTTP load balancer must be put in front of the `vminsert` and `vmselect` nodes:
+- requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes.
+- requests starting with `/select` must be routed to port `8481` on `vmselect` nodes.
-### Prometheus setup
+Ports may be altered by setting `-httpListenAddr` on the corresponding nodes.
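+
+For illustration, a minimal cluster with a single node per service might be started roughly as follows.
+The binary paths, the `vmstorage-host` hostname and the data directory below are placeholders, not defaults:
+
+```
+/path/to/vmstorage-prod -retentionPeriod=12 -storageDataPath=/var/lib/vmstorage-data
+/path/to/vminsert-prod -storageNode=vmstorage-host:8400
+/path/to/vmselect-prod -storageNode=vmstorage-host:8401
+```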
-Add the following lines to Prometheus config file (it is usually located at `/etc/prometheus/prometheus.yml`):
-```yml
-remote_write:
- - url: http://:8428/api/v1/write
- queue_config:
- max_samples_per_send: 10000
-```
+### URL format
-Substitute `` with the hostname or IP address of VictoriaMetrics.
-Then apply the new config via the following command:
+* URLs for data ingestion: `/insert/<accountID>/<suffix>`, where:
+  - `<accountID>` is an arbitrary number identifying the namespace for data ingestion
+  - `<suffix>` may have the following values:
+    - `prometheus` - for inserting data with [Prometheus remote write API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write)
+    - `influx/write` or `influx/api/v2/write` - for inserting data with [Influx line protocol](https://docs.influxdata.com/influxdb/v1.7/write_protocols/line_protocol_tutorial/)
-```
-kill -HUP `pidof prometheus`
-```
+* URLs for querying: `/select/<accountID>/prometheus/<suffix>`, where:
+  - `<accountID>` is an arbitrary number identifying the data namespace for the query
+  - `<suffix>` may have the following values:
+    - `api/v1/query` - performs [PromQL instant query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
+    - `api/v1/query_range` - performs [PromQL range query](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries)
+    - `api/v1/series` - performs [series query](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers)
+    - `api/v1/labels` - returns a [list of label names](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names)
+    - `api/v1/label/<label_name>/values` - returns values for the given `<label_name>` according to [the API](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values)
+    - `federate` - returns [federated metrics](https://prometheus.io/docs/prometheus/latest/federation/)
+    - `api/v1/export` - exports raw data. See [this article](https://medium.com/@valyala/analyzing-prometheus-data-with-external-tools-5f3e5e147639) for details
-Prometheus writes incoming data to local storage and to remote storage in parallel.
-This means the data remains available in local storage for `--storage.tsdb.retention.time` duration
-if remote storage stops working.
+* `vmstorage` nodes provide the following HTTP endpoints on port `8482`:
+  - `/snapshot/create` - create [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282),
+    which can be used for backups in background. Snapshots are created in the `<storageDataPath>/snapshots` folder, where `<storageDataPath>` is the corresponding
+    command-line flag value.
+  - `/snapshot/list` - list available snapshots.
+  - `/snapshot/delete?snapshot=<snapshot-name>` - delete the given snapshot.
+  - `/snapshot/delete_all` - delete all the snapshots.
-If you plan sending data to VictoriaMetrics from multiple Prometheus instances, then add the following lines into `global` section
-of [Prometheus config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file):
+  Snapshots may be created independently on each `vmstorage` node. There is no need to synchronize snapshot creation
+  across `vmstorage` nodes.
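+
+As an illustration, data may be ingested and queried with plain `curl`. The hostnames, the tenant
+number `42` and the sample metric below are placeholders, not defaults:
+
+```
+# insert a sample for tenant 42 using the Influx line protocol
+curl -d 'cpu_usage,host=server01 value=0.42' 'http://<vminsert-or-lb-host>:8480/insert/42/influx/write'
+
+# run a PromQL instant query for the same tenant
+curl 'http://<vmselect-or-lb-host>:8481/select/42/prometheus/api/v1/query?query=up'
+
+# create an instant snapshot on a vmstorage node
+curl 'http://<vmstorage-host>:8482/snapshot/create'
+```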
-```yml
-global:
- external_labels:
- datacenter: dc-123
-```
-This instructs Prometheus to add `datacenter=dc-123` label to each time series sent to remote storage.
-The label name may be arbitrary - `datacenter` is just an example. The label value must be unique
-across Prometheus instances, so time series may be filtered and grouped by this label.
+### Cluster resizing
+* `vminsert` and `vmselect` nodes are stateless and may be added / removed at any time.
+  Do not forget to update the list of these nodes on the HTTP load balancer.
+* `vmstorage` nodes own the ingested data, so they cannot be removed without data loss.
-### Grafana setup
+Steps to add a `vmstorage` node:
-Create [Prometheus datasource](http://docs.grafana.org/features/datasources/prometheus/) in Grafana with the following Url:
+1. Start a new `vmstorage` node.
+2. Gradually restart all the `vmselect` nodes with an updated `-storageNode` arg containing `<new_vmstorage_host>:8401`.
+3. Gradually restart all the `vminsert` nodes with an updated `-storageNode` arg containing `<new_vmstorage_host>:8400`.
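+
+For example, if the cluster already uses a `vmstorage-1` node and the new node is `vmstorage-2`
+(both hostnames are placeholders), each node would be restarted with flags along these lines:
+
+```
+/path/to/vmselect-prod -storageNode=vmstorage-1:8401 -storageNode=vmstorage-2:8401
+/path/to/vminsert-prod -storageNode=vmstorage-1:8400 -storageNode=vmstorage-2:8400
+```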
-```
-http://:8428
-```
-Substitute `` with the hostname or IP address of VictoriaMetrics.
+### Cluster availability
-Then build graphs with the created datasource using [Prometheus query language](https://prometheus.io/docs/prometheus/latest/querying/basics/).
-VictoriaMetrics supports native PromQL and [extends it with useful features](ExtendedPromQL).
+* HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
+* The cluster remains available if at least a single `vmstorage` node exists:
+  - `vminsert` re-routes data destined for unavailable `vmstorage` nodes to the remaining healthy `vmstorage` nodes
+ - `vmselect` continues serving partial responses if at least a single `vmstorage` node is available.
-### How to send data from InfluxDB-compatible agents such as [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/)?
-Just use `http://:8428` url instead of InfluxDB url in agents' configs.
-For instance, put the following lines into `Telegraf` config, so it sends data to VictoriaMetrics instead of InfluxDB:
+### Updating / reconfiguring cluster nodes
-```
-[[outputs.influxdb]]
- urls = ["http://:8428"]
-```
+All the node types - `vminsert`, `vmselect` and `vmstorage` - may be updated via graceful shutdown.
+Send the `SIGINT` signal to the corresponding process, wait until it finishes and then start the new version
+with the new config.
-Do not forget substituting `` with the real address where VictoriaMetrics runs.
+The cluster should remain in working state if at least a single node of each type remains available during
+the update process. See the [cluster availability](#cluster-availability) section for details.
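+
+For example, assuming a node runs under its production binary name and is not managed by an init system,
+a `vmselect` instance may be stopped gracefully like this before the new version is started:
+
+```
+kill -INT $(pidof vmselect-prod)
+```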
-VictoriaMetrics maps Influx data using the following rules:
-* [`db` query arg](https://docs.influxdata.com/influxdb/v1.7/tools/api/#write-http-endpoint) is mapped into `db` label value
-* Field names are mapped to time series names prefixed by `{measurement}.` value
-* Field values are mapped to time series values
-* Tags are mapped to Prometheus labels as-is
+### Helm
-### How to send data from Graphite-compatible agents such as [StatsD](https://github.com/etsy/statsd)?
+* Helm chart is available in the `deployment/k8s/helm/victoria-metrics` folder.
-1) Enable Graphite receiver in VictoriaMetrics by setting `-graphiteListenAddr` command line flag. For instance,
-the following command will enable Graphite receiver in VictoriaMetrics on TCP and UDP port `2003`:
+1. Install the cluster: `helm install -n <NAME> deployment/k8s/helm/victoria-metrics` or `ENV=<env> make helm-install`.
+2. Upgrade the cluster: `helm upgrade <NAME> deployment/k8s/helm/victoria-metrics` or `ENV=<env> make helm-upgrade`.
+3. Delete the cluster: `helm del --purge <NAME>` or `ENV=<env> make helm-delete`.
-```
-/path/to/victoria-metrics-prod ... -graphiteListenAddr=:2003
-```
+* Upgrade follows the `Cluster resizing` procedure under the hood.
-2) Use the configured address in Graphite-compatible agents. For instance, set `graphiteHost`
-to the VictoriaMetrics host in `StatsD` configs.
-
-
-### How to send data from OpenTSDB-compatible agents?
-
-1) Enable OpenTSDB receiver in VictoriaMetrics by setting `-opentsdbListenAddr` command line flag. For instance,
-the following command will enable OpenTSDB receiver in VictoriaMetrics on TCP and UDP port `4242`:
-
-```
-/path/to/victoria-metrics-prod ... -opentsdbListenAddr=:4242
-```
-
-2) Send data to the given address from OpenTSDB-compatible agents.
-
-
-### How to apply new config / upgrade VictoriaMetrics?
-
-VictoriaMetrics must be restarted in order to upgrade or apply new config:
-
-1) Send `SIGINT` signal to VictoriaMetrics process in order to gracefully stop it.
-2) Wait until the process stops. This can take a few seconds.
-3) Start the upgraded VictoriaMetrics with new config.
-
-
-### How to work with snapshots?
-
-Navigate to `http://:8428/snapshot/create` in order to create an instant snapshot.
-The page will return the following JSON response:
-
-```
-{"status":"ok","snapshot":""}
-```
-
-Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>`
-is the command-line flag value. Snapshots can be archived to backup storage via `rsync -L`, `scp -r`
-or any similar tool that follows symlinks during copying.
-
-The `http://:8428/snapshot/list` page contains the list of available snapshots.
-
-Navigate to `http://:8428/snapshot/delete?snapshot=` in order
-to delete `` snapshot.
-
-Navigate to `http://:8428/snapshot/delete_all` in order to delete all the snapshots.
-
-
-### How to delete time series?
-
-Send a request to `http://:8428/api/v1/admin/tsdb/delete_series?match[]=`,
-where `` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to delete. After that all the time series matching the given selector are deleted. Storage space for
-the deleted time series isn't freed instantly - it is freed during subsequent merges of data files.
-
-
-### How to export time series?
-
-Send a request to `http://:8428/api/v1/export?match[]=`,
-where `` may contain any [time series selector](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors)
-for metrics to export. The response would contain all the data for the selected time series in [JSON streaming format](https://en.wikipedia.org/wiki/JSON_streaming#Line-delimited_JSON).
-Each JSON line would contain data for a single time series. An example output:
-
-```
-{"metric":{"__name__":"up","job":"node_exporter","instance":"localhost:9100"},"values":[0,0,0],"timestamps":[1549891472010,1549891487724,1549891503438]}
-{"metric":{"__name__":"up","job":"prometheus","instance":"localhost:9090"},"values":[1,1,1],"timestamps":[1549891461511,1549891476511,1549891491511]}
-```
-
-Optional `start` and `end` args may be added to the request in order to limit the time frame for the exported data. These args may contain either
-unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values.
-
-
-### Federation
-
-VictoriaMetrics exports [Prometheus-compatible federation data](https://prometheus.io/docs/prometheus/latest/federation/)
-at `http://:8428/federate?match[]=`.
-
-Optional `start` and `end` args may be added to the request in order to scrape the last point for each selected time series on the `[start ... end]` interval.
-`start` and `end` may contain either unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) values. By default the last point
-on the interval `[now - max_lookback ... now]` is scraped for each time series. Default value for `max_lookback` is `5m` (5 minutes), but can be overriden.
-For instance, `/federate?match[]=up&max_lookback=1h` would return last points on the `[now - 1h ... now]` interval. This may be useful for time series federation
-with scrape intervals exceeding `5m`.
-
-
-### Capacity planning
-
-Rough estimation of the required resources:
-
-* RAM size: less than 1KB per active time series. So, ~1GB of RAM is required for 1M active time series.
- Time series is considered active if new data points have been added to it recently or if it has been recently queried.
- VictoriaMetrics stores various caches in RAM. Memory size for these caches may be limited with `-memory.allowedPercent` flag.
-* CPU cores: a CPU core per 300K inserted data points per second. So, ~4 CPU cores are required for processing
- the insert stream of 1M data points per second.
- If you see lower numbers per CPU core, then it is likely active time series info doesn't fit caches,
- so you need more RAM for lowering CPU usage.
-* Storage size: less than a byte per data point on average. So, ~260GB is required for storing a month-long insert stream
- of 100K data points per second.
- The actual storage size heavily depends on data randomness (entropy). Higher randomness means higher storage size requirements.
-
-
-### High availability
-
-1) Install multiple VictoriaMetrics instances in distinct datacenters.
-2) Add addresses of these instances to `remote_write` section in Prometheus config:
-
-```yml
-remote_write:
- - url: http://:8428/api/v1/write
- queue_config:
- max_samples_per_send: 10000
- # ...
- - url: http://:8428/api/v1/write
- queue_config:
- max_samples_per_send: 10000
-```
-
-3) Apply the updated config:
-
-```
-kill -HUP `pidof prometheus`
-```
-
-4) Now Prometheus should write data into all the configured `remote_write` urls in parallel.
-5) Set up [Promxy](https://github.com/jacksontj/promxy) in front of all the VictoriaMetrics replicas.
-6) Set up Prometheus datasource in Grafana that points to Promxy.
-
-
-### Multiple retentions
-
-Just start multiple VictoriaMetrics instances with distinct values for the following flags:
-
-* `-retentionPeriod`
-* `-storageDataPath`, so the data for each retention period is saved in a separate directory
-* `-httpListenAddr`, so clients may reach VictoriaMetrics instance with proper retention
-
-
-### Scalability and cluster version
-
-Though single-node VictoriaMetrics cannot scale to multiple nodes, it is optimized for resource usage - storage size / bandwidth / IOPS, RAM, CPU.
-This means that a single-node VictoriaMetrics may scale vertically and substitute moderately sized cluster built with competing solutions
-such as Thanos, Uber M3, InfluxDB or TimescaleDB.
-
-So try single-node VictoriaMetrics at first and then [switch to cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster) if you still need
-horizontally scalable long-term remote storage for really large Prometheus deployments.
-[Contact us](mailto:info@victoriametrics.com) for paid support.
-
-
-### Security
-
-Do not forget protecting sensitive endpoints in VictoriaMetrics when exposing it to untrusted networks such as internet.
-Consider setting the following command-line flags:
-
-* `-tls`, `-tlsCertFile` and `-tlsKeyFile` for switching from HTTP to HTTPS.
-* `-httpAuth.username` and `-httpAuth.password` for protecting all the HTTP endpoints
- with [HTTP Basic Authentication](https://en.wikipedia.org/wiki/Basic_access_authentication).
-* `-deleteAuthKey` for protecting `/api/v1/admin/tsdb/delete_series` endpoint. See [how to delete time series](#how-to-delete-time-series).
-* `-snapshotAuthKey` for protecting `/snapshot*` endpoints. See [how to work with snapshots](#how-to-work-with-snapshots).
-
-Explicitly set internal network interface for TCP and UDP ports for data ingestion with Graphite and OpenTSDB formats.
-For example, substitute `-graphiteListenAddr=:2003` with `-graphiteListenAddr=:2003`.
-
-
-### Tuning
-
-* There is no need in VictoriaMetrics tuning, since it uses reasonable defaults for command-line flags,
- which are automatically adjusted for the available CPU and RAM resources.
-* There is no need in Operating System tuning, since VictoriaMetrics is optimized for default OS settings.
- The only option is increasing the limit on [the number open files in the OS](https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a),
- so Prometheus instances could establish more connections to VictoriaMetrics.
-
-
-### Monitoring
-
-VictoriaMetrics exports internal metrics in Prometheus format on the `/metrics` page.
-Add this page to Prometheus' scrape config in order to collect VictoriaMetrics metrics.
-There is [an official Grafana dashboard for single-node VictoriaMetrics](https://grafana.com/dashboards/10229).
-
-
-### Troubleshooting
-
-* If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
- then it is likely you have too many active time series for the current amount of RAM.
- It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
- ingestion performance.
- Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
- option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
## Community and contributions
-Feel free asking any questions regarding VictoriaMetrics [here](https://groups.google.com/forum/#!forum/victorametrics-users).
-
We are open to third-party pull requests provided they follow [KISS design principle](https://en.wikipedia.org/wiki/KISS_principle):
- Prefer simple code and architecture.
@@ -392,6 +176,17 @@ We are open to third-party pull requests provided they follow [KISS design princ
Adhering `KISS` principle simplifies the resulting code and architecture, so it can be reviewed, understood and verified by many people.
+Due to the `KISS` principle, the cluster version of VictoriaMetrics does not support the following "features" popular in the distributed computing world:
+
+- Fragile [gossip protocols](https://github.com/improbable-eng/thanos/blob/master/docs/proposals/approved/201809_gossip-removal.md).
+- Hard-to-understand-and-implement-properly [Paxos protocols](https://www.quora.com/In-distributed-systems-what-is-a-simple-explanation-of-the-Paxos-algorithm).
+- Complex replication schemes, which may go nuts in unforeseen edge cases. The replication is offloaded to the underlying durable replicated storage
+ such as [persistent disks in Google Compute Engine](https://cloud.google.com/compute/docs/disks/#pdspecs).
+- Automatic data reshuffling between storage nodes, which may hurt cluster performance and availability.
+- Automatic cluster resizing, which may cost you a lot of money if improperly configured.
+- Automatic discovery and addition of new nodes in the cluster, which may mix data between dev and prod clusters :)
+- Automatic leader election, which may result in split brain disaster on network errors.
+
## Reporting bugs
diff --git a/app/victoria-metrics/Makefile b/app/victoria-metrics/Makefile
deleted file mode 100644
index 6077a7d83..000000000
--- a/app/victoria-metrics/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# All these commands must run from repository root.
-
-victoria-metrics-prod:
- APP_NAME=victoria-metrics $(MAKE) app-via-docker
-
-package-victoria-metrics:
- APP_NAME=victoria-metrics \
- $(MAKE) package-via-docker
-
-publish-victoria-metrics:
- APP_NAME=victoria-metrics $(MAKE) publish-via-docker
-
-run-victoria-metrics:
- mkdir -p victoria-metrics-data
- DOCKER_OPTS='-v $(shell pwd)/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 -p 2003:2003 -p 2003:2003/udp' \
- APP_NAME=victoria-metrics \
- ARGS='-graphiteListenAddr=:2003 -opentsdbListenAddr=:4242 -retentionPeriod=12 -search.maxUniqueTimeseries=1000000 -search.maxQueryDuration=10m' \
- $(MAKE) run-via-docker
-
-victoria-metrics-arm:
- CC=arm-linux-gnueabi-gcc CGO_ENABLED=1 GOARCH=arm GO111MODULE=on go build -mod=vendor -ldflags "$(GO_BUILDINFO)" -o bin/victoria-metrics-arm ./app/victoria-metrics
diff --git a/app/victoria-metrics/main.go b/app/victoria-metrics/main.go
deleted file mode 100644
index 0dead5cc0..000000000
--- a/app/victoria-metrics/main.go
+++ /dev/null
@@ -1,60 +0,0 @@
-package main
-
-import (
- "flag"
- "net/http"
- "time"
-
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
-)
-
-var httpListenAddr = flag.String("httpListenAddr", ":8428", "TCP address to listen for http connections")
-
-func main() {
- flag.Parse()
- buildinfo.Init()
- logger.Init()
- logger.Infof("starting VictoraMetrics at %q...", *httpListenAddr)
- startTime := time.Now()
- vmstorage.Init()
- vmselect.Init()
- vminsert.Init()
-
- go httpserver.Serve(*httpListenAddr, requestHandler)
- logger.Infof("started VictoriaMetrics in %s", time.Since(startTime))
-
- sig := procutil.WaitForSigterm()
- logger.Infof("received signal %s", sig)
-
- logger.Infof("gracefully shutting down webservice at %q", *httpListenAddr)
- startTime = time.Now()
- if err := httpserver.Stop(*httpListenAddr); err != nil {
- logger.Fatalf("cannot stop the webservice: %s", err)
- }
- vminsert.Stop()
- logger.Infof("successfully shut down the webservice in %s", time.Since(startTime))
-
- vmstorage.Stop()
- vmselect.Stop()
-
- logger.Infof("the VictoriaMetrics has been stopped in %s", time.Since(startTime))
-}
-
-func requestHandler(w http.ResponseWriter, r *http.Request) bool {
- if vminsert.RequestHandler(w, r) {
- return true
- }
- if vmselect.RequestHandler(w, r) {
- return true
- }
- if vmstorage.RequestHandler(w, r) {
- return true
- }
- return false
-}
diff --git a/app/vminsert/Makefile b/app/vminsert/Makefile
new file mode 100644
index 000000000..1a798b56f
--- /dev/null
+++ b/app/vminsert/Makefile
@@ -0,0 +1,31 @@
+# All these commands must run from repository root.
+
+run-vminsert:
+ DOCKER_OPTS='-p 8480:8480' \
+ APP_NAME=vminsert \
+ ARGS='-storageNode=localhost:8400' \
+ $(MAKE) run-via-docker
+
+vminsert:
+ APP_NAME=vminsert $(MAKE) app-local
+
+vminsert-race:
+ APP_NAME=vminsert RACE=-race $(MAKE) app-local
+
+vminsert-prod:
+ APP_NAME=vminsert $(MAKE) app-via-docker
+
+vminsert-prod-race:
+ APP_NAME=vminsert RACE=-race $(MAKE) app-via-docker
+
+package-vminsert:
+ APP_NAME=vminsert $(MAKE) package-via-docker
+
+package-vminsert-race:
+ APP_NAME=vminsert RACE=-race $(MAKE) package-via-docker
+
+publish-vminsert:
+ APP_NAME=vminsert $(MAKE) publish-via-docker
+
+publish-vminsert-race:
+ APP_NAME=vminsert RACE=-race $(MAKE) publish-via-docker
diff --git a/app/vminsert/README.md b/app/vminsert/README.md
index 050290fa6..02705f5b7 100644
--- a/app/vminsert/README.md
+++ b/app/vminsert/README.md
@@ -1 +1 @@
-`vminsert` routes the ingested data to `vmstorage`.
+`vminsert` routes the ingested data to `vmstorage` nodes.
diff --git a/app/vminsert/common/insert_ctx.go b/app/vminsert/common/insert_ctx.go
deleted file mode 100644
index 65fff3ec1..000000000
--- a/app/vminsert/common/insert_ctx.go
+++ /dev/null
@@ -1,106 +0,0 @@
-package common
-
-import (
- "fmt"
-
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
-)
-
-// InsertCtx contains common bits for data points insertion.
-type InsertCtx struct {
- Labels []prompb.Label
-
- mrs []storage.MetricRow
- metricNamesBuf []byte
-}
-
-// Reset resets ctx for future fill with rowsLen rows.
-func (ctx *InsertCtx) Reset(rowsLen int) {
- for _, label := range ctx.Labels {
- label.Name = nil
- label.Value = nil
- }
- ctx.Labels = ctx.Labels[:0]
-
- for i := range ctx.mrs {
- mr := &ctx.mrs[i]
- mr.MetricNameRaw = nil
- }
- ctx.mrs = ctx.mrs[:0]
-
- if n := rowsLen - cap(ctx.mrs); n > 0 {
- ctx.mrs = append(ctx.mrs[:cap(ctx.mrs)], make([]storage.MetricRow, n)...)
- }
- ctx.mrs = ctx.mrs[:rowsLen]
- ctx.metricNamesBuf = ctx.metricNamesBuf[:0]
-}
-
-func (ctx *InsertCtx) marshalMetricNameRaw(prefix []byte, labels []prompb.Label) []byte {
- start := len(ctx.metricNamesBuf)
- ctx.metricNamesBuf = append(ctx.metricNamesBuf, prefix...)
- ctx.metricNamesBuf = storage.MarshalMetricNameRaw(ctx.metricNamesBuf, labels)
- metricNameRaw := ctx.metricNamesBuf[start:]
- return metricNameRaw[:len(metricNameRaw):len(metricNameRaw)]
-}
-
-// WriteDataPoint writes (timestamp, value) with the given prefix and lables into ctx buffer.
-func (ctx *InsertCtx) WriteDataPoint(prefix []byte, labels []prompb.Label, timestamp int64, value float64) {
- metricNameRaw := ctx.marshalMetricNameRaw(prefix, labels)
- ctx.addRow(metricNameRaw, timestamp, value)
-}
-
-// WriteDataPointExt writes (timestamp, value) with the given metricNameRaw and labels into ctx buffer.
-//
-// It returns metricNameRaw for the given labels if len(metricNameRaw) == 0.
-func (ctx *InsertCtx) WriteDataPointExt(metricNameRaw []byte, labels []prompb.Label, timestamp int64, value float64) []byte {
- if len(metricNameRaw) == 0 {
- metricNameRaw = ctx.marshalMetricNameRaw(nil, labels)
- }
- ctx.addRow(metricNameRaw, timestamp, value)
- return metricNameRaw
-}
-
-func (ctx *InsertCtx) addRow(metricNameRaw []byte, timestamp int64, value float64) {
- mrs := ctx.mrs
- if cap(mrs) > len(mrs) {
- mrs = mrs[:len(mrs)+1]
- } else {
- mrs = append(mrs, storage.MetricRow{})
- }
- mr := &mrs[len(mrs)-1]
- ctx.mrs = mrs
- mr.MetricNameRaw = metricNameRaw
- mr.Timestamp = timestamp
- mr.Value = value
-}
-
-// AddLabel adds (name, value) label to ctx.Labels.
-//
-// name and value must exist until ctx.Labels is used.
-func (ctx *InsertCtx) AddLabel(name, value string) {
- labels := ctx.Labels
- if cap(labels) > len(labels) {
- labels = labels[:len(labels)+1]
- } else {
- labels = append(labels, prompb.Label{})
- }
- label := &labels[len(labels)-1]
-
- // Do not copy name and value contents for performance reasons.
- // This reduces GC overhead on the number of objects and allocations.
- label.Name = bytesutil.ToUnsafeBytes(name)
- label.Value = bytesutil.ToUnsafeBytes(value)
-
- ctx.Labels = labels
-}
-
-// FlushBufs flushes buffered rows to the underlying storage.
-func (ctx *InsertCtx) FlushBufs() error {
- if err := vmstorage.AddRows(ctx.mrs); err != nil {
- return fmt.Errorf("cannot store metrics: %s", err)
- }
- return nil
-}
diff --git a/app/victoria-metrics/deployment/Dockerfile b/app/vminsert/deployment/Dockerfile
similarity index 57%
rename from app/victoria-metrics/deployment/Dockerfile
rename to app/vminsert/deployment/Dockerfile
index f47a803a6..8d2b114b0 100644
--- a/app/victoria-metrics/deployment/Dockerfile
+++ b/app/vminsert/deployment/Dockerfile
@@ -1,5 +1,5 @@
FROM scratch
COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
-COPY bin/victoria-metrics-prod .
-EXPOSE 8428
-ENTRYPOINT ["/victoria-metrics-prod"]
+COPY bin/vminsert-prod .
+EXPOSE 8480
+ENTRYPOINT ["/vminsert-prod"]
diff --git a/app/vminsert/graphite/request_handler.go b/app/vminsert/graphite/request_handler.go
index 7c59d7cb2..0d82356a9 100644
--- a/app/vminsert/graphite/request_handler.go
+++ b/app/vminsert/graphite/request_handler.go
@@ -9,8 +9,9 @@ import (
"sync"
"time"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,27 +21,27 @@ var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="graphite"}`)
// insertHandler processes remote write for graphite plaintext protocol.
//
// See https://graphite.readthedocs.io/en/latest/feeding-carbon.html#the-plaintext-protocol
-func insertHandler(r io.Reader) error {
+func insertHandler(at *auth.Token, r io.Reader) error {
return concurrencylimiter.Do(func() error {
- return insertHandlerInternal(r)
+ return insertHandlerInternal(at, r)
})
}
-func insertHandlerInternal(r io.Reader) error {
+func insertHandlerInternal(at *auth.Token, r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
- if err := ctx.InsertRows(); err != nil {
+ if err := ctx.InsertRows(at); err != nil {
return err
}
}
return ctx.Error()
}
-func (ctx *pushCtx) InsertRows() error {
+func (ctx *pushCtx) InsertRows(at *auth.Token) error {
rows := ctx.Rows.Rows
ic := &ctx.Common
- ic.Reset(len(rows))
+ ic.Reset()
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
@@ -49,7 +50,9 @@ func (ctx *pushCtx) InsertRows() error {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
}
- ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
+ if err := ic.WriteDataPoint(at, ic.Labels, r.Timestamp, r.Value); err != nil {
+ return err
+ }
}
rowsInserted.Add(len(rows))
return ic.FlushBufs()
@@ -110,7 +113,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
type pushCtx struct {
Rows Rows
- Common common.InsertCtx
+ Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
tailBuf []byte
@@ -128,7 +131,7 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
- ctx.Common.Reset(0)
+ ctx.Common.Reset()
ctx.reqBuf.Reset()
ctx.tailBuf = ctx.tailBuf[:0]
diff --git a/app/vminsert/graphite/server.go b/app/vminsert/graphite/server.go
index 75879853b..38abb5afe 100644
--- a/app/vminsert/graphite/server.go
+++ b/app/vminsert/graphite/server.go
@@ -7,6 +7,7 @@ import (
"sync"
"time"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
@@ -70,7 +71,8 @@ func serveTCP(ln net.Listener) {
}
go func() {
writeRequestsTCP.Inc()
- if err := insertHandler(c); err != nil {
+ var at auth.Token // TODO: properly initialize auth token
+ if err := insertHandler(&at, c); err != nil {
writeErrorsTCP.Inc()
logger.Errorf("error in TCP Graphite conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
}
@@ -88,6 +90,7 @@ func serveUDP(ln net.PacketConn) {
defer wg.Done()
var bb bytesutil.ByteBuffer
bb.B = bytesutil.Resize(bb.B, 64*1024)
+ var at auth.Token // TODO: properly initialize auth token
for {
bb.Reset()
bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +111,7 @@ func serveUDP(ln net.PacketConn) {
}
bb.B = bb.B[:n]
writeRequestsUDP.Inc()
- if err := insertHandler(bb.NewReader()); err != nil {
+ if err := insertHandler(&at, bb.NewReader()); err != nil {
writeErrorsUDP.Inc()
logger.Errorf("error in UDP Graphite conn %q<->%q: %s", ln.LocalAddr(), addr, err)
continue
diff --git a/app/vminsert/influx/request_handler.go b/app/vminsert/influx/request_handler.go
index 2c713f66c..773f2b94f 100644
--- a/app/vminsert/influx/request_handler.go
+++ b/app/vminsert/influx/request_handler.go
@@ -10,8 +10,9 @@ import (
"sync"
"time"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
@@ -22,13 +23,13 @@ var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="influx"}`)
// InsertHandler processes remote write for influx line protocol.
//
// See https://github.com/influxdata/influxdb/blob/4cbdc197b8117fee648d62e2e5be75c6575352f0/tsdb/README.md
-func InsertHandler(req *http.Request) error {
+func InsertHandler(at *auth.Token, req *http.Request) error {
return concurrencylimiter.Do(func() error {
- return insertHandlerInternal(req)
+ return insertHandlerInternal(at, req)
})
}
-func insertHandlerInternal(req *http.Request) error {
+func insertHandlerInternal(at *auth.Token, req *http.Request) error {
influxReadCalls.Inc()
r := req.Body
@@ -64,21 +65,17 @@ func insertHandlerInternal(req *http.Request) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r, tsMultiplier) {
- if err := ctx.InsertRows(db); err != nil {
+ if err := ctx.InsertRows(at, db); err != nil {
return err
}
}
return ctx.Error()
}
-func (ctx *pushCtx) InsertRows(db string) error {
+func (ctx *pushCtx) InsertRows(at *auth.Token, db string) error {
rows := ctx.Rows.Rows
- rowsLen := 0
- for i := range rows {
- rowsLen += len(rows[i].Tags)
- }
ic := &ctx.Common
- ic.Reset(rowsLen)
+ ic.Reset()
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
@@ -87,17 +84,25 @@ func (ctx *pushCtx) InsertRows(db string) error {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
}
- ctx.metricNameBuf = storage.MarshalMetricNameRaw(ctx.metricNameBuf[:0], ic.Labels)
+ ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ic.Labels)
+ metricNameBufLen := len(ic.MetricNameBuf)
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:0], r.Measurement...)
ctx.metricGroupBuf = append(ctx.metricGroupBuf, '.')
metricGroupPrefixLen := len(ctx.metricGroupBuf)
+ ic.Labels = ic.Labels[:0]
+ ic.AddLabel("", "placeholder")
+ placeholderLabel := &ic.Labels[len(ic.Labels)-1]
for j := range r.Fields {
f := &r.Fields[j]
ctx.metricGroupBuf = append(ctx.metricGroupBuf[:metricGroupPrefixLen], f.Key...)
metricGroup := bytesutil.ToUnsafeString(ctx.metricGroupBuf)
- ic.Labels = ic.Labels[:0]
+ ic.Labels = ic.Labels[:len(ic.Labels)-1]
ic.AddLabel("", metricGroup)
- ic.WriteDataPoint(ctx.metricNameBuf, ic.Labels[:1], r.Timestamp, f.Value)
+ ic.MetricNameBuf = storage.MarshalMetricLabelRaw(ic.MetricNameBuf[:metricNameBufLen], placeholderLabel)
+ storageNodeIdx := ic.GetStorageNodeIdx(at, ic.Labels)
+ if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, f.Value); err != nil {
+ return err
+ }
}
rowsInserted.Add(len(r.Fields))
}
@@ -189,12 +194,11 @@ var (
type pushCtx struct {
Rows Rows
- Common common.InsertCtx
+ Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
tailBuf []byte
copyBuf [16 * 1024]byte
- metricNameBuf []byte
metricGroupBuf []byte
err error
@@ -209,11 +213,9 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
- ctx.Common.Reset(0)
-
+ ctx.Common.Reset()
ctx.reqBuf.Reset()
ctx.tailBuf = ctx.tailBuf[:0]
- ctx.metricNameBuf = ctx.metricNameBuf[:0]
ctx.metricGroupBuf = ctx.metricGroupBuf[:0]
ctx.err = nil
diff --git a/app/vminsert/main.go b/app/vminsert/main.go
index eff463b81..944b67f0e 100644
--- a/app/vminsert/main.go
+++ b/app/vminsert/main.go
@@ -1,68 +1,119 @@
-package vminsert
+package main
import (
"flag"
"fmt"
"net/http"
- "strings"
+ "time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/graphite"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/influx"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/opentsdb"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/prometheus"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/metrics"
)
var (
graphiteListenAddr = flag.String("graphiteListenAddr", "", "TCP and UDP address to listen for Graphite plaintext data. Usually :2003 must be set. Doesn't work if empty")
opentsdbListenAddr = flag.String("opentsdbListenAddr", "", "TCP and UDP address to listen for OpentTSDB put messages. Usually :4242 must be set. Doesn't work if empty")
+ httpListenAddr = flag.String("httpListenAddr", ":8480", "Address to listen for http connections")
maxInsertRequestSize = flag.Int("maxInsertRequestSize", 32*1024*1024, "The maximum size of a single insert request in bytes")
+ storageNodes flagutil.Array
)
-// Init initializes vminsert.
-func Init() {
+func main() {
+	flag.Var(&storageNodes, "storageNode", "Address of vmstorage node; may be passed multiple times, e.g. -storageNode=vmstorage-host1:8400 -storageNode=vmstorage-host2:8400")
+ flag.Parse()
+ buildinfo.Init()
+ logger.Init()
+
+ logger.Infof("initializing netstorage for storageNodes=%v...", storageNodes)
+ startTime := time.Now()
+ if len(storageNodes) == 0 {
+ logger.Fatalf("storageNodes cannot be empty")
+ }
+ netstorage.InitStorageNodes(storageNodes)
+ logger.Infof("successfully initialized netstorage in %s", time.Since(startTime))
+
if len(*graphiteListenAddr) > 0 {
go graphite.Serve(*graphiteListenAddr)
}
if len(*opentsdbListenAddr) > 0 {
go opentsdb.Serve(*opentsdbListenAddr)
}
-}
-// Stop stops vminsert.
-func Stop() {
+ go func() {
+ httpserver.Serve(*httpListenAddr, requestHandler)
+ }()
+
+ sig := procutil.WaitForSigterm()
+ logger.Infof("service received signal %s", sig)
+
+ logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
+ startTime = time.Now()
+ if err := httpserver.Stop(*httpListenAddr); err != nil {
+ logger.Fatalf("cannot stop the service: %s", err)
+ }
+ logger.Infof("successfully shut down the service in %s", time.Since(startTime))
+
if len(*graphiteListenAddr) > 0 {
graphite.Stop()
}
if len(*opentsdbListenAddr) > 0 {
opentsdb.Stop()
}
+
+	logger.Infof("shutting down netstorage...")
+ startTime = time.Now()
+ netstorage.Stop()
+ logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
+
+ logger.Infof("the vminsert has been stopped")
}
-// RequestHandler is a handler for Prometheus remote storage write API
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
- path := strings.Replace(r.URL.Path, "//", "/", -1)
- switch path {
- case "/api/v1/write":
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
+ p, err := httpserver.ParsePath(r.URL.Path)
+ if err != nil {
+ httpserver.Errorf(w, "cannot parse path %q: %s", r.URL.Path, err)
+ return true
+ }
+ if p.Prefix != "insert" {
+ // This is not our link.
+ return false
+ }
+ at, err := auth.NewToken(p.AuthToken)
+ if err != nil {
+ httpserver.Errorf(w, "auth error: %s", err)
+ return true
+ }
+
+ switch p.Suffix {
+ case "prometheus/", "prometheus":
prometheusWriteRequests.Inc()
- if err := prometheus.InsertHandler(r, int64(*maxInsertRequestSize)); err != nil {
+ if err := prometheus.InsertHandler(at, r, int64(*maxInsertRequestSize)); err != nil {
prometheusWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
- case "/write", "/api/v2/write":
+ case "influx/write", "influx/api/v2/write":
influxWriteRequests.Inc()
- if err := influx.InsertHandler(r); err != nil {
+ if err := influx.InsertHandler(at, r); err != nil {
influxWriteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
w.WriteHeader(http.StatusNoContent)
return true
- case "/query":
+ case "influx/query":
// Emulate fake response for influx query
influxQueryRequests.Inc()
fmt.Fprintf(w, `{"results":[{"series":[{"values":[]}]}]}`)
@@ -74,11 +125,11 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
var (
- prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/write", protocol="prometheus"}`)
- prometheusWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/write", protocol="prometheus"}`)
+ prometheusWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)
+ prometheusWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/prometheus/", protocol="prometheus"}`)
- influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/write", protocol="influx"}`)
- influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/write", protocol="influx"}`)
+ influxWriteRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/", protocol="influx"}`)
+ influxWriteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/insert/{}/influx/", protocol="influx"}`)
- influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`)
+ influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/insert/{}/influx/query", protocol="influx"}`)
)
diff --git a/app/vminsert/netstorage/insert_ctx.go b/app/vminsert/netstorage/insert_ctx.go
new file mode 100644
index 000000000..738e52f05
--- /dev/null
+++ b/app/vminsert/netstorage/insert_ctx.go
@@ -0,0 +1,134 @@
+package netstorage
+
+import (
+ "fmt"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+ xxhash "github.com/cespare/xxhash/v2"
+ jump "github.com/lithammer/go-jump-consistent-hash"
+)
+
+// InsertCtx is a generic context for inserting data
+type InsertCtx struct {
+ Labels []prompb.Label
+ MetricNameBuf []byte
+
+ bufs [][]byte
+ labelsBuf []byte
+ sizeBuf [8]byte
+}
+
+// Reset resets ctx.
+func (ctx *InsertCtx) Reset() {
+ for _, label := range ctx.Labels {
+ label.Name = nil
+ label.Value = nil
+ }
+ ctx.Labels = ctx.Labels[:0]
+ ctx.MetricNameBuf = ctx.MetricNameBuf[:0]
+
+ if ctx.bufs == nil {
+ ctx.bufs = make([][]byte, len(storageNodes))
+ }
+ for i := range ctx.bufs {
+ ctx.bufs[i] = ctx.bufs[i][:0]
+ }
+ ctx.labelsBuf = ctx.labelsBuf[:0]
+}
+
+// AddLabel adds (name, value) label to ctx.Labels.
+//
+// name and value must exist until ctx.Labels is used.
+func (ctx *InsertCtx) AddLabel(name, value string) {
+ labels := ctx.Labels
+ if cap(labels) > len(labels) {
+ labels = labels[:len(labels)+1]
+ } else {
+ labels = append(labels, prompb.Label{})
+ }
+ label := &labels[len(labels)-1]
+
+ // Do not copy name and value contents for performance reasons.
+ // This reduces GC overhead on the number of objects and allocations.
+ label.Name = bytesutil.ToUnsafeBytes(name)
+ label.Value = bytesutil.ToUnsafeBytes(value)
+
+ ctx.Labels = labels
+}
+
+// WriteDataPoint writes (timestamp, value) data point with the given at and labels to ctx buffer.
+func (ctx *InsertCtx) WriteDataPoint(at *auth.Token, labels []prompb.Label, timestamp int64, value float64) error {
+ ctx.MetricNameBuf = storage.MarshalMetricNameRaw(ctx.MetricNameBuf[:0], at.AccountID, at.ProjectID, labels)
+ storageNodeIdx := ctx.GetStorageNodeIdx(at, labels)
+ return ctx.WriteDataPointExt(at, storageNodeIdx, ctx.MetricNameBuf, timestamp, value)
+}
+
+// WriteDataPointExt writes the given metricNameRaw with (timestamp, value) to ctx buffer with the given storageNodeIdx.
+func (ctx *InsertCtx) WriteDataPointExt(at *auth.Token, storageNodeIdx int, metricNameRaw []byte, timestamp int64, value float64) error {
+ buf := ctx.bufs[storageNodeIdx]
+ sn := storageNodes[storageNodeIdx]
+ bufNew := storage.MarshalMetricRow(buf, metricNameRaw, timestamp, value)
+ if len(bufNew) >= consts.MaxInsertPacketSize {
+ // Send buf to storageNode, since it is too big.
+ if err := sn.sendWithFallback(buf, ctx.sizeBuf[:]); err != nil {
+ return fmt.Errorf("cannot send %d bytes to storageNodes: %s", len(buf), err)
+ }
+ buf = storage.MarshalMetricRow(bufNew[:0], metricNameRaw, timestamp, value)
+ } else {
+ buf = bufNew
+ }
+ ctx.bufs[storageNodeIdx] = buf
+ sn.RowsPushed.Inc()
+ return nil
+}
+
+// FlushBufs flushes ctx bufs to remote storage nodes.
+func (ctx *InsertCtx) FlushBufs() error {
+ // Send per-storageNode bufs.
+ sizeBuf := ctx.sizeBuf[:]
+ for i, buf := range ctx.bufs {
+ if len(buf) == 0 {
+ continue
+ }
+ sn := storageNodes[i]
+ if err := sn.sendWithFallback(buf, sizeBuf); err != nil {
+ return fmt.Errorf("cannot send data to storageNodes: %s", err)
+ }
+ }
+ return nil
+}
+
+// GetStorageNodeIdx returns storage node index for the given at and labels.
+//
+// The returned index must be passed to WriteDataPointExt.
+func (ctx *InsertCtx) GetStorageNodeIdx(at *auth.Token, labels []prompb.Label) int {
+ if len(storageNodes) == 1 {
+ // Fast path - only a single storage node.
+ return 0
+ }
+
+ buf := ctx.labelsBuf[:0]
+ buf = encoding.MarshalUint32(buf, at.AccountID)
+ buf = encoding.MarshalUint32(buf, at.ProjectID)
+ for i := range labels {
+ label := &labels[i]
+ buf = marshalBytesFast(buf, label.Name)
+ buf = marshalBytesFast(buf, label.Value)
+ }
+ h := xxhash.Sum64(buf)
+ ctx.labelsBuf = buf
+
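+	// Use jump consistent hashing over the xxhash of (accountID, projectID, labels),
+	// so rows for the same time series consistently go to the same storage node.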
+ idx := int(jump.Hash(h, int32(len(storageNodes))))
+ return idx
+}
+
+func marshalBytesFast(dst []byte, s []byte) []byte {
+ dst = encoding.MarshalUint16(dst, uint16(len(s)))
+ dst = append(dst, s...)
+ return dst
+}
diff --git a/app/vminsert/netstorage/netstorage.go b/app/vminsert/netstorage/netstorage.go
new file mode 100644
index 000000000..1ef785172
--- /dev/null
+++ b/app/vminsert/netstorage/netstorage.go
@@ -0,0 +1,215 @@
+package netstorage
+
+import (
+ "fmt"
+ "sync"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
+ "github.com/VictoriaMetrics/metrics"
+)
+
+// sendWithFallback sends buf to storage node sn.
+//
+// It falls back to sending data to another storage node if sn is currently
+// unavailable.
+func (sn *storageNode) sendWithFallback(buf []byte, sizeBuf []byte) error {
+ deadline := time.Now().Add(30 * time.Second)
+ err := sn.sendBuf(buf, deadline, sizeBuf)
+ if err == nil {
+ return nil
+ }
+
+ // Failed to send the data to sn. Try sending it to another storageNodes.
+ if time.Until(deadline) <= 0 {
+ sn.timeouts.Inc()
+ return err
+ }
+ if len(storageNodes) == 1 {
+ return err
+ }
+ idx := func() int {
+ for i, snOther := range storageNodes {
+ if sn == snOther {
+ return i
+ }
+ }
+ logger.Panicf("BUG: cannot find storageNode %p in storageNodes %p", sn, storageNodes)
+ return -1
+ }()
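+	// Try the remaining storage nodes in round-robin order, starting from the node
+	// after sn, until a send succeeds or the deadline is exceeded.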
+ for i := 0; i < len(storageNodes); i++ {
+ idx++
+ if idx >= len(storageNodes) {
+ idx = 0
+ }
+ err = storageNodes[idx].sendBuf(buf, deadline, sizeBuf)
+ if err == nil {
+ storageNodes[idx].fallbacks.Inc()
+ return nil
+ }
+ if time.Until(deadline) <= 0 {
+ sn.timeouts.Inc()
+ return err
+ }
+ }
+ return err
+}
+
+func (sn *storageNode) sendBuf(buf []byte, deadline time.Time, sizeBuf []byte) error {
+ // sizeBuf guarantees that the batch of rows is either fully
+ // read or fully discarded on the vmstorage side.
+ // sizeBuf is also used for read optimization in vmstorage.
+ encoding.MarshalUint64(sizeBuf[:0], uint64(len(buf)))
+
+ sn.bcLock.Lock()
+ defer sn.bcLock.Unlock()
+
+ if sn.bc == nil {
+ if err := sn.dial(); err != nil {
+ return fmt.Errorf("cannot dial %q: %s", sn.dialer.Addr(), err)
+ }
+ }
+
+ if err := sn.sendBufNolock(buf, deadline, sizeBuf); err != nil {
+ sn.closeConn()
+ return err
+ }
+ return nil
+}
+
+func (sn *storageNode) sendBufNolock(buf []byte, deadline time.Time, sizeBuf []byte) error {
+ if err := sn.bc.SetWriteDeadline(deadline); err != nil {
+ return fmt.Errorf("cannot set write deadline to %s: %s", deadline, err)
+ }
+ if _, err := sn.bc.Write(sizeBuf); err != nil {
+ return fmt.Errorf("cannot write data size %d: %s", len(buf), err)
+ }
+ if _, err := sn.bc.Write(buf); err != nil {
+ return fmt.Errorf("cannot write data: %s", err)
+ }
+ return nil
+}
+
+func (sn *storageNode) dial() error {
+ c, err := sn.dialer.Dial()
+ if err != nil {
+ sn.dialErrors.Inc()
+ return err
+ }
+
+ compressionLevel := 1
+ bc, err := handshake.VMInsertClient(c, compressionLevel)
+ if err != nil {
+ _ = c.Close()
+ sn.handshakeErrors.Inc()
+ return fmt.Errorf("handshake error: %s", err)
+ }
+
+ sn.bc = bc
+ return nil
+}
+
+func (sn *storageNode) closeConn() {
+ _ = sn.bc.Close()
+ sn.bc = nil
+ sn.connectionErrors.Inc()
+}
+
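+// run periodically flushes buffered data to the storage node until Stop is called.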
+func (sn *storageNode) run() {
+ mustStop := false
+ for !mustStop {
+ select {
+ case <-stopCh:
+ mustStop = true
+ case <-time.After(time.Second):
+ }
+
+ sn.bcLock.Lock()
+ if err := sn.flushNolock(); err != nil {
+ sn.closeConn()
+ logger.Errorf("cannot flush data to storageNode %q: %s", sn.dialer.Addr(), err)
+ }
+ sn.bcLock.Unlock()
+ }
+}
+
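+// flushNolock flushes buffered data to the storage node connection. The caller must hold sn.bcLock.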
+func (sn *storageNode) flushNolock() error {
+ if sn.bc == nil {
+ return nil
+ }
+ if err := sn.bc.SetWriteDeadline(time.Now().Add(30 * time.Second)); err != nil {
+ return fmt.Errorf("cannot set write deadline: %s", err)
+ }
+ return sn.bc.Flush()
+}
+
+// storageNode is a client sending data to storage node.
+type storageNode struct {
+ dialer *netutil.TCPDialer
+
+ bc *handshake.BufferedConn
+ bcLock sync.Mutex
+
+ // The number of times sending data to the storage node timed out.
+ timeouts *metrics.Counter
+
+ // The number of dial errors to storage node.
+ dialErrors *metrics.Counter
+
+ // The number of handshake errors to storage node.
+ handshakeErrors *metrics.Counter
+
+ // The number of connection errors to storage node.
+ connectionErrors *metrics.Counter
+
+ // The number of fallbacks to this node.
+ fallbacks *metrics.Counter
+
+ // The number of rows pushed to storage node.
+ RowsPushed *metrics.Counter
+}
+
+// storageNodes contains a list of storage node clients.
+var storageNodes []*storageNode
+
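+// storageNodesWG is used by Stop to wait for graceful termination of storage node goroutines.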
+var storageNodesWG sync.WaitGroup
+
+var stopCh = make(chan struct{})
+
+// InitStorageNodes initializes storage nodes' connections to the given addrs.
+func InitStorageNodes(addrs []string) {
+ if len(addrs) == 0 {
+ logger.Panicf("BUG: addrs must be non-empty")
+ }
+ if len(addrs) > 255 {
+ logger.Panicf("BUG: too much addresses: %d; max supported %d addresses", len(addrs), 255)
+ }
+
+ for _, addr := range addrs {
+ sn := &storageNode{
+ dialer: netutil.NewTCPDialer("vminsert", addr),
+
+ timeouts: metrics.NewCounter(fmt.Sprintf(`vm_rpc_timeouts_total{name="vminsert", addr=%q}`, addr)),
+ dialErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_dial_errors_total{name="vminsert", addr=%q}`, addr)),
+ handshakeErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_handshake_errors_total{name="vminsert", addr=%q}`, addr)),
+ connectionErrors: metrics.NewCounter(fmt.Sprintf(`vm_rpc_connection_errors_total{name="vminsert", addr=%q}`, addr)),
+ fallbacks: metrics.NewCounter(fmt.Sprintf(`vm_rpc_fallbacks_total{name="vminsert", addr=%q}`, addr)),
+ RowsPushed: metrics.NewCounter(fmt.Sprintf(`vm_rpc_rows_pushed_total{name="vminsert", addr=%q}`, addr)),
+ }
+ storageNodes = append(storageNodes, sn)
+ storageNodesWG.Add(1)
+ go func(addr string) {
+ sn.run()
+ storageNodesWG.Done()
+ }(addr)
+ }
+}
+
+// Stop gracefully stops netstorage.
+func Stop() {
+ close(stopCh)
+ storageNodesWG.Wait()
+}
diff --git a/app/vminsert/opentsdb/request_handler.go b/app/vminsert/opentsdb/request_handler.go
index eef981a5f..8eff6a95e 100644
--- a/app/vminsert/opentsdb/request_handler.go
+++ b/app/vminsert/opentsdb/request_handler.go
@@ -9,8 +9,9 @@ import (
"sync"
"time"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/metrics"
)
@@ -20,27 +21,27 @@ var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="opentsdb"}`)
// insertHandler processes remote write for OpenTSDB put protocol.
//
// See http://opentsdb.net/docs/build/html/api_telnet/put.html
-func insertHandler(r io.Reader) error {
+func insertHandler(at *auth.Token, r io.Reader) error {
return concurrencylimiter.Do(func() error {
- return insertHandlerInternal(r)
+ return insertHandlerInternal(at, r)
})
}
-func insertHandlerInternal(r io.Reader) error {
+func insertHandlerInternal(at *auth.Token, r io.Reader) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
for ctx.Read(r) {
- if err := ctx.InsertRows(); err != nil {
+ if err := ctx.InsertRows(at); err != nil {
return err
}
}
return ctx.Error()
}
-func (ctx *pushCtx) InsertRows() error {
+func (ctx *pushCtx) InsertRows(at *auth.Token) error {
rows := ctx.Rows.Rows
ic := &ctx.Common
- ic.Reset(len(rows))
+ ic.Reset()
for i := range rows {
r := &rows[i]
ic.Labels = ic.Labels[:0]
@@ -49,7 +50,9 @@ func (ctx *pushCtx) InsertRows() error {
tag := &r.Tags[j]
ic.AddLabel(tag.Key, tag.Value)
}
- ic.WriteDataPoint(nil, ic.Labels, r.Timestamp, r.Value)
+ if err := ic.WriteDataPoint(at, ic.Labels, r.Timestamp, r.Value); err != nil {
+ return err
+ }
}
rowsInserted.Add(len(rows))
return ic.FlushBufs()
@@ -110,7 +113,7 @@ func (ctx *pushCtx) Read(r io.Reader) bool {
type pushCtx struct {
Rows Rows
- Common common.InsertCtx
+ Common netstorage.InsertCtx
reqBuf bytesutil.ByteBuffer
tailBuf []byte
@@ -128,7 +131,7 @@ func (ctx *pushCtx) Error() error {
func (ctx *pushCtx) reset() {
ctx.Rows.Reset()
- ctx.Common.Reset(0)
+ ctx.Common.Reset()
ctx.reqBuf.Reset()
ctx.tailBuf = ctx.tailBuf[:0]
diff --git a/app/vminsert/opentsdb/server.go b/app/vminsert/opentsdb/server.go
index b7f37a0ce..d39f524b3 100644
--- a/app/vminsert/opentsdb/server.go
+++ b/app/vminsert/opentsdb/server.go
@@ -7,6 +7,7 @@ import (
"sync"
"time"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
@@ -70,7 +71,8 @@ func serveTCP(ln net.Listener) {
}
go func() {
writeRequestsTCP.Inc()
- if err := insertHandler(c); err != nil {
+ var at auth.Token // TODO: properly initialize the auth token
+ if err := insertHandler(&at, c); err != nil {
writeErrorsTCP.Inc()
logger.Errorf("error in TCP OpenTSDB conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
}
@@ -88,6 +90,7 @@ func serveUDP(ln net.PacketConn) {
defer wg.Done()
var bb bytesutil.ByteBuffer
bb.B = bytesutil.Resize(bb.B, 64*1024)
+ var at auth.Token // TODO: properly initialize the auth token
for {
bb.Reset()
bb.B = bb.B[:cap(bb.B)]
@@ -108,7 +111,7 @@ func serveUDP(ln net.PacketConn) {
}
bb.B = bb.B[:n]
writeRequestsUDP.Inc()
- if err := insertHandler(bb.NewReader()); err != nil {
+ if err := insertHandler(&at, bb.NewReader()); err != nil {
writeErrorsUDP.Inc()
logger.Errorf("error in UDP OpenTSDB conn %q<->%q: %s", ln.LocalAddr(), addr, err)
continue
diff --git a/app/vminsert/prometheus/request_handler.go b/app/vminsert/prometheus/request_handler.go
index ab544afac..e34a23bb5 100644
--- a/app/vminsert/prometheus/request_handler.go
+++ b/app/vminsert/prometheus/request_handler.go
@@ -6,40 +6,45 @@ import (
"runtime"
"sync"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/common"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/concurrencylimiter"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vminsert/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
var rowsInserted = metrics.NewCounter(`vm_rows_inserted_total{type="prometheus"}`)
// InsertHandler processes remote write for prometheus.
-func InsertHandler(r *http.Request, maxSize int64) error {
+func InsertHandler(at *auth.Token, r *http.Request, maxSize int64) error {
return concurrencylimiter.Do(func() error {
- return insertHandlerInternal(r, maxSize)
+ return insertHandlerInternal(at, r, maxSize)
})
}
-func insertHandlerInternal(r *http.Request, maxSize int64) error {
+func insertHandlerInternal(at *auth.Token, r *http.Request, maxSize int64) error {
ctx := getPushCtx()
defer putPushCtx(ctx)
if err := ctx.Read(r, maxSize); err != nil {
return err
}
- timeseries := ctx.req.Timeseries
- rowsLen := 0
- for i := range timeseries {
- rowsLen += len(timeseries[i].Samples)
- }
+
ic := &ctx.Common
- ic.Reset(rowsLen)
+ ic.Reset()
+ timeseries := ctx.req.Timeseries
for i := range timeseries {
ts := &timeseries[i]
- var metricNameRaw []byte
+ storageNodeIdx := ic.GetStorageNodeIdx(at, ts.Labels)
+ ic.MetricNameBuf = ic.MetricNameBuf[:0]
for i := range ts.Samples {
r := &ts.Samples[i]
- metricNameRaw = ic.WriteDataPointExt(metricNameRaw, ts.Labels, r.Timestamp, r.Value)
+ if len(ic.MetricNameBuf) == 0 {
+ ic.MetricNameBuf = storage.MarshalMetricNameRaw(ic.MetricNameBuf[:0], at.AccountID, at.ProjectID, ts.Labels)
+ }
+ if err := ic.WriteDataPointExt(at, storageNodeIdx, ic.MetricNameBuf, r.Timestamp, r.Value); err != nil {
+ return err
+ }
}
rowsInserted.Add(len(ts.Samples))
}
@@ -47,14 +52,14 @@ func insertHandlerInternal(r *http.Request, maxSize int64) error {
}
type pushCtx struct {
- Common common.InsertCtx
+ Common netstorage.InsertCtx
req prompb.WriteRequest
reqBuf []byte
}
func (ctx *pushCtx) reset() {
- ctx.Common.Reset(0)
+ ctx.Common.Reset()
ctx.req.Reset()
ctx.reqBuf = ctx.reqBuf[:0]
}
diff --git a/app/vmselect/Makefile b/app/vmselect/Makefile
new file mode 100644
index 000000000..c175ba74c
--- /dev/null
+++ b/app/vmselect/Makefile
@@ -0,0 +1,32 @@
+# All these commands must run from repository root.
+
+run-vmselect:
+ mkdir -p vmselect-cache
+ DOCKER_OPTS='-v $(shell pwd)/vmselect-cache:/cache -p 8481:8481' \
+ APP_NAME=vmselect \
+ ARGS='-storageNode=localhost:8401 -selectNode=localhost:8481 -cacheDataPath=/cache' \
+ $(MAKE) run-via-docker
+
+vmselect:
+ APP_NAME=vmselect $(MAKE) app-local
+
+vmselect-race:
+ APP_NAME=vmselect RACE=-race $(MAKE) app-local
+
+vmselect-prod:
+ APP_NAME=vmselect $(MAKE) app-via-docker
+
+vmselect-prod-race:
+ APP_NAME=vmselect RACE=-race $(MAKE) app-via-docker
+
+package-vmselect:
+ APP_NAME=vmselect $(MAKE) package-via-docker
+
+package-vmselect-race:
+ APP_NAME=vmselect RACE=-race $(MAKE) package-via-docker
+
+publish-vmselect:
+ APP_NAME=vmselect $(MAKE) publish-via-docker
+
+publish-vmselect-race:
+ APP_NAME=vmselect RACE=-race $(MAKE) publish-via-docker
diff --git a/app/vmselect/README.md b/app/vmselect/README.md
index 1335e9407..1d1ed69cf 100644
--- a/app/vmselect/README.md
+++ b/app/vmselect/README.md
@@ -1,2 +1,6 @@
-`vmselect` performs the incoming queries and fetches the required data
-from `vmstorage`.
+`vmselect` performs the following tasks:
+
+- Splits each incoming select query into tasks for `vmstorage` nodes and issues these tasks
+  to all the `vmstorage` nodes in the cluster.
+
+- Merges responses from all the `vmstorage` nodes and returns a single response.
diff --git a/app/vmselect/deployment/Dockerfile b/app/vmselect/deployment/Dockerfile
new file mode 100644
index 000000000..bbaef9028
--- /dev/null
+++ b/app/vmselect/deployment/Dockerfile
@@ -0,0 +1,5 @@
+FROM scratch
+COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+COPY bin/vmselect-prod .
+EXPOSE 8481
+ENTRYPOINT ["/vmselect-prod"]
diff --git a/app/vmselect/main.go b/app/vmselect/main.go
index 4561abec4..9f344f05a 100644
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -1,4 +1,4 @@
-package vmselect
+package main
import (
"flag"
@@ -10,37 +10,78 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/metrics"
)
var (
- deleteAuthKey = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
+ httpListenAddr = flag.String("httpListenAddr", ":8481", "Address to listen for http connections")
+ cacheDataPath = flag.String("cacheDataPath", "", "Path to directory for cache files. Cache isn't saved if empty")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
+
+ storageNodes flagutil.Array
)
-// Init initializes vmselect
-func Init() {
- tmpDirPath := *vmstorage.DataPath + "/tmp"
- fs.RemoveDirContents(tmpDirPath)
- netstorage.InitTmpBlocksDir(tmpDirPath)
- promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")
+func main() {
+ flag.Var(&storageNodes, "storageNode", "Vmstorage address, usage -storageNode=vmstorage-host1:8401 -storageNode=vmstorage-host2:8401")
+ flag.Parse()
+ buildinfo.Init()
+ logger.Init()
+
+ logger.Infof("starting netstorage at storageNodes=%v", storageNodes)
+ startTime := time.Now()
+ if len(storageNodes) == 0 {
+ logger.Fatalf("storageNodes cannot be empty")
+ }
+ netstorage.InitStorageNodes(storageNodes)
+ logger.Infof("started netstorage in %s", time.Since(startTime))
+
+ if len(*cacheDataPath) > 0 {
+ tmpDataPath := *cacheDataPath + "/tmp"
+ fs.RemoveDirContents(tmpDataPath)
+ netstorage.InitTmpBlocksDir(tmpDataPath)
+ promql.InitRollupResultCache(*cacheDataPath + "/rollupResult")
+ } else {
+ netstorage.InitTmpBlocksDir("")
+ promql.InitRollupResultCache("")
+ }
concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
+
+ go func() {
+ httpserver.Serve(*httpListenAddr, requestHandler)
+ }()
+
+ sig := procutil.WaitForSigterm()
+ logger.Infof("service received signal %s", sig)
+
+ logger.Infof("gracefully shutting down the service at %q", *httpListenAddr)
+ startTime = time.Now()
+ if err := httpserver.Stop(*httpListenAddr); err != nil {
+ logger.Fatalf("cannot stop the service: %s", err)
+ }
+ logger.Infof("successfully shut down the service in %s", time.Since(startTime))
+
+ logger.Infof("shutting down neststorage...")
+ startTime = time.Now()
+ netstorage.Stop()
+ if len(*cacheDataPath) > 0 {
+ promql.StopRollupResultCache()
+ }
+ logger.Infof("successfully stopped netstorage in %s", time.Since(startTime))
+
+ logger.Infof("the vmselect has been stopped")
}
var concurrencyCh chan struct{}
-// Stop stops vmselect
-func Stop() {
- promql.StopRollupResultCache()
-}
-
-// RequestHandler handles remote read API requests for Prometheus
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
+func requestHandler(w http.ResponseWriter, r *http.Request) bool {
// Limit the number of concurrent queries.
// Sleep for a second until giving up. This should resolve short bursts in requests.
t := time.NewTimer(*maxQueueDuration)
@@ -53,14 +94,41 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
- path := strings.Replace(r.URL.Path, "//", "/", -1)
- if strings.HasPrefix(path, "/api/v1/label/") {
- s := r.URL.Path[len("/api/v1/label/"):]
+ path := r.URL.Path
+ if path == "/internal/resetRollupResultCache" {
+ promql.ResetRollupResultCache()
+ return true
+ }
+
+ p, err := httpserver.ParsePath(path)
+ if err != nil {
+ httpserver.Errorf(w, "cannot parse path %q: %s", path, err)
+ return true
+ }
+ at, err := auth.NewToken(p.AuthToken)
+ if err != nil {
+ httpserver.Errorf(w, "auth error: %s", err)
+ return true
+ }
+ switch p.Prefix {
+ case "select":
+ return selectHandler(w, r, p, at)
+ case "delete":
+ return deleteHandler(w, r, p, at)
+ default:
+ // The path prefix is not served by vmselect.
+ return false
+ }
+}
+
+func selectHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
+ if strings.HasPrefix(p.Suffix, "prometheus/api/v1/label/") {
+ s := p.Suffix[len("prometheus/api/v1/label/"):]
if strings.HasSuffix(s, "/values") {
labelValuesRequests.Inc()
labelName := s[:len(s)-len("/values")]
httpserver.EnableCORS(w, r)
- if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
+ if err := prometheus.LabelValuesHandler(at, labelName, w, r); err != nil {
labelValuesErrors.Inc()
sendPrometheusError(w, r, err)
return true
@@ -69,76 +137,78 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
}
}
- switch path {
- case "/api/v1/query":
+ switch p.Suffix {
+ case "prometheus/api/v1/query":
queryRequests.Inc()
httpserver.EnableCORS(w, r)
- if err := prometheus.QueryHandler(w, r); err != nil {
+ if err := prometheus.QueryHandler(at, w, r); err != nil {
queryErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
- case "/api/v1/query_range":
+ case "prometheus/api/v1/query_range":
queryRangeRequests.Inc()
httpserver.EnableCORS(w, r)
- if err := prometheus.QueryRangeHandler(w, r); err != nil {
+ if err := prometheus.QueryRangeHandler(at, w, r); err != nil {
queryRangeErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
- case "/api/v1/series":
+ case "prometheus/api/v1/series":
seriesRequests.Inc()
httpserver.EnableCORS(w, r)
- if err := prometheus.SeriesHandler(w, r); err != nil {
+ if err := prometheus.SeriesHandler(at, w, r); err != nil {
seriesErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
- case "/api/v1/series/count":
+ case "prometheus/api/v1/series/count":
seriesCountRequests.Inc()
httpserver.EnableCORS(w, r)
- if err := prometheus.SeriesCountHandler(w, r); err != nil {
+ if err := prometheus.SeriesCountHandler(at, w, r); err != nil {
seriesCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
- case "/api/v1/labels":
+ case "prometheus/api/v1/labels":
labelsRequests.Inc()
httpserver.EnableCORS(w, r)
- if err := prometheus.LabelsHandler(w, r); err != nil {
+ if err := prometheus.LabelsHandler(at, w, r); err != nil {
labelsErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
- case "/api/v1/export":
+ case "prometheus/api/v1/export":
exportRequests.Inc()
- if err := prometheus.ExportHandler(w, r); err != nil {
+ if err := prometheus.ExportHandler(at, w, r); err != nil {
exportErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
- case "/federate":
+ case "prometheus/federate":
federateRequests.Inc()
- if err := prometheus.FederateHandler(w, r); err != nil {
+ if err := prometheus.FederateHandler(at, w, r); err != nil {
federateErrors.Inc()
- httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)
+ httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
}
return true
- case "/api/v1/admin/tsdb/delete_series":
+ default:
+ return false
+ }
+}
+
+func deleteHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, at *auth.Token) bool {
+ switch p.Suffix {
+ case "prometheus/api/v1/admin/tsdb/delete_series":
deleteRequests.Inc()
- authKey := r.FormValue("authKey")
- if authKey != *deleteAuthKey {
- httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
- return true
- }
- if err := prometheus.DeleteHandler(r); err != nil {
+ if err := prometheus.DeleteHandler(at, r); err != nil {
deleteErrors.Inc()
httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
return true
@@ -160,30 +230,30 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
}
var (
- labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
- labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)
+ labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/label/{}/values"}`)
+ labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/label/{}/values"}`)
- queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
- queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)
+ queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query"}`)
+ queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query"}`)
- queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
- queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)
+ queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query_range"}`)
+ queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/query_range"}`)
- seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
- seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)
+ seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series"}`)
+ seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series"}`)
- seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
- seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)
+ seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/series/count"}`)
+ seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/series/count"}`)
- labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
- labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)
+ labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels"}`)
+ labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels"}`)
- deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
- deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)
+ deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
+ deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
- exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
- exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)
+ exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/export"}`)
+ exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/export"}`)
- federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
- federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
+ federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/federate"}`)
+ federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/federate"}`)
)
diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go
index f0a3cf728..022421251 100644
--- a/app/vmselect/netstorage/netstorage.go
+++ b/app/vmselect/netstorage/netstorage.go
@@ -2,28 +2,24 @@ package netstorage
import (
"container/heap"
- "flag"
"fmt"
+ "io"
"runtime"
"sort"
"sync"
- "sync/atomic"
"time"
- "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
-var (
- maxTagKeysPerSearch = flag.Int("search.maxTagKeys", 10e3, "The maximum number of tag keys returned per search")
- maxTagValuesPerSearch = flag.Int("search.maxTagValues", 10e3, "The maximum number of tag values returned per search")
- maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 100e3, "The maximum number of unique time series each search can scan")
-)
-
// Result is a single timeseries result.
//
// ProcessSearchQuery returns Result slice.
@@ -49,6 +45,7 @@ func (r *Result) reset() {
// Results holds results returned from ProcessSearchQuery.
type Results struct {
+ at *auth.Token
tr storage.TimeRange
deadline Deadline
@@ -102,7 +99,7 @@ func (rss *Results) RunParallel(f func(rs *Result)) error {
err = fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.Timeout)
break
}
- if err = pts.Unpack(rss.tbf, rs, rss.tr, maxWorkersCount); err != nil {
+ if err = pts.Unpack(rss.tbf, rs, rss.tr, rss.at, maxWorkersCount); err != nil {
break
}
if len(rs.Timestamps) == 0 {
@@ -148,7 +145,7 @@ type packedTimeseries struct {
}
// Unpack unpacks pts to dst.
-func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.TimeRange, maxWorkersCount int) error {
+func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.TimeRange, at *auth.Token, maxWorkersCount int) error {
dst.reset()
if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
@@ -175,7 +172,7 @@ func (pts *packedTimeseries) Unpack(tbf *tmpBlocksFile, dst *Result, tr storage.
var err error
for addr := range workCh {
sb := getSortBlock()
- if err = sb.unpackFrom(tbf, addr, tr); err != nil {
+ if err = sb.unpackFrom(tbf, addr, tr, at); err != nil {
break
}
@@ -294,7 +291,7 @@ func (sb *sortBlock) reset() {
sb.NextIdx = 0
}
-func (sb *sortBlock) unpackFrom(tbf *tmpBlocksFile, addr tmpBlockAddr, tr storage.TimeRange) error {
+func (sb *sortBlock) unpackFrom(tbf *tmpBlocksFile, addr tmpBlockAddr, tr storage.TimeRange, at *auth.Token) error {
tbf.MustReadBlockAt(&sb.b, addr)
if err := sb.b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block: %s", err)
@@ -352,21 +349,104 @@ func (sbh *sortBlocksHeap) Pop() interface{} {
return v
}
-// DeleteSeries deletes time series matching the given tagFilterss.
-func DeleteSeries(sq *storage.SearchQuery) (int, error) {
- tfss, err := setupTfss(sq.TagFilterss)
- if err != nil {
- return 0, err
+// DeleteSeries deletes time series matching the given sq.
+func DeleteSeries(at *auth.Token, sq *storage.SearchQuery, deadline Deadline) (int, error) {
+ requestData := sq.Marshal(nil)
+
+ // Send the query to all the storage nodes in parallel.
+ type nodeResult struct {
+ deletedCount int
+ err error
}
- return vmstorage.DeleteMetrics(tfss)
+ resultsCh := make(chan nodeResult, len(storageNodes))
+ for _, sn := range storageNodes {
+ go func(sn *storageNode) {
+ sn.deleteSeriesRequests.Inc()
+ deletedCount, err := sn.deleteMetrics(requestData, deadline)
+ if err != nil {
+ sn.deleteSeriesRequestErrors.Inc()
+ }
+ resultsCh <- nodeResult{
+ deletedCount: deletedCount,
+ err: err,
+ }
+ }(sn)
+ }
+
+ // Collect results
+ deletedTotal := 0
+ var errors []error
+ for i := 0; i < len(storageNodes); i++ {
+ // There is no need for a timer here, since all the goroutines executing
+ // sn.deleteMetrics must finish by the deadline.
+ nr := <-resultsCh
+ if nr.err != nil {
+ errors = append(errors, nr.err)
+ continue
+ }
+ deletedTotal += nr.deletedCount
+ }
+ if len(errors) > 0 {
+ // Return only the first error, since there is no sense in returning all of them.
+ return deletedTotal, fmt.Errorf("error occurred while deleting time series: %s", errors[0])
+ }
+ return deletedTotal, nil
}
// GetLabels returns labels until the given deadline.
-func GetLabels(deadline Deadline) ([]string, error) {
- labels, err := vmstorage.SearchTagKeys(*maxTagKeysPerSearch)
- if err != nil {
- return nil, fmt.Errorf("error during labels search: %s", err)
+func GetLabels(at *auth.Token, deadline Deadline) ([]string, bool, error) {
+ // Send the query to all the storage nodes in parallel.
+ type nodeResult struct {
+ labels []string
+ err error
}
+ resultsCh := make(chan nodeResult, len(storageNodes))
+ for _, sn := range storageNodes {
+ go func(sn *storageNode) {
+ sn.labelsRequests.Inc()
+ labels, err := sn.getLabels(at.AccountID, at.ProjectID, deadline)
+ if err != nil {
+ sn.labelsRequestErrors.Inc()
+ err = fmt.Errorf("cannot get labels from vmstorage %s: %s", sn.connPool.Addr(), err)
+ }
+ resultsCh <- nodeResult{
+ labels: labels,
+ err: err,
+ }
+ }(sn)
+ }
+
+ // Collect results
+ var labels []string
+ var errors []error
+ for i := 0; i < len(storageNodes); i++ {
+ // There is no need for a timer here, since all the goroutines executing
+ // sn.getLabels must finish by the deadline.
+ nr := <-resultsCh
+ if nr.err != nil {
+ errors = append(errors, nr.err)
+ continue
+ }
+ labels = append(labels, nr.labels...)
+ }
+ isPartialResult := false
+ if len(errors) > 0 {
+ if len(labels) == 0 {
+ // Return only the first error, since there is no sense in returning all of them.
+ return nil, true, fmt.Errorf("error occurred while fetching labels: %s", errors[0])
+ }
+
+ // Just log the errors and return partial results.
+ // This allows vmselect to degrade gracefully when certain storageNodes
+ // are temporarily unavailable.
+ partialLabelsResults.Inc()
+ // Log only the first error, since there is no sense in logging all of them.
+ logger.Errorf("certain storageNodes are unhealthy when fetching labels: %s", errors[0])
+ isPartialResult = true
+ }
+
+ // Deduplicate labels
+ labels = deduplicateStrings(labels)
// Substitute "" with "__name__"
for i := range labels {
@@ -378,101 +458,217 @@ func GetLabels(deadline Deadline) ([]string, error) {
// Sort labels like Prometheus does
sort.Strings(labels)
- return labels, nil
+ return labels, isPartialResult, nil
}
// GetLabelValues returns label values for the given labelName
// until the given deadline.
-func GetLabelValues(labelName string, deadline Deadline) ([]string, error) {
+func GetLabelValues(at *auth.Token, labelName string, deadline Deadline) ([]string, bool, error) {
if labelName == "__name__" {
labelName = ""
}
- // Search for tag values
- labelValues, err := vmstorage.SearchTagValues([]byte(labelName), *maxTagValuesPerSearch)
- if err != nil {
- return nil, fmt.Errorf("error during label values search for labelName=%q: %s", labelName, err)
+ // Send the query to all the storage nodes in parallel.
+ type nodeResult struct {
+ labelValues []string
+ err error
}
+ resultsCh := make(chan nodeResult, len(storageNodes))
+ for _, sn := range storageNodes {
+ go func(sn *storageNode) {
+ sn.labelValuesRequests.Inc()
+ labelValues, err := sn.getLabelValues(at.AccountID, at.ProjectID, labelName, deadline)
+ if err != nil {
+ sn.labelValuesRequestErrors.Inc()
+ err = fmt.Errorf("cannot get label values from vmstorage %s: %s", sn.connPool.Addr(), err)
+ }
+ resultsCh <- nodeResult{
+ labelValues: labelValues,
+ err: err,
+ }
+ }(sn)
+ }
+
+ // Collect results
+ var labelValues []string
+ var errors []error
+ for i := 0; i < len(storageNodes); i++ {
+ // There is no need for a timer here, since all the goroutines executing
+ // sn.getLabelValues must finish by the deadline.
+ nr := <-resultsCh
+ if nr.err != nil {
+ errors = append(errors, nr.err)
+ continue
+ }
+ labelValues = append(labelValues, nr.labelValues...)
+ }
+ isPartialResult := false
+ if len(errors) > 0 {
+ if len(labelValues) == 0 {
+ // Return only the first error, since there is no sense in returning all of them.
+ return nil, true, fmt.Errorf("error occurred while fetching label values: %s", errors[0])
+ }
+
+ // Just log the errors and return partial results.
+ // This allows vmselect to degrade gracefully when certain storageNodes
+ // are temporarily unavailable.
+ partialLabelValuesResults.Inc()
+ // Log only the first error, since there is no sense in logging all of them.
+ logger.Errorf("certain storageNodes are unhealthy when fetching label values: %s", errors[0])
+ isPartialResult = true
+ }
+
+ // Deduplicate label values
+ labelValues = deduplicateStrings(labelValues)
// Sort labelValues like Prometheus does
sort.Strings(labelValues)
- return labelValues, nil
+ return labelValues, isPartialResult, nil
}
-// GetSeriesCount returns the number of unique series.
-func GetSeriesCount(deadline Deadline) (uint64, error) {
- n, err := vmstorage.GetSeriesCount()
- if err != nil {
- return 0, fmt.Errorf("error during series count request: %s", err)
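+// deduplicateStrings returns a with duplicate strings removed. The original order is not preserved.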
+func deduplicateStrings(a []string) []string {
+ m := make(map[string]bool, len(a))
+ for _, s := range a {
+ m[s] = true
}
- return n, nil
-}
-
-func getStorageSearch() *storage.Search {
- v := ssPool.Get()
- if v == nil {
- return &storage.Search{}
+ a = a[:0]
+ for s := range m {
+ a = append(a, s)
}
- return v.(*storage.Search)
+ return a
}
-func putStorageSearch(sr *storage.Search) {
- n := atomic.LoadUint64(&sr.MissingMetricNamesForMetricID)
- missingMetricNamesForMetricID.Add(int(n))
- sr.MustClose()
- ssPool.Put(sr)
+// GetSeriesCount returns the number of unique series for the given at.
+func GetSeriesCount(at *auth.Token, deadline Deadline) (uint64, bool, error) {
+ // Send the query to all the storage nodes in parallel.
+ type nodeResult struct {
+ n uint64
+ err error
+ }
+ resultsCh := make(chan nodeResult, len(storageNodes))
+ for _, sn := range storageNodes {
+ go func(sn *storageNode) {
+ sn.seriesCountRequests.Inc()
+ n, err := sn.getSeriesCount(at.AccountID, at.ProjectID, deadline)
+ if err != nil {
+ sn.seriesCountRequestErrors.Inc()
+ err = fmt.Errorf("cannot get series count from vmstorage %s: %s", sn.connPool.Addr(), err)
+ }
+ resultsCh <- nodeResult{
+ n: n,
+ err: err,
+ }
+ }(sn)
+ }
+
+ // Collect results
+ var n uint64
+ var errors []error
+ for i := 0; i < len(storageNodes); i++ {
+ // There is no need for a timer here, since all the goroutines executing
+ // sn.getSeriesCount must finish by the deadline.
+ nr := <-resultsCh
+ if nr.err != nil {
+ errors = append(errors, nr.err)
+ continue
+ }
+ n += nr.n
+ }
+ isPartialResult := false
+ if len(errors) > 0 {
+ if n == 0 {
+ // Return only the first error, since there is no sense in returning all of them.
+ return 0, true, fmt.Errorf("error occurred while fetching series count: %s", errors[0])
+ }
+
+ // Just log the errors and return partial results.
+ // This allows vmselect to degrade gracefully when certain storageNodes
+ // are temporarily unavailable.
+ partialSeriesCountResults.Inc()
+ // Log only the first error, since there is no sense in logging all of them.
+ logger.Errorf("certain storageNodes are unhealthy when fetching series count: %s", errors[0])
+ isPartialResult = true
+ }
+
+ return n, isPartialResult, nil
}
-var ssPool sync.Pool
-
-var missingMetricNamesForMetricID = metrics.NewCounter(`vm_missing_metric_names_for_metric_id_total`)
-
// ProcessSearchQuery performs sq on storage nodes until the given deadline.
-func ProcessSearchQuery(sq *storage.SearchQuery, deadline Deadline) (*Results, error) {
- // Setup search.
- tfss, err := setupTfss(sq.TagFilterss)
- if err != nil {
- return nil, err
+func ProcessSearchQuery(at *auth.Token, sq *storage.SearchQuery, deadline Deadline) (*Results, bool, error) {
+ requestData := sq.Marshal(nil)
+
+ // Send the query to all the storage nodes in parallel.
+ type nodeResult struct {
+ results []*storage.MetricBlock
+ err error
}
+ resultsCh := make(chan nodeResult, len(storageNodes))
tr := storage.TimeRange{
MinTimestamp: sq.MinTimestamp,
MaxTimestamp: sq.MaxTimestamp,
}
+ for _, sn := range storageNodes {
+ go func(sn *storageNode) {
+ sn.searchRequests.Inc()
+ results, err := sn.processSearchQuery(requestData, tr, deadline)
+ if err != nil {
+ sn.searchRequestErrors.Inc()
+ err = fmt.Errorf("cannot perform search on vmstorage %s: %s", sn.connPool.Addr(), err)
+ }
+ resultsCh <- nodeResult{
+ results: results,
+ err: err,
+ }
+ }(sn)
+ }
- vmstorage.WG.Add(1)
- defer vmstorage.WG.Done()
-
- sr := getStorageSearch()
- defer putStorageSearch(sr)
- sr.Init(vmstorage.Storage, tfss, tr, *maxMetricsPerSearch)
-
+ // Collect results.
+ var errors []error
tbf := getTmpBlocksFile()
m := make(map[string][]tmpBlockAddr)
- for sr.NextMetricBlock() {
- addr, err := tbf.WriteBlock(sr.MetricBlock.Block)
- if err != nil {
- putTmpBlocksFile(tbf)
- return nil, fmt.Errorf("cannot write data to temporary blocks file: %s", err)
+ for i := 0; i < len(storageNodes); i++ {
+ // There is no need for a timer here, since all the goroutines executing
+ // sn.processSearchQuery must finish by the deadline.
+ nr := <-resultsCh
+ if nr.err != nil {
+ errors = append(errors, nr.err)
+ continue
}
- if time.Until(deadline.Deadline) < 0 {
- putTmpBlocksFile(tbf)
- return nil, fmt.Errorf("timeout exceeded while fetching data from storage: %s", deadline.Timeout)
+ for _, mb := range nr.results {
+ addr, err := tbf.WriteBlock(mb.Block)
+ if err != nil {
+ errors = append(errors, fmt.Errorf("cannot write data to temporary blocks file: %s", err))
+ break
+ }
+ metricName := mb.MetricName
+ m[string(metricName)] = append(m[string(metricName)], addr)
}
- metricName := sr.MetricBlock.MetricName
- m[string(metricName)] = append(m[string(metricName)], addr)
}
- if err := sr.Error(); err != nil {
- putTmpBlocksFile(tbf)
- return nil, fmt.Errorf("search error: %s", err)
+ isPartialResult := false
+ if len(errors) > 0 {
+ if len(m) == 0 {
+ // Return only the first error, since there is no sense in returning all of them.
+ putTmpBlocksFile(tbf)
+ return nil, true, fmt.Errorf("error occurred during search: %s", errors[0])
+ }
+
+ // Just log the errors and return partial results.
+ // This allows vmselect to degrade gracefully when certain storageNodes
+ // are temporarily unavailable.
+ partialSearchResults.Inc()
+ // Log only the first error, since there is no sense in logging all of them.
+ logger.Errorf("certain storageNodes are unhealthy during search: %s", errors[0])
+ isPartialResult = true
}
if err := tbf.Finalize(); err != nil {
putTmpBlocksFile(tbf)
- return nil, fmt.Errorf("cannot finalize temporary blocks file: %s", err)
+ return nil, false, fmt.Errorf("cannot finalize temporary blocks file: %s", err)
}
var rss Results
rss.packedTimeseries = make([]packedTimeseries, len(m))
+ rss.at = at
rss.tr = tr
rss.deadline = deadline
rss.tbf = tbf
@@ -483,9 +679,491 @@ func ProcessSearchQuery(sq *storage.SearchQuery, deadline Deadline) (*Results, e
pts.metricName = metricName
pts.addrs = addrs
}
- return &rss, nil
+
+ return &rss, isPartialResult, nil
}
+type storageNode struct {
+ connPool *netutil.ConnPool
+
+ // The channel for limiting the maximum number of concurrent queries to storageNode.
+ concurrentQueriesCh chan struct{}
+
+ // The number of DeleteSeries requests to storageNode.
+ deleteSeriesRequests *metrics.Counter
+
+ // The number of DeleteSeries request errors to storageNode.
+ deleteSeriesRequestErrors *metrics.Counter
+
+ // The number of requests to labels.
+ labelsRequests *metrics.Counter
+
+ // The number of errors during requests to labels.
+ labelsRequestErrors *metrics.Counter
+
+ // The number of requests to labelValues.
+ labelValuesRequests *metrics.Counter
+
+ // The number of errors during requests to labelValues.
+ labelValuesRequestErrors *metrics.Counter
+
+ // The number of requests to seriesCount.
+ seriesCountRequests *metrics.Counter
+
+ // The number of errors during requests to seriesCount.
+ seriesCountRequestErrors *metrics.Counter
+
+ // The number of search requests to storageNode.
+ searchRequests *metrics.Counter
+
+ // The number of search request errors to storageNode.
+ searchRequestErrors *metrics.Counter
+
+ // The number of metric blocks read.
+ metricBlocksRead *metrics.Counter
+
+ // The number of read metric rows.
+ metricRowsRead *metrics.Counter
+}
+
+func (sn *storageNode) deleteMetrics(requestData []byte, deadline Deadline) (int, error) {
+ var deletedCount int
+ f := func(bc *handshake.BufferedConn) error {
+ n, err := sn.deleteMetricsOnConn(bc, requestData)
+ if err != nil {
+ return err
+ }
+ deletedCount += n
+ return nil
+ }
+ if err := sn.execOnConn("deleteMetrics_v2", f, deadline); err != nil {
+ // Try again before giving up.
+ // There is no need in zeroing deletedCount.
+ if err = sn.execOnConn("deleteMetrics_v2", f, deadline); err != nil {
+ return deletedCount, err
+ }
+ }
+ return deletedCount, nil
+}
+
+func (sn *storageNode) getLabels(accountID, projectID uint32, deadline Deadline) ([]string, error) {
+ var labels []string
+ f := func(bc *handshake.BufferedConn) error {
+ ls, err := sn.getLabelsOnConn(bc, accountID, projectID)
+ if err != nil {
+ return err
+ }
+ labels = ls
+ return nil
+ }
+ if err := sn.execOnConn("labels", f, deadline); err != nil {
+ // Try again before giving up.
+ labels = nil
+ if err = sn.execOnConn("labels", f, deadline); err != nil {
+ return nil, err
+ }
+ }
+ return labels, nil
+}
+
+func (sn *storageNode) getLabelValues(accountID, projectID uint32, labelName string, deadline Deadline) ([]string, error) {
+ var labelValues []string
+ f := func(bc *handshake.BufferedConn) error {
+ lvs, err := sn.getLabelValuesOnConn(bc, accountID, projectID, labelName)
+ if err != nil {
+ return err
+ }
+ labelValues = lvs
+ return nil
+ }
+ if err := sn.execOnConn("labelValues", f, deadline); err != nil {
+ // Try again before giving up.
+ labelValues = nil
+ if err = sn.execOnConn("labelValues", f, deadline); err != nil {
+ return nil, err
+ }
+ }
+ return labelValues, nil
+}
+
+func (sn *storageNode) getSeriesCount(accountID, projectID uint32, deadline Deadline) (uint64, error) {
+ var n uint64
+ f := func(bc *handshake.BufferedConn) error {
+ nn, err := sn.getSeriesCountOnConn(bc, accountID, projectID)
+ if err != nil {
+ return err
+ }
+ n = nn
+ return nil
+ }
+ if err := sn.execOnConn("seriesCount", f, deadline); err != nil {
+ // Try again before giving up.
+ n = 0
+ if err = sn.execOnConn("seriesCount", f, deadline); err != nil {
+ return 0, err
+ }
+ }
+ return n, nil
+}
+
+func (sn *storageNode) processSearchQuery(requestData []byte, tr storage.TimeRange, deadline Deadline) ([]*storage.MetricBlock, error) {
+ var results []*storage.MetricBlock
+ f := func(bc *handshake.BufferedConn) error {
+ rs, err := sn.processSearchQueryOnConn(bc, requestData, tr)
+ if err != nil {
+ return err
+ }
+ results = rs
+ return nil
+ }
+ if err := sn.execOnConn("search_v2", f, deadline); err != nil {
+ // Try again before giving up.
+ results = nil
+ if err = sn.execOnConn("search_v2", f, deadline); err != nil {
+ return nil, err
+ }
+ }
+ return results, nil
+}
+
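+// execOnConn executes f on a connection obtained from the storage node's pool, limiting the number of concurrent queries to the node.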
+func (sn *storageNode) execOnConn(rpcName string, f func(bc *handshake.BufferedConn) error, deadline Deadline) error {
+ select {
+ case sn.concurrentQueriesCh <- struct{}{}:
+ default:
+ return fmt.Errorf("too many concurrent queries (more than %d)", cap(sn.concurrentQueriesCh))
+ }
+ defer func() {
+ <-sn.concurrentQueriesCh
+ }()
+
+ bc, err := sn.connPool.Get()
+ if err != nil {
+ return fmt.Errorf("cannot obtain connection from a pool: %s", err)
+ }
+ if err := bc.SetDeadline(deadline.Deadline); err != nil {
+ _ = bc.Close()
+ logger.Panicf("FATAL: cannot set connection deadline: %s", err)
+ }
+ if err := writeBytes(bc, []byte(rpcName)); err != nil {
+ // Close the connection instead of returning it to the pool,
+ // since it may be broken.
+ _ = bc.Close()
+ return fmt.Errorf("cannot send rpcName=%q to the server: %s", rpcName, err)
+ }
+
+ if err := f(bc); err != nil {
+ remoteAddr := bc.RemoteAddr()
+ if _, ok := err.(*errRemote); ok {
+ // Remote error. The connection may be re-used. Return it to the pool.
+ sn.connPool.Put(bc)
+ } else {
+ // Local error.
+ // Close the connection instead of returning it to the pool,
+ // since it may be broken.
+ _ = bc.Close()
+ }
+ return fmt.Errorf("cannot execute rpcName=%q on vmstorage %q with timeout %s: %s", rpcName, remoteAddr, deadline.Timeout, err)
+ }
+ // Return the connection back to the pool, assuming it is healthy.
+ sn.connPool.Put(bc)
+ return nil
+}
+
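+// errRemote is an error returned by the remote vmstorage node; the connection remains usable and is returned to the pool.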
+type errRemote struct {
+ msg string
+}
+
+func (er *errRemote) Error() string {
+ return er.msg
+}
+
+func (sn *storageNode) deleteMetricsOnConn(bc *handshake.BufferedConn, requestData []byte) (int, error) {
+ // Send the request to sn
+ if err := writeBytes(bc, requestData); err != nil {
+ return 0, fmt.Errorf("cannot send deleteMetrics request to conn: %s", err)
+ }
+ if err := bc.Flush(); err != nil {
+ return 0, fmt.Errorf("cannot flush deleteMetrics request to conn: %s", err)
+ }
+
+ // Read response error.
+ buf, err := readBytes(nil, bc, maxErrorMessageSize)
+ if err != nil {
+ return 0, fmt.Errorf("cannot read error message: %s", err)
+ }
+ if len(buf) > 0 {
+ return 0, &errRemote{msg: string(buf)}
+ }
+
+ // Read deletedCount
+ deletedCount, err := readUint64(bc)
+ if err != nil {
+ return 0, fmt.Errorf("cannot read deletedCount value: %s", err)
+ }
+ return int(deletedCount), nil
+}
+
+const maxLabelsSize = 16 * 1024 * 1024
+
+func (sn *storageNode) getLabelsOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) ([]string, error) {
+ // Send the request to sn.
+ if err := writeUint32(bc, accountID); err != nil {
+ return nil, fmt.Errorf("cannot send accountID=%d to conn: %s", accountID, err)
+ }
+ if err := writeUint32(bc, projectID); err != nil {
+ return nil, fmt.Errorf("cannot send projectID=%d to conn: %s", projectID, err)
+ }
+ if err := bc.Flush(); err != nil {
+ return nil, fmt.Errorf("cannot flush request to conn: %s", err)
+ }
+
+ // Read response error.
+ buf, err := readBytes(nil, bc, maxErrorMessageSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read error message: %s", err)
+ }
+ if len(buf) > 0 {
+ return nil, &errRemote{msg: string(buf)}
+ }
+
+ // Read response
+ var labels []string
+ for {
+ buf, err = readBytes(buf[:0], bc, maxLabelsSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read labels: %s", err)
+ }
+ if len(buf) == 0 {
+ // Reached the end of the response
+ return labels, nil
+ }
+ labels = append(labels, string(buf))
+ }
+}
+
+const maxLabelValueSize = 16 * 1024 * 1024
+
+func (sn *storageNode) getLabelValuesOnConn(bc *handshake.BufferedConn, accountID, projectID uint32, labelName string) ([]string, error) {
+ // Send the request to sn.
+ if err := writeUint32(bc, accountID); err != nil {
+ return nil, fmt.Errorf("cannot send accountID=%d to conn: %s", accountID, err)
+ }
+ if err := writeUint32(bc, projectID); err != nil {
+ return nil, fmt.Errorf("cannot send projectID=%d to conn: %s", projectID, err)
+ }
+ if err := writeBytes(bc, []byte(labelName)); err != nil {
+ return nil, fmt.Errorf("cannot send labelName=%q to conn: %s", labelName, err)
+ }
+ if err := bc.Flush(); err != nil {
+ return nil, fmt.Errorf("cannot flush labelName to conn: %s", err)
+ }
+
+ // Read response error.
+ buf, err := readBytes(nil, bc, maxErrorMessageSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read error message: %s", err)
+ }
+ if len(buf) > 0 {
+ return nil, &errRemote{msg: string(buf)}
+ }
+
+ // Read response
+ var labelValues []string
+ for {
+ buf, err = readBytes(buf[:0], bc, maxLabelValueSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read labelValue: %s", err)
+ }
+ if len(buf) == 0 {
+ // Reached the end of the response
+ return labelValues, nil
+ }
+ labelValues = append(labelValues, string(buf))
+ }
+}
+
+func (sn *storageNode) getSeriesCountOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) (uint64, error) {
+ // Send the request to sn.
+ if err := writeUint32(bc, accountID); err != nil {
+ return 0, fmt.Errorf("cannot send accountID=%d to conn: %s", accountID, err)
+ }
+ if err := writeUint32(bc, projectID); err != nil {
+ return 0, fmt.Errorf("cannot send projectID=%d to conn: %s", projectID, err)
+ }
+ if err := bc.Flush(); err != nil {
+ return 0, fmt.Errorf("cannot flush labelName to conn: %s", err)
+ }
+
+ // Read response error.
+ buf, err := readBytes(nil, bc, maxErrorMessageSize)
+ if err != nil {
+ return 0, fmt.Errorf("cannot read error message: %s", err)
+ }
+ if len(buf) > 0 {
+ return 0, &errRemote{msg: string(buf)}
+ }
+
+ // Read response
+ n, err := readUint64(bc)
+ if err != nil {
+ return 0, fmt.Errorf("cannot read series count: %s", err)
+ }
+ return n, nil
+}
+
+// maxMetricBlockSize is the maximum size of serialized MetricBlock.
+const maxMetricBlockSize = 1024 * 1024
+
+// maxErrorMessageSize is the maximum size of error message received
+// from vmstorage.
+const maxErrorMessageSize = 64 * 1024
+
+func (sn *storageNode) processSearchQueryOnConn(bc *handshake.BufferedConn, requestData []byte, tr storage.TimeRange) ([]*storage.MetricBlock, error) {
+ // Send the request to sn.
+ if err := writeBytes(bc, requestData); err != nil {
+ return nil, fmt.Errorf("cannot write requestData: %s", err)
+ }
+ if err := bc.Flush(); err != nil {
+ return nil, fmt.Errorf("cannot flush requestData to conn: %s", err)
+ }
+
+ var err error
+ var buf []byte
+
+ // Read response error.
+ buf, err = readBytes(buf[:0], bc, maxErrorMessageSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read error message: %s", err)
+ }
+ if len(buf) > 0 {
+ return nil, &errRemote{msg: string(buf)}
+ }
+
+ // Read response. It may consist of multiple MetricBlocks.
+ var results []*storage.MetricBlock
+ metricBlocksRead := 0
+ for {
+ buf, err = readBytes(buf[:0], bc, maxMetricBlockSize)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read MetricBlock #%d: %s", metricBlocksRead, err)
+ }
+ if len(buf) == 0 {
+ // Reached the end of the response
+ return results, nil
+ }
+ var mb storage.MetricBlock
+ mb.Block = &storage.Block{}
+ tail, err := mb.Unmarshal(buf)
+ if err != nil {
+ return nil, fmt.Errorf("cannot unmarshal MetricBlock: %s", err)
+ }
+ if len(tail) != 0 {
+ return nil, fmt.Errorf("non-empty tail after unmarshaling MetricBlock: (len=%d) %q", len(tail), tail)
+ }
+ metricBlocksRead++
+ sn.metricBlocksRead.Inc()
+ sn.metricRowsRead.Add(mb.Block.RowsCount())
+ results = append(results, &mb)
+ }
+}
+
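+// writeBytes writes buf to bc prefixed with its length encoded as uint64.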
+func writeBytes(bc *handshake.BufferedConn, buf []byte) error {
+ sizeBuf := encoding.MarshalUint64(nil, uint64(len(buf)))
+ if _, err := bc.Write(sizeBuf); err != nil {
+ return err
+ }
+ if _, err := bc.Write(buf); err != nil {
+ return err
+ }
+ return nil
+}
+
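+// writeUint32 marshals n and writes it to bc.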
+func writeUint32(bc *handshake.BufferedConn, n uint32) error {
+ buf := encoding.MarshalUint32(nil, n)
+ if _, err := bc.Write(buf); err != nil {
+ return err
+ }
+ return nil
+}
+
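+// readBytes reads a length-prefixed byte slice from bc into buf, rejecting payloads bigger than maxDataSize.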
+func readBytes(buf []byte, bc *handshake.BufferedConn, maxDataSize int) ([]byte, error) {
+ buf = bytesutil.Resize(buf, 8)
+ if _, err := io.ReadFull(bc, buf); err != nil {
+ return buf, fmt.Errorf("error read data size: %s", err)
+ }
+ dataSize := encoding.UnmarshalUint64(buf)
+ if dataSize > uint64(maxDataSize) {
+ return buf, fmt.Errorf("too big data size: %d; it mustn't exceed %d bytes", dataSize, maxDataSize)
+ }
+ buf = bytesutil.Resize(buf, int(dataSize))
+ if dataSize == 0 {
+ return buf, nil
+ }
+ if _, err := io.ReadFull(bc, buf); err != nil {
+ return buf, fmt.Errorf("cannot read data with size %d: %s", dataSize, err)
+ }
+ return buf, nil
+}
+
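+// readUint64 reads a uint64 value from bc.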
+func readUint64(bc *handshake.BufferedConn) (uint64, error) {
+ var buf [8]byte
+ if _, err := io.ReadFull(bc, buf[:]); err != nil {
+ return 0, fmt.Errorf("cannot read uint64: %s", err)
+ }
+ n := encoding.UnmarshalUint64(buf[:])
+ return n, nil
+}
+
+var storageNodes []*storageNode
+
+// InitStorageNodes initializes storage nodes' connections to the given addrs.
+func InitStorageNodes(addrs []string) {
+ if len(addrs) == 0 {
+ logger.Panicf("BUG: addrs must be non-empty")
+ }
+
+ for _, addr := range addrs {
+ sn := &storageNode{
+ // There is no need for request compression, since requests are usually very small.
+ connPool: netutil.NewConnPool("vmselect", addr, handshake.VMSelectClient, 0),
+
+ concurrentQueriesCh: make(chan struct{}, maxConcurrentQueriesPerStorageNode),
+
+ deleteSeriesRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="deleteSeries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ deleteSeriesRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="deleteSeries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ labelsRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="labels", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ labelsRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labels", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ labelValuesRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ labelValuesRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ seriesCountRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ searchRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ searchRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
+ metricBlocksRead: metrics.NewCounter(fmt.Sprintf(`vm_metric_blocks_read_total{name="vmselect", addr=%q}`, addr)),
+ metricRowsRead: metrics.NewCounter(fmt.Sprintf(`vm_metric_rows_read_total{name="vmselect", addr=%q}`, addr)),
+ }
+ metrics.NewGauge(fmt.Sprintf(`vm_concurrent_queries{name="vmselect", addr=%q}`, addr), func() float64 {
+ return float64(len(sn.concurrentQueriesCh))
+ })
+ storageNodes = append(storageNodes, sn)
+ }
+}
+
+// Stop gracefully stops netstorage.
+func Stop() {
+ // Nothing to do at the moment.
+}
+
+var (
+ partialLabelsResults = metrics.NewCounter(`vm_partial_labels_results_total{name="vmselect"}`)
+ partialLabelValuesResults = metrics.NewCounter(`vm_partial_label_values_results_total{name="vmselect"}`)
+ partialSeriesCountResults = metrics.NewCounter(`vm_partial_series_count_results_total{name="vmselect"}`)
+ partialSearchResults = metrics.NewCounter(`vm_partial_search_results_total{name="vmselect"}`)
+)
+
+// The maximum number of concurrent queries per storageNode.
+const maxConcurrentQueriesPerStorageNode = 100
+
func getResult() *Result {
v := rsPool.Get()
if v == nil {
@@ -505,21 +1183,6 @@ func putResult(rs *Result) {
var rsPool sync.Pool
-func setupTfss(tagFilterss [][]storage.TagFilter) ([]*storage.TagFilters, error) {
- tfss := make([]*storage.TagFilters, 0, len(tagFilterss))
- for _, tagFilters := range tagFilterss {
- tfs := storage.NewTagFilters()
- for i := range tagFilters {
- tf := &tagFilters[i]
- if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
- return nil, fmt.Errorf("cannot parse tag filter %s: %s", tf, err)
- }
- }
- tfss = append(tfss, tfs)
- }
- return tfss, nil
-}
-
// Deadline contains deadline with the corresponding timeout for pretty error messages.
type Deadline struct {
Deadline time.Time
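
The `concurrentQueriesCh` field added above is a buffered channel used as a counting semaphore: its capacity (`maxConcurrentQueriesPerStorageNode`, 100) bounds the number of in-flight queries per storage node, and its current length backs the `vm_concurrent_queries` gauge. The acquisition path is not visible in this excerpt; the toy program below only illustrates the pattern, with a deliberately small limit.

```go
package main

import (
	"fmt"
	"sync"
)

// maxConcurrent is kept tiny for the toy example; the diff uses
// maxConcurrentQueriesPerStorageNode = 100.
const maxConcurrent = 3

func main() {
	sem := make(chan struct{}, maxConcurrent)
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			sem <- struct{}{}        // acquire a slot; blocks once maxConcurrent queries are in flight
			defer func() { <-sem }() // release the slot when the query finishes
			// len(sem) is what the vm_concurrent_queries gauge reports for a storage node.
			fmt.Printf("query %d running; queries in flight: %d\n", id, len(sem))
		}(i)
	}
	wg.Wait()
}
```
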
diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go
index 14b581ffc..f16f3579d 100644
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@@ -12,6 +12,9 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
"github.com/valyala/quicktemplate"
@@ -20,8 +23,14 @@ import (
var (
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum time for search query execution")
maxQueryLen = flag.Int("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
+
+ selectNodes flagutil.Array
)
+func init() {
+ flag.Var(&selectNodes, "selectNode", "vmselect address; the flag may be passed multiple times, e.g. -selectNode=vmselect-host1:8481 -selectNode=vmselect-host2:8481")
+}
+
// Default step used if not set.
const defaultStep = 5 * 60 * 1000
@@ -30,7 +39,7 @@ const defaultStep = 5 * 60 * 1000
const latencyOffset = 60 * 1000
// FederateHandler implements /federate . See https://prometheus.io/docs/prometheus/latest/federation/
-func FederateHandler(w http.ResponseWriter, r *http.Request) error {
+func FederateHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
if err := r.ParseForm(); err != nil {
@@ -49,11 +58,13 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
return err
}
sq := &storage.SearchQuery{
+ AccountID: at.AccountID,
+ ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
- rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+ rss, _, err := netstorage.ProcessSearchQuery(at, sq, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
@@ -87,7 +98,7 @@ func FederateHandler(w http.ResponseWriter, r *http.Request) error {
var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)
// ExportHandler exports data in raw format from /api/v1/export.
-func ExportHandler(w http.ResponseWriter, r *http.Request) error {
+func ExportHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
if err := r.ParseForm(); err != nil {
@@ -106,7 +117,7 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
if start >= end {
start = end - defaultStep
}
- if err := exportHandler(w, matches, start, end, format, deadline); err != nil {
+ if err := exportHandler(at, w, matches, start, end, format, deadline); err != nil {
return err
}
exportDuration.UpdateDuration(startTime)
@@ -115,7 +126,7 @@ func ExportHandler(w http.ResponseWriter, r *http.Request) error {
var exportDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export"}`)
-func exportHandler(w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
+func exportHandler(at *auth.Token, w http.ResponseWriter, matches []string, start, end int64, format string, deadline netstorage.Deadline) error {
writeResponseFunc := WriteExportStdResponse
writeLineFunc := WriteExportJSONLine
contentType := "application/json"
@@ -132,14 +143,20 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
return err
}
sq := &storage.SearchQuery{
+ AccountID: at.AccountID,
+ ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
- rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+ rss, isPartial, err := netstorage.ProcessSearchQuery(at, sq, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
+ if isPartial {
+ rss.Cancel()
+ return fmt.Errorf("some of the storage nodes are unavailable at the moment")
+ }
resultsCh := make(chan *quicktemplate.ByteBuffer, runtime.GOMAXPROCS(-1))
doneCh := make(chan error)
@@ -166,7 +183,7 @@ func exportHandler(w http.ResponseWriter, matches []string, start, end int64, fo
// DeleteHandler processes /api/v1/admin/tsdb/delete_series prometheus API request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
-func DeleteHandler(r *http.Request) error {
+func DeleteHandler(at *auth.Token, r *http.Request) error {
startTime := time.Now()
if err := r.ParseForm(); err != nil {
return fmt.Errorf("cannot parse request form values: %s", err)
@@ -175,19 +192,25 @@ func DeleteHandler(r *http.Request) error {
return fmt.Errorf("start and end aren't supported. Remove these args from the query in order to delete all the matching metrics")
}
matches := r.Form["match[]"]
+ deadline := getDeadline(r)
tagFilterss, err := getTagFilterssFromMatches(matches)
if err != nil {
return err
}
sq := &storage.SearchQuery{
+ AccountID: at.AccountID,
+ ProjectID: at.ProjectID,
TagFilterss: tagFilterss,
}
- deletedCount, err := netstorage.DeleteSeries(sq)
+ deletedCount, err := netstorage.DeleteSeries(at, sq, deadline)
if err != nil {
return fmt.Errorf("cannot delete time series matching %q: %s", matches, err)
}
if deletedCount > 0 {
- promql.ResetRollupResultCache()
+ // Reset rollup result cache on all the vmselect nodes,
+ // since the cache may contain deleted data.
+ // TODO: reset only cache for (account, project)
+ resetRollupResultCaches()
}
deleteDuration.UpdateDuration(startTime)
return nil
@@ -195,13 +218,45 @@ func DeleteHandler(r *http.Request) error {
var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
+func resetRollupResultCaches() {
+ if len(selectNodes) == 0 {
+ logger.Panicf("BUG: missing -selectNode flag")
+ }
+ for _, selectNode := range selectNodes {
+ callURL := fmt.Sprintf("http://%s/internal/resetRollupResultCache", selectNode)
+ resp, err := httpClient.Get(callURL)
+ if err != nil {
+ logger.Errorf("error when accessing %q: %s", callURL, err)
+ resetRollupResultCacheErrors.Inc()
+ continue
+ }
+ if resp.StatusCode != http.StatusOK {
+ _ = resp.Body.Close()
+ logger.Errorf("unexpected status code at %q; got %d; want %d", callURL, resp.StatusCode, http.StatusOK)
+ resetRollupResultCacheErrors.Inc()
+ continue
+ }
+ _ = resp.Body.Close()
+ }
+ resetRollupResultCacheCalls.Inc()
+}
+
+var (
+ resetRollupResultCacheErrors = metrics.NewCounter("vm_reset_rollup_result_cache_errors_total")
+ resetRollupResultCacheCalls = metrics.NewCounter("vm_reset_rollup_result_cache_calls_total")
+)
+
+var httpClient = &http.Client{
+ Timeout: time.Second * 5,
+}
+
// LabelValuesHandler processes /api/v1/label/<labelName>/values request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
-func LabelValuesHandler(labelName string, w http.ResponseWriter, r *http.Request) error {
+func LabelValuesHandler(at *auth.Token, labelName string, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
- labelValues, err := netstorage.GetLabelValues(labelName, deadline)
+ labelValues, _, err := netstorage.GetLabelValues(at, labelName, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label values for %q: %s`, labelName, err)
}
@@ -217,10 +272,10 @@ var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// LabelsHandler processes /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
-func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
+func LabelsHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
- labels, err := netstorage.GetLabels(deadline)
+ labels, _, err := netstorage.GetLabels(at, deadline)
if err != nil {
return fmt.Errorf("cannot obtain labels: %s", err)
}
@@ -234,13 +289,14 @@ func LabelsHandler(w http.ResponseWriter, r *http.Request) error {
var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)
// SeriesCountHandler processes /api/v1/series/count request.
-func SeriesCountHandler(w http.ResponseWriter, r *http.Request) error {
+func SeriesCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
- n, err := netstorage.GetSeriesCount(deadline)
+ n, _, err := netstorage.GetSeriesCount(at, deadline)
if err != nil {
return fmt.Errorf("cannot obtain series count: %s", err)
}
+
w.Header().Set("Content-Type", "application/json")
WriteSeriesCountResponse(w, n)
seriesCountDuration.UpdateDuration(startTime)
@@ -252,7 +308,7 @@ var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="
// SeriesHandler processes /api/v1/series request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
-func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
+func SeriesHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
@@ -272,11 +328,13 @@ func SeriesHandler(w http.ResponseWriter, r *http.Request) error {
start = end - defaultStep
}
sq := &storage.SearchQuery{
+ AccountID: at.AccountID,
+ ProjectID: at.ProjectID,
MinTimestamp: start,
MaxTimestamp: end,
TagFilterss: tagFilterss,
}
- rss, err := netstorage.ProcessSearchQuery(sq, deadline)
+ rss, _, err := netstorage.ProcessSearchQuery(at, sq, deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %s", sq, err)
}
@@ -315,7 +373,7 @@ var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/
// QueryHandler processes /api/v1/query request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
-func QueryHandler(w http.ResponseWriter, r *http.Request) error {
+func QueryHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
@@ -350,7 +408,7 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
start -= offset
end := start
start = end - window
- if err := exportHandler(w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
+ if err := exportHandler(at, w, []string{childQuery}, start, end, "promapi", deadline); err != nil {
return err
}
queryDuration.UpdateDuration(startTime)
@@ -358,10 +416,11 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) error {
}
ec := promql.EvalConfig{
- Start: start,
- End: start,
- Step: step,
- Deadline: deadline,
+ AuthToken: at,
+ Start: start,
+ End: start,
+ Step: step,
+ Deadline: deadline,
}
result, err := promql.Exec(&ec, query)
if err != nil {
@@ -379,7 +438,7 @@ var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v
// QueryRangeHandler processes /api/v1/query_range request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
-func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
+func QueryRangeHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
ct := currentTime()
@@ -403,11 +462,12 @@ func QueryRangeHandler(w http.ResponseWriter, r *http.Request) error {
start, end = promql.AdjustStartEnd(start, end, step)
ec := promql.EvalConfig{
- Start: start,
- End: end,
- Step: step,
- Deadline: deadline,
- MayCache: mayCache,
+ AuthToken: at,
+ Start: start,
+ End: end,
+ Step: step,
+ Deadline: deadline,
+ MayCache: mayCache,
}
result, err := promql.Exec(&ec, query)
if err != nil {
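
`DeleteHandler` above now fans the cache reset out over HTTP to every node listed via `-selectNode`, since each vmselect keeps its own rollup result cache. The handler serving `/internal/resetRollupResultCache` on the vmselect side is not part of this excerpt; the sketch below shows one shape it could take, reusing the existing `promql.ResetRollupResultCache` that the removed single-node call relied on. How vmselect actually routes this path is an assumption.

```go
package sketch

import (
	"net/http"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
)

// resetRollupResultCacheHandler is a hypothetical handler for the
// /internal/resetRollupResultCache endpoint called by resetRollupResultCaches.
// It follows the "return true if the request was handled" convention of
// httpserver.RequestHandler used elsewhere in this diff.
func resetRollupResultCacheHandler(w http.ResponseWriter, r *http.Request) bool {
	if r.URL.Path != "/internal/resetRollupResultCache" {
		return false
	}
	promql.ResetRollupResultCache()
	w.WriteHeader(http.StatusOK)
	return true
}
```
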
diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go
index 7a26d81f1..f0f94b81d 100644
--- a/app/vmselect/promql/eval.go
+++ b/app/vmselect/promql/eval.go
@@ -8,6 +8,7 @@ import (
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
@@ -62,9 +63,10 @@ func AdjustStartEnd(start, end, step int64) (int64, int64) {
// EvalConfig is the configuration required for query evaluation via Exec
type EvalConfig struct {
- Start int64
- End int64
- Step int64
+ AuthToken *auth.Token
+ Start int64
+ End int64
+ Step int64
Deadline netstorage.Deadline
@@ -77,6 +79,7 @@ type EvalConfig struct {
// newEvalConfig returns new EvalConfig copy from src.
func newEvalConfig(src *EvalConfig) *EvalConfig {
var ec EvalConfig
+ ec.AuthToken = src.AuthToken
ec.Start = src.Start
ec.End = src.End
ec.Step = src.Step
@@ -510,11 +513,14 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
// Fetch the remaining part of the result.
sq := &storage.SearchQuery{
+ AccountID: ec.AuthToken.AccountID,
+ ProjectID: ec.AuthToken.ProjectID,
MinTimestamp: start - window - maxSilenceInterval,
MaxTimestamp: ec.End + ec.Step,
TagFilterss: [][]storage.TagFilter{me.TagFilters},
}
- rss, err := netstorage.ProcessSearchQuery(sq, ec.Deadline)
+
+ rss, denyCache, err := netstorage.ProcessSearchQuery(ec.AuthToken, sq, ec.Deadline)
if err != nil {
return nil, err
}
@@ -570,8 +576,9 @@ func evalRollupFuncWithMetricExpr(ec *EvalConfig, name string, rf rollupFunc, me
}
}
tss = mergeTimeseries(tssCached, tss, start, ec)
- rollupResultCacheV.Put(name, ec, me, window, tss)
-
+ if !denyCache {
+ rollupResultCacheV.Put(name, ec, me, window, tss)
+ }
return tss, nil
}
@@ -628,6 +635,8 @@ var bbPool bytesutil.ByteBufferPool
func evalNumber(ec *EvalConfig, n float64) []*timeseries {
var ts timeseries
ts.denyReuse = true
+ ts.MetricName.AccountID = ec.AuthToken.AccountID
+ ts.MetricName.ProjectID = ec.AuthToken.ProjectID
timestamps := ec.getSharedTimestamps()
values := make([]float64, len(timestamps))
for i := range timestamps {
diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go
index e683fcf02..8219c9b36 100644
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -5,6 +5,7 @@ import (
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
@@ -47,15 +48,24 @@ func TestExpandWithExprsError(t *testing.T) {
}
func TestExecSuccess(t *testing.T) {
+ accountID := uint32(123)
+ projectID := uint32(567)
start := int64(1000e3)
end := int64(2000e3)
step := int64(200e3)
timestampsExpected := []int64{1000e3, 1200e3, 1400e3, 1600e3, 1800e3, 2000e3}
- metricNameExpected := storage.MetricName{}
+ metricNameExpected := storage.MetricName{
+ AccountID: accountID,
+ ProjectID: projectID,
+ }
f := func(q string, resultExpected []netstorage.Result) {
t.Helper()
ec := &EvalConfig{
+ AuthToken: &auth.Token{
+ AccountID: accountID,
+ ProjectID: projectID,
+ },
Start: start,
End: end,
Step: step,
@@ -3423,6 +3433,10 @@ func TestExecError(t *testing.T) {
f := func(q string) {
t.Helper()
ec := &EvalConfig{
+ AuthToken: &auth.Token{
+ AccountID: 123,
+ ProjectID: 567,
+ },
Start: 1000,
End: 2000,
Step: 100,
@@ -3574,6 +3588,12 @@ func testResultsEqual(t *testing.T, result, resultExpected []netstorage.Result)
func testMetricNamesEqual(t *testing.T, mn, mnExpected *storage.MetricName) {
t.Helper()
+ if mn.AccountID != mnExpected.AccountID {
+ t.Fatalf(`unexpected accountID; got %d; want %d`, mn.AccountID, mnExpected.AccountID)
+ }
+ if mn.ProjectID != mnExpected.ProjectID {
+ t.Fatalf(`unexpected projectID; got %d; want %d`, mn.ProjectID, mnExpected.ProjectID)
+ }
if string(mn.MetricGroup) != string(mnExpected.MetricGroup) {
t.Fatalf(`unexpected MetricGroup; got %q; want %q`, mn.MetricGroup, mnExpected.MetricGroup)
}
diff --git a/app/vmselect/promql/rollup_result_cache.go b/app/vmselect/promql/rollup_result_cache.go
index c12c22916..28d5856ed 100644
--- a/app/vmselect/promql/rollup_result_cache.go
+++ b/app/vmselect/promql/rollup_result_cache.go
@@ -8,6 +8,7 @@ import (
"sync/atomic"
"time"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
@@ -37,6 +38,8 @@ var (
)
// InitRollupResultCache initializes the rollupResult cache
+//
+// If cachePath is empty, then the cache isn't persisted to disk.
func InitRollupResultCache(cachePath string) {
rollupResultCachePath = cachePath
startTime := time.Now()
@@ -106,6 +109,8 @@ func StopRollupResultCache() {
}
}
+// TODO: convert this cache to distributed cache shared among vmselect
+// instances in the cluster.
type rollupResultCache struct {
c *fastcache.Cache
}
@@ -127,7 +132,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
bb := bbPool.Get()
defer bbPool.Put(bb)
- bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+ bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
if len(metainfoBuf) == 0 {
return nil, ec.Start
@@ -145,7 +150,7 @@ func (rrc *rollupResultCache) Get(funcName string, ec *EvalConfig, me *metricExp
if len(resultBuf) == 0 {
mi.RemoveKey(key)
metainfoBuf = mi.Marshal(metainfoBuf[:0])
- bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+ bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, window, ec.Step)
rrc.c.Set(bb.B, metainfoBuf)
return nil, ec.Start
}
@@ -235,7 +240,7 @@ func (rrc *rollupResultCache) Put(funcName string, ec *EvalConfig, me *metricExp
bb.B = key.Marshal(bb.B[:0])
rrc.c.SetBig(bb.B, tssMarshaled)
- bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, me, window, ec.Step)
+ bb.B = marshalRollupResultCacheKey(bb.B[:0], funcName, ec.AuthToken, me, window, ec.Step)
metainfoBuf := rrc.c.Get(nil, bb.B)
var mi rollupResultCacheMetainfo
if len(metainfoBuf) > 0 {
@@ -265,8 +270,10 @@ var tooBigRollupResults = metrics.NewCounter("vm_too_big_rollup_results_total")
// Increment this value every time the format of the cache changes.
const rollupResultCacheVersion = 4
-func marshalRollupResultCacheKey(dst []byte, funcName string, me *metricExpr, window, step int64) []byte {
+func marshalRollupResultCacheKey(dst []byte, funcName string, at *auth.Token, me *metricExpr, window, step int64) []byte {
dst = append(dst, rollupResultCacheVersion)
+ dst = encoding.MarshalUint32(dst, at.AccountID)
+ dst = encoding.MarshalUint32(dst, at.ProjectID)
dst = encoding.MarshalUint64(dst, uint64(len(funcName)))
dst = append(dst, funcName...)
dst = encoding.MarshalInt64(dst, window)
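
`marshalRollupResultCacheKey` now prefixes every cache key with the tenant's `AccountID` and `ProjectID` right after the version byte, so identical queries issued by different tenants land in different cache entries. The toy snippet below illustrates just that prefix; the function name, window, step and marshaled metric expression follow it unchanged, and big-endian encoding is an assumption based on the `lib/encoding` helpers used above.

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// cacheKeyPrefix mirrors the first bytes written by marshalRollupResultCacheKey:
// the cache format version followed by accountID and projectID.
func cacheKeyPrefix(version byte, accountID, projectID uint32) []byte {
	dst := []byte{version}
	var buf [4]byte
	binary.BigEndian.PutUint32(buf[:], accountID)
	dst = append(dst, buf[:]...)
	binary.BigEndian.PutUint32(buf[:], projectID)
	dst = append(dst, buf[:]...)
	return dst
}

func main() {
	// The same query issued by two tenants now produces two distinct cache keys.
	fmt.Printf("tenant (123, 567): %x\n", cacheKeyPrefix(4, 123, 567))
	fmt.Printf("tenant (124, 567): %x\n", cacheKeyPrefix(4, 124, 567))
}
```
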
diff --git a/app/vmselect/promql/rollup_result_cache_test.go b/app/vmselect/promql/rollup_result_cache_test.go
index e2cdfac2e..f86c2a660 100644
--- a/app/vmselect/promql/rollup_result_cache_test.go
+++ b/app/vmselect/promql/rollup_result_cache_test.go
@@ -3,6 +3,7 @@ package promql
import (
"testing"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
@@ -15,6 +16,11 @@ func TestRollupResultCache(t *testing.T) {
End: 2000,
Step: 200,
+ AuthToken: &auth.Token{
+ AccountID: 333,
+ ProjectID: 843,
+ },
+
MayCache: true,
}
me := &metricExpr{
diff --git a/app/vmstorage/Makefile b/app/vmstorage/Makefile
new file mode 100644
index 000000000..046f98e5a
--- /dev/null
+++ b/app/vmstorage/Makefile
@@ -0,0 +1,32 @@
+# All these commands must be run from the repository root.
+
+run-vmstorage:
+ mkdir -p vmstorage-data
+ DOCKER_OPTS='-v $(shell pwd)/vmstorage-data:/vmstorage-data -p 8482:8482 -p 8400:8400 -p 8401:8401' \
+ APP_NAME=vmstorage \
+ ARGS='-retentionPeriod=12' \
+ $(MAKE) run-via-docker
+
+vmstorage:
+ APP_NAME=vmstorage $(MAKE) app-local
+
+vmstorage-race:
+ APP_NAME=vmstorage RACE=-race $(MAKE) app-local
+
+vmstorage-prod:
+ APP_NAME=vmstorage $(MAKE) app-via-docker
+
+vmstorage-prod-race:
+ APP_NAME=vmstorage RACE=-race $(MAKE) app-via-docker
+
+package-vmstorage:
+ APP_NAME=vmstorage $(MAKE) package-via-docker
+
+package-vmstorage-race:
+ APP_NAME=vmstorage RACE=-race $(MAKE) package-via-docker
+
+publish-vmstorage:
+ APP_NAME=vmstorage $(MAKE) publish-via-docker
+
+publish-vmstorage-race:
+ APP_NAME=vmstorage RACE=-race $(MAKE) publish-via-docker
diff --git a/app/vmstorage/README.md b/app/vmstorage/README.md
index 6df28ba61..741a669d2 100644
--- a/app/vmstorage/README.md
+++ b/app/vmstorage/README.md
@@ -1,5 +1,5 @@
`vmstorage` performs the following tasks:
-- Accepts inserts from `vminsert` and stores them to local storage.
+- Accepts inserts from `vminsert` nodes and stores them to local storage.
-- Performs select requests from `vmselect`.
+- Performs select requests from `vmselect` nodes.
diff --git a/app/vmstorage/deployment/Dockerfile b/app/vmstorage/deployment/Dockerfile
new file mode 100644
index 000000000..0341eb7cb
--- /dev/null
+++ b/app/vmstorage/deployment/Dockerfile
@@ -0,0 +1,7 @@
+FROM scratch
+COPY --from=local/certs:1.0.2 /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+COPY bin/vmstorage-prod .
+EXPOSE 8482
+EXPOSE 8400
+EXPOSE 8401
+ENTRYPOINT ["/vmstorage-prod"]
diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go
index 1e08b323a..ae612f0e5 100644
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -1,4 +1,4 @@
-package vmstorage
+package main
import (
"flag"
@@ -8,122 +8,84 @@ import (
"sync"
"time"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/transport"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
- "github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
"github.com/VictoriaMetrics/metrics"
)
var (
+ httpListenAddr = flag.String("httpListenAddr", ":8482", "Address to listen for http connections")
retentionPeriod = flag.Int("retentionPeriod", 1, "Retention period in months")
+ storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
+ vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
+ vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
snapshotAuthKey = flag.String("snapshotAuthKey", "", "authKey, which must be passed in query string to /snapshot* pages")
-
- precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss")
-
- // DataPath is a path to storage data.
- DataPath = flag.String("storageDataPath", "victoria-metrics-data", "Path to storage data")
)
-// Init initializes vmstorage.
-func Init() {
- if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
- logger.Fatalf("invalid `-precisionBits`: %s", err)
- }
- logger.Infof("opening storage at %q with retention period %d months", *DataPath, *retentionPeriod)
+func main() {
+ flag.Parse()
+ buildinfo.Init()
+ logger.Init()
+
+ logger.Infof("opening storage at %q with retention period %d months", *storageDataPath, *retentionPeriod)
startTime := time.Now()
- strg, err := storage.OpenStorage(*DataPath, *retentionPeriod)
+ strg, err := storage.OpenStorage(*storageDataPath, *retentionPeriod)
if err != nil {
- logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *DataPath, *retentionPeriod, err)
+ logger.Fatalf("cannot open a storage at %s with retention period %d months: %s", *storageDataPath, *retentionPeriod, err)
}
- Storage = strg
var m storage.Metrics
- Storage.UpdateMetrics(&m)
+ strg.UpdateMetrics(&m)
tm := &m.TableMetrics
partsCount := tm.SmallPartsCount + tm.BigPartsCount
blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
rowsCount := tm.SmallRowsCount + tm.BigRowsCount
logger.Infof("successfully opened storage %q in %s; partsCount: %d; blocksCount: %d; rowsCount: %d",
- *DataPath, time.Since(startTime), partsCount, blocksCount, rowsCount)
+ *storageDataPath, time.Since(startTime), partsCount, blocksCount, rowsCount)
- registerStorageMetrics(Storage)
-}
+ registerStorageMetrics(strg)
-// Storage is a storage.
-//
-// Every storage call must be wrapped into WG.Add(1) ... WG.Done()
-// for proper graceful shutdown when Stop is called.
-var Storage *storage.Storage
+ srv, err := transport.NewServer(*vminsertAddr, *vmselectAddr, strg)
+ if err != nil {
+ logger.Fatalf("cannot create a server with vminsertAddr=%s, vmselectAddr=%s: %s", *vminsertAddr, *vmselectAddr, err)
+ }
-// WG must be incremented before Storage call.
-//
-// Use syncwg instead of sync, since Add is called from concurrent goroutines.
-var WG syncwg.WaitGroup
+ go srv.RunVMInsert()
+ go srv.RunVMSelect()
-// AddRows adds mrs to the storage.
-func AddRows(mrs []storage.MetricRow) error {
- WG.Add(1)
- err := Storage.AddRows(mrs, uint8(*precisionBits))
- WG.Done()
- return err
-}
+ requestHandler := newRequestHandler(strg)
+ go func() {
+ httpserver.Serve(*httpListenAddr, requestHandler)
+ }()
-// DeleteMetrics deletes metrics matching tfss.
-//
-// Returns the number of deleted metrics.
-func DeleteMetrics(tfss []*storage.TagFilters) (int, error) {
- WG.Add(1)
- n, err := Storage.DeleteMetrics(tfss)
- WG.Done()
- return n, err
-}
+ sig := procutil.WaitForSigterm()
+ logger.Infof("service received signal %s", sig)
-// SearchTagKeys searches for tag keys
-func SearchTagKeys(maxTagKeys int) ([]string, error) {
- WG.Add(1)
- keys, err := Storage.SearchTagKeys(maxTagKeys)
- WG.Done()
- return keys, err
-}
+ logger.Infof("gracefully shutting down the service")
+ startTime = time.Now()
+ srv.MustClose()
+ logger.Infof("successfully shut down the service in %s", time.Since(startTime))
-// SearchTagValues searches for tag values for the given tagKey
-func SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
- WG.Add(1)
- values, err := Storage.SearchTagValues(tagKey, maxTagValues)
- WG.Done()
- return values, err
-}
-
-// GetSeriesCount returns the number of time series in the storage.
-func GetSeriesCount() (uint64, error) {
- WG.Add(1)
- n, err := Storage.GetSeriesCount()
- WG.Done()
- return n, err
-}
-
-// Stop stops the vmstorage
-func Stop() {
- logger.Infof("gracefully closing the storage at %s", *DataPath)
- startTime := time.Now()
- WG.WaitAndBlock()
- Storage.MustClose()
+ logger.Infof("gracefully closing the storage at %s", *storageDataPath)
+ startTime = time.Now()
+ strg.MustClose()
logger.Infof("successfully closed the storage in %s", time.Since(startTime))
- logger.Infof("the storage has been stopped")
+ logger.Infof("the vmstorage has been stopped")
}
-// RequestHandler is a storage request handler.
-func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
- path := r.URL.Path
- prometheusCompatibleResponse := false
- if path == "/api/v1/admin/tsdb/snapshot" {
- // Handle Prometheus API - https://prometheus.io/docs/prometheus/latest/querying/api/#snapshot .
- prometheusCompatibleResponse = true
- path = "/snapshot/create"
+func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
+ return func(w http.ResponseWriter, r *http.Request) bool {
+ return requestHandler(w, r, strg)
}
+}
+
+func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
+ path := r.URL.Path
if !strings.HasPrefix(path, "/snapshot") {
return false
}
@@ -137,22 +99,18 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
switch path {
case "/create":
w.Header().Set("Content-Type", "application/json")
- snapshotPath, err := Storage.CreateSnapshot()
+ snapshotPath, err := strg.CreateSnapshot()
if err != nil {
msg := fmt.Sprintf("cannot create snapshot: %s", err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
return true
}
- if prometheusCompatibleResponse {
- fmt.Fprintf(w, `{"status":"success","data":{"name":%q}}`, snapshotPath)
- } else {
- fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
- }
+ fmt.Fprintf(w, `{"status":"ok","snapshot":%q}`, snapshotPath)
return true
case "/list":
w.Header().Set("Content-Type", "application/json")
- snapshots, err := Storage.ListSnapshots()
+ snapshots, err := strg.ListSnapshots()
if err != nil {
msg := fmt.Sprintf("cannot list snapshots: %s", err)
logger.Errorf("%s", msg)
@@ -171,7 +129,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
case "/delete":
w.Header().Set("Content-Type", "application/json")
snapshotName := r.FormValue("snapshot")
- if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+ if err := strg.DeleteSnapshot(snapshotName); err != nil {
msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
@@ -181,7 +139,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
case "/delete_all":
w.Header().Set("Content-Type", "application/json")
- snapshots, err := Storage.ListSnapshots()
+ snapshots, err := strg.ListSnapshots()
if err != nil {
msg := fmt.Sprintf("cannot list snapshots: %s", err)
logger.Errorf("%s", msg)
@@ -189,7 +147,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
return true
}
for _, snapshotName := range snapshots {
- if err := Storage.DeleteSnapshot(snapshotName); err != nil {
+ if err := strg.DeleteSnapshot(snapshotName); err != nil {
msg := fmt.Sprintf("cannot delete snapshot %q: %s", snapshotName, err)
logger.Errorf("%s", msg)
fmt.Fprintf(w, `{"status":"error","msg":%q}`, msg)
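
The refactored request handler keeps the `/snapshot/*` HTTP API on the `-httpListenAddr` port (`:8482` by default). A minimal client sketch follows; the host, the `authKey` value and the query-arg name (taken from the `-snapshotAuthKey` flag description) are placeholders rather than confirmed API details, and the authKey is only required when that flag is set.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Host, port and authKey value are placeholders.
	resp, err := http.Get("http://localhost:8482/snapshot/create?authKey=secret")
	if err != nil {
		log.Fatalf("cannot create snapshot: %s", err)
	}
	defer resp.Body.Close()

	// On success the handler writes {"status":"ok","snapshot":"<name>"};
	// on failure it writes {"status":"error","msg":"..."}.
	var out struct {
		Status   string `json:"status"`
		Snapshot string `json:"snapshot"`
		Msg      string `json:"msg"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatalf("cannot parse response: %s", err)
	}
	if out.Status != "ok" {
		log.Fatalf("snapshot creation failed: %s", out.Msg)
	}
	fmt.Println("created snapshot:", out.Snapshot)
}
```
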
diff --git a/app/vmstorage/transport/server.go b/app/vmstorage/transport/server.go
new file mode 100644
index 000000000..9beefd1e0
--- /dev/null
+++ b/app/vmstorage/transport/server.go
@@ -0,0 +1,736 @@
+package transport
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "net"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
+ "github.com/VictoriaMetrics/metrics"
+)
+
+var (
+ maxTagKeysPerSearch = flag.Int("search.maxTagKeys", 10e3, "The maximum number of tag keys returned per search")
+ maxTagValuesPerSearch = flag.Int("search.maxTagValues", 10e3, "The maximum number of tag values returned per search")
+ maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 100e3, "The maximum number of unique time series each search can scan")
+
+ precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss")
+)
+
+// Server processes connections from vminsert and vmselect.
+type Server struct {
+ storage *storage.Storage
+
+ vminsertLN net.Listener
+ vmselectLN net.Listener
+
+ vminsertWG sync.WaitGroup
+ vmselectWG sync.WaitGroup
+
+ vminsertConnsMap connsMap
+ vmselectConnsMap connsMap
+
+ stopFlag uint64
+}
+
+type connsMap struct {
+ mu sync.Mutex
+ m map[net.Conn]struct{}
+}
+
+func (cm *connsMap) Init() {
+ cm.m = make(map[net.Conn]struct{})
+}
+
+func (cm *connsMap) Add(c net.Conn) {
+ cm.mu.Lock()
+ cm.m[c] = struct{}{}
+ cm.mu.Unlock()
+}
+
+func (cm *connsMap) Delete(c net.Conn) {
+ cm.mu.Lock()
+ delete(cm.m, c)
+ cm.mu.Unlock()
+}
+
+func (cm *connsMap) CloseAll() {
+ cm.mu.Lock()
+ for c := range cm.m {
+ _ = c.Close()
+ }
+ cm.mu.Unlock()
+}
+
+// NewServer returns new Server.
+func NewServer(vminsertAddr, vmselectAddr string, storage *storage.Storage) (*Server, error) {
+ vminsertLN, err := netutil.NewTCPListener("vminsert", vminsertAddr)
+ if err != nil {
+ return nil, fmt.Errorf("unable to listen vminsertAddr %s: %s", vminsertAddr, err)
+ }
+ vmselectLN, err := netutil.NewTCPListener("vmselect", vmselectAddr)
+ if err != nil {
+ return nil, fmt.Errorf("unable to listen vmselectAddr %s: %s", vmselectAddr, err)
+ }
+ if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
+ return nil, fmt.Errorf("invalid -precisionBits: %s", err)
+ }
+
+ // Set network-level write timeouts to reasonable values in order to protect
+ // from broken networks.
+ // Do not set read timeouts, since they are managed separately -
+ // search for SetReadDeadline in this file.
+ vminsertLN.WriteTimeout = time.Minute
+ vmselectLN.WriteTimeout = time.Minute
+
+ s := &Server{
+ storage: storage,
+
+ vminsertLN: vminsertLN,
+ vmselectLN: vmselectLN,
+ }
+ s.vminsertConnsMap.Init()
+ s.vmselectConnsMap.Init()
+ return s, nil
+}
+
+// RunVMInsert runs a server accepting connections from vminsert.
+func (s *Server) RunVMInsert() {
+ logger.Infof("accepting vminsert conns at %s", s.vminsertLN.Addr())
+ for {
+ c, err := s.vminsertLN.Accept()
+ if err != nil {
+ if pe, ok := err.(net.Error); ok && pe.Temporary() {
+ continue
+ }
+ if s.isStopping() {
+ return
+ }
+ logger.Panicf("FATAL: cannot process vminsert conns at %s: %s", s.vminsertLN.Addr(), err)
+ }
+ logger.Infof("accepted vminsert conn from %s", c.RemoteAddr())
+
+ vminsertConns.Inc()
+ s.vminsertConnsMap.Add(c)
+ s.vminsertWG.Add(1)
+ go func() {
+ defer func() {
+ s.vminsertConnsMap.Delete(c)
+ vminsertConns.Dec()
+ s.vminsertWG.Done()
+ }()
+
+ // There is no need for response compression, since
+ // vmstorage doesn't send anything back to vminsert.
+ compressionLevel := 0
+ bc, err := handshake.VMInsertServer(c, compressionLevel)
+ if err != nil {
+ if s.isStopping() {
+ // c is closed inside Server.MustClose
+ return
+ }
+ logger.Errorf("cannot perform vminsert handshake with client %q: %s", c.RemoteAddr(), err)
+ _ = c.Close()
+ return
+ }
+ defer func() {
+ if !s.isStopping() {
+ logger.Infof("closing vminsert conn from %s", c.RemoteAddr())
+ }
+ _ = bc.Close()
+ }()
+
+ logger.Infof("processing vminsert conn from %s", c.RemoteAddr())
+ if err := s.processVMInsertConn(bc); err != nil {
+ if s.isStopping() {
+ return
+ }
+ vminsertConnErrors.Inc()
+ logger.Errorf("cannot process vminsert conn from %s: %s", c.RemoteAddr(), err)
+ }
+ }()
+ }
+}
+
+var (
+ vminsertConns = metrics.NewCounter("vm_vminsert_conns")
+ vminsertConnErrors = metrics.NewCounter("vm_vminsert_conn_errors_total")
+)
+
+// RunVMSelect runs a server accepting connections from vmselect.
+func (s *Server) RunVMSelect() {
+ logger.Infof("accepting vmselect conns at %s", s.vmselectLN.Addr())
+ for {
+ c, err := s.vmselectLN.Accept()
+ if err != nil {
+ if pe, ok := err.(net.Error); ok && pe.Temporary() {
+ continue
+ }
+ if s.isStopping() {
+ return
+ }
+ logger.Panicf("FATAL: cannot process vmselect conns at %s: %s", s.vmselectLN.Addr(), err)
+ }
+ logger.Infof("accepted vmselect conn from %s", c.RemoteAddr())
+
+ vmselectConns.Inc()
+ s.vmselectConnsMap.Add(c)
+ s.vmselectWG.Add(1)
+ go func() {
+ defer func() {
+ s.vmselectConnsMap.Delete(c)
+ vmselectConns.Dec()
+ s.vmselectWG.Done()
+ }()
+
+ // Do not compress responses to vmselect, since these responses
+ // already contain compressed data.
+ compressionLevel := 0
+ bc, err := handshake.VMSelectServer(c, compressionLevel)
+ if err != nil {
+ if s.isStopping() {
+ // c is closed inside Server.MustClose
+ return
+ }
+ logger.Errorf("cannot perform vmselect handshake with client %q: %s", c.RemoteAddr(), err)
+ _ = c.Close()
+ return
+ }
+
+ defer func() {
+ if !s.isStopping() {
+ logger.Infof("closing vmselect conn from %s", c.RemoteAddr())
+ }
+ _ = bc.Close()
+ }()
+
+ logger.Infof("processing vmselect conn from %s", c.RemoteAddr())
+ if err := s.processVMSelectConn(bc); err != nil {
+ if s.isStopping() {
+ return
+ }
+ vmselectConnErrors.Inc()
+ logger.Errorf("cannot process vmselect conn %s: %s", c.RemoteAddr(), err)
+ }
+ }()
+ }
+}
+
+var (
+ vmselectConns = metrics.NewCounter("vm_vmselect_conns")
+ vmselectConnErrors = metrics.NewCounter("vm_vmselect_conn_errors_total")
+)
+
+// MustClose gracefully closes the server,
+// so it no longer touches s.storage after returning.
+func (s *Server) MustClose() {
+ // Mark the server as stopping.
+ s.setIsStopping()
+
+ // Stop accepting new connections from vminsert and vmselect.
+ if err := s.vminsertLN.Close(); err != nil {
+ logger.Panicf("FATAL: cannot close vminsert listener: %s", err)
+ }
+ if err := s.vmselectLN.Close(); err != nil {
+ logger.Panicf("FATAL: cannot close vmselect listener: %s", err)
+ }
+
+ // Close existing connections from vminsert, so the goroutines
+ // processing these connections are finished.
+ s.vminsertConnsMap.CloseAll()
+
+ // Close existing connections from vmselect, so the goroutines
+ // processing these connections are finished.
+ s.vmselectConnsMap.CloseAll()
+
+ // Wait until all the goroutines processing vminsert and vmselect conns
+ // are finished.
+ s.vminsertWG.Wait()
+ s.vmselectWG.Wait()
+}
+
+func (s *Server) setIsStopping() {
+ atomic.StoreUint64(&s.stopFlag, 1)
+}
+
+func (s *Server) isStopping() bool {
+ return atomic.LoadUint64(&s.stopFlag) != 0
+}
+
+func (s *Server) processVMInsertConn(r io.Reader) error {
+ sizeBuf := make([]byte, 8)
+ var buf []byte
+ var mrs []storage.MetricRow
+ for {
+ if _, err := io.ReadFull(r, sizeBuf); err != nil {
+ if err == io.EOF {
+ // Remote end gracefully closed the connection.
+ return nil
+ }
+ return fmt.Errorf("cannot read packet size: %s", err)
+ }
+ packetSize := encoding.UnmarshalUint64(sizeBuf)
+ if packetSize > consts.MaxInsertPacketSize {
+ return fmt.Errorf("too big packet size: %d; shouldn't exceed %d", packetSize, consts.MaxInsertPacketSize)
+ }
+ buf = bytesutil.Resize(buf, int(packetSize))
+ if _, err := io.ReadFull(r, buf); err != nil {
+ return fmt.Errorf("cannot read packet with size %d: %s", packetSize, err)
+ }
+ vminsertPacketsRead.Inc()
+
+ // Read metric rows from the packet.
+ mrs = mrs[:0]
+ tail := buf
+ for len(tail) > 0 {
+ if len(mrs) < cap(mrs) {
+ mrs = mrs[:len(mrs)+1]
+ } else {
+ mrs = append(mrs, storage.MetricRow{})
+ }
+ mr := &mrs[len(mrs)-1]
+ var err error
+ tail, err = mr.Unmarshal(tail)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal MetricRow: %s", err)
+ }
+ }
+ vminsertMetricsRead.Add(len(mrs))
+ if err := s.storage.AddRows(mrs, uint8(*precisionBits)); err != nil {
+ return fmt.Errorf("cannot store metrics: %s", err)
+ }
+ }
+}
+
+var (
+ vminsertPacketsRead = metrics.NewCounter("vm_vminsert_packets_read_total")
+ vminsertMetricsRead = metrics.NewCounter("vm_vminsert_metrics_read_total")
+)
+
+func (s *Server) processVMSelectConn(bc *handshake.BufferedConn) error {
+ ctx := &vmselectRequestCtx{
+ bc: bc,
+ sizeBuf: make([]byte, 8),
+ }
+ for {
+ err := s.processVMSelectRequest(ctx)
+ n := atomic.LoadUint64(&ctx.sr.MissingMetricNamesForMetricID)
+ missingMetricNamesForMetricID.Add(int(n))
+ if err != nil {
+ if err == io.EOF {
+ // Remote client gracefully closed the connection.
+ return nil
+ }
+ return fmt.Errorf("cannot process vmselect request: %s", err)
+ }
+ if err := bc.Flush(); err != nil {
+ return fmt.Errorf("cannot flush compressed buffers: %s", err)
+ }
+ }
+}
+
+var missingMetricNamesForMetricID = metrics.NewCounter(`vm_missing_metric_names_for_metric_id_total`)
+
+type vmselectRequestCtx struct {
+ bc *handshake.BufferedConn
+ sizeBuf []byte
+ dataBuf []byte
+
+ sq storage.SearchQuery
+ tfss []*storage.TagFilters
+ sr storage.Search
+}
+
+func (ctx *vmselectRequestCtx) readUint32() (uint32, error) {
+ ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 4)
+ if _, err := io.ReadFull(ctx.bc, ctx.sizeBuf); err != nil {
+ if err == io.EOF {
+ return 0, err
+ }
+ return 0, fmt.Errorf("cannot read uint32: %s", err)
+ }
+ n := encoding.UnmarshalUint32(ctx.sizeBuf)
+ return n, nil
+}
+
+func (ctx *vmselectRequestCtx) readDataBufBytes(maxDataSize int) error {
+ ctx.sizeBuf = bytesutil.Resize(ctx.sizeBuf, 8)
+ if _, err := io.ReadFull(ctx.bc, ctx.sizeBuf); err != nil {
+ if err == io.EOF {
+ return err
+ }
+ return fmt.Errorf("cannot read data size: %s", err)
+ }
+ dataSize := encoding.UnmarshalUint64(ctx.sizeBuf)
+ if dataSize > uint64(maxDataSize) {
+ return fmt.Errorf("too big data size: %d; it mustn't exceed %d bytes", dataSize, maxDataSize)
+ }
+ ctx.dataBuf = bytesutil.Resize(ctx.dataBuf, int(dataSize))
+ if dataSize == 0 {
+ return nil
+ }
+ if _, err := io.ReadFull(ctx.bc, ctx.dataBuf); err != nil {
+ return fmt.Errorf("cannot read data with size %d: %s", dataSize, err)
+ }
+ return nil
+}
+
+func (ctx *vmselectRequestCtx) writeDataBufBytes() error {
+ if err := ctx.writeUint64(uint64(len(ctx.dataBuf))); err != nil {
+ return fmt.Errorf("cannot write data size: %s", err)
+ }
+ if len(ctx.dataBuf) == 0 {
+ return nil
+ }
+ if _, err := ctx.bc.Write(ctx.dataBuf); err != nil {
+ return fmt.Errorf("cannot write data with size %d: %s", len(ctx.dataBuf), err)
+ }
+ return nil
+}
+
+func (ctx *vmselectRequestCtx) writeString(s string) error {
+ ctx.dataBuf = append(ctx.dataBuf[:0], s...)
+ return ctx.writeDataBufBytes()
+}
+
+func (ctx *vmselectRequestCtx) writeUint64(n uint64) error {
+ ctx.sizeBuf = encoding.MarshalUint64(ctx.sizeBuf[:0], n)
+ if _, err := ctx.bc.Write(ctx.sizeBuf); err != nil {
+ return fmt.Errorf("cannot write uint64 %d: %s", n, err)
+ }
+ return nil
+}
+
+const maxRPCNameSize = 128
+
+var zeroTime time.Time
+
+func (s *Server) processVMSelectRequest(ctx *vmselectRequestCtx) error {
+ // Read rpcName
+ // Do not set a deadline on reading rpcName, since it may take a
+ // long time on an idle connection.
+ if err := ctx.readDataBufBytes(maxRPCNameSize); err != nil {
+ if err == io.EOF {
+ // Remote client gracefully closed the connection.
+ return err
+ }
+ return fmt.Errorf("cannot read rpcName: %s", err)
+ }
+
+ // Limit the time required for reading request args.
+ if err := ctx.bc.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
+ return fmt.Errorf("cannot set read deadline for reading request args: %s", err)
+ }
+ defer func() {
+ _ = ctx.bc.SetReadDeadline(zeroTime)
+ }()
+
+ switch string(ctx.dataBuf) {
+ case "search_v2":
+ return s.processVMSelectSearchQuery(ctx)
+ case "labelValues":
+ return s.processVMSelectLabelValues(ctx)
+ case "labels":
+ return s.processVMSelectLabels(ctx)
+ case "seriesCount":
+ return s.processVMSelectSeriesCount(ctx)
+ case "deleteMetrics_v2":
+ return s.processVMSelectDeleteMetrics(ctx)
+ default:
+ return fmt.Errorf("unsupported rpcName: %q", ctx.dataBuf)
+ }
+}
+
+const maxTagFiltersSize = 64 * 1024
+
+func (s *Server) processVMSelectDeleteMetrics(ctx *vmselectRequestCtx) error {
+ vmselectDeleteMetricsRequests.Inc()
+
+ // Read request
+ if err := ctx.readDataBufBytes(maxTagFiltersSize); err != nil {
+ return fmt.Errorf("cannot read SearchQuery: %s", err)
+ }
+ tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
+ }
+ if len(tail) > 0 {
+ return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
+ }
+
+ // Setup ctx.tfss
+ if err := ctx.setupTfss(); err != nil {
+ // Send the error message to vmselect.
+ errMsg := err.Error()
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Delete the given metrics.
+ deletedCount, err := s.storage.DeleteMetrics(ctx.tfss)
+ if err != nil {
+ if err := ctx.writeString(err.Error()); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Send an empty error message to vmselect.
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send empty error message: %s", err)
+ }
+ // Send deletedCount to vmselect.
+ if err := ctx.writeUint64(uint64(deletedCount)); err != nil {
+ return fmt.Errorf("cannot send deletedCount=%d: %s", deletedCount, err)
+ }
+ return nil
+}
+
+func (s *Server) processVMSelectLabels(ctx *vmselectRequestCtx) error {
+ vmselectLabelsRequests.Inc()
+
+ // Read request
+ accountID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read accountID: %s", err)
+ }
+ projectID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read projectID: %s", err)
+ }
+
+ // Search for tag keys
+ labels, err := s.storage.SearchTagKeys(accountID, projectID, *maxTagKeysPerSearch)
+ if err != nil {
+ // Send the error message to vmselect.
+ errMsg := fmt.Sprintf("error during labels search: %s", err)
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Send an empty error message to vmselect.
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send empty error message: %s", err)
+ }
+
+ // Send labels to vmselect
+ for _, label := range labels {
+ if len(label) == 0 {
+ // Do this substitution in order to prevent clashing with 'end of response' marker.
+ label = "__name__"
+ }
+ if err := ctx.writeString(label); err != nil {
+ return fmt.Errorf("cannot write label %q: %s", label, err)
+ }
+ }
+
+ // Send 'end of response' marker
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+ }
+ return nil
+}
+
+const maxLabelValueSize = 16 * 1024
+
+func (s *Server) processVMSelectLabelValues(ctx *vmselectRequestCtx) error {
+ vmselectLabelValuesRequests.Inc()
+
+ // Read request
+ accountID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read accountID: %s", err)
+ }
+ projectID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read projectID: %s", err)
+ }
+ if err := ctx.readDataBufBytes(maxLabelValueSize); err != nil {
+ return fmt.Errorf("cannot read labelName: %s", err)
+ }
+ labelName := ctx.dataBuf
+
+ // Search for tag values
+ labelValues, err := s.storage.SearchTagValues(accountID, projectID, labelName, *maxTagValuesPerSearch)
+ if err != nil {
+ // Send the error message to vmselect.
+ errMsg := fmt.Sprintf("error during label values search for labelName=%q: %s", labelName, err)
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Send an empty error message to vmselect.
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send empty error message: %s", err)
+ }
+
+ // Send labelValues to vmselect
+ for _, labelValue := range labelValues {
+ if len(labelValue) == 0 {
+ // Skip empty label values, since they make no sense for Prometheus.
+ continue
+ }
+ if err := ctx.writeString(labelValue); err != nil {
+ return fmt.Errorf("cannot write labelValue %q: %s", labelValue, err)
+ }
+ }
+
+ // Send 'end of response' marker
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+ }
+ return nil
+}
+
+func (s *Server) processVMSelectSeriesCount(ctx *vmselectRequestCtx) error {
+ vmselectSeriesCountRequests.Inc()
+
+ // Read request
+ accountID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read accountID: %s", err)
+ }
+ projectID, err := ctx.readUint32()
+ if err != nil {
+ return fmt.Errorf("cannot read projectID: %s", err)
+ }
+
+ // Execute the request
+ n, err := s.storage.GetSeriesCount(accountID, projectID)
+ if err != nil {
+ // Send the error message to vmselect.
+ errMsg := fmt.Sprintf("error during obtaining series count: %s", err)
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Send an empty error message to vmselect.
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send empty error message: %s", err)
+ }
+
+ // Send series count to vmselect.
+ if err := ctx.writeUint64(n); err != nil {
+ return fmt.Errorf("cannot write series count to vmselect: %s", err)
+ }
+ return nil
+}
+
+// maxSearchQuerySize is the maximum size of SearchQuery packet in bytes.
+const maxSearchQuerySize = 1024 * 1024
+
+func (s *Server) processVMSelectSearchQuery(ctx *vmselectRequestCtx) error {
+ vmselectSearchQueryRequests.Inc()
+
+ // Read search query.
+ if err := ctx.readDataBufBytes(maxSearchQuerySize); err != nil {
+ return fmt.Errorf("cannot read searchQuery: %s", err)
+ }
+ tail, err := ctx.sq.Unmarshal(ctx.dataBuf)
+ if err != nil {
+ return fmt.Errorf("cannot unmarshal SearchQuery: %s", err)
+ }
+ if len(tail) > 0 {
+ return fmt.Errorf("unexpected non-zero tail left after unmarshaling SearchQuery: (len=%d) %q", len(tail), tail)
+ }
+
+ // Setup search.
+ if err := ctx.setupTfss(); err != nil {
+ // Send the error message to vmselect.
+ errMsg := err.Error()
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+ tr := storage.TimeRange{
+ MinTimestamp: ctx.sq.MinTimestamp,
+ MaxTimestamp: ctx.sq.MaxTimestamp,
+ }
+ ctx.sr.Init(s.storage, ctx.tfss, tr, *maxMetricsPerSearch)
+ defer ctx.sr.MustClose()
+ if err := ctx.sr.Error(); err != nil {
+ // Send the error message to vmselect.
+ errMsg := fmt.Sprintf("search error: %s", err)
+ if err := ctx.writeString(errMsg); err != nil {
+ return fmt.Errorf("cannot send error message: %s", err)
+ }
+ return nil
+ }
+
+ // Send empty error message to vmselect.
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send empty error message: %s", err)
+ }
+
+ // Send found blocks to vmselect.
+ for ctx.sr.NextMetricBlock() {
+ mb := ctx.sr.MetricBlock
+
+ vmselectMetricBlocksRead.Inc()
+ vmselectMetricRowsRead.Add(mb.Block.RowsCount())
+
+ ctx.dataBuf = mb.Marshal(ctx.dataBuf[:0])
+ if err := ctx.writeDataBufBytes(); err != nil {
+ return fmt.Errorf("cannot send MetricBlock: %s", err)
+ }
+ }
+ if err := ctx.sr.Error(); err != nil {
+ return fmt.Errorf("search error: %s", err)
+ }
+
+ // Send 'end of response' marker
+ if err := ctx.writeString(""); err != nil {
+ return fmt.Errorf("cannot send 'end of response' marker: %s", err)
+ }
+ return nil
+}
+
+var (
+ vmselectDeleteMetricsRequests = metrics.NewCounter("vm_vmselect_delete_metrics_requests_total")
+ vmselectLabelsRequests = metrics.NewCounter("vm_vmselect_labels_requests_total")
+ vmselectLabelValuesRequests = metrics.NewCounter("vm_vmselect_label_values_requests_total")
+ vmselectSeriesCountRequests = metrics.NewCounter("vm_vmselect_series_count_requests_total")
+ vmselectSearchQueryRequests = metrics.NewCounter("vm_vmselect_search_query_requests_total")
+ vmselectMetricBlocksRead = metrics.NewCounter("vm_vmselect_metric_blocks_read_total")
+ vmselectMetricRowsRead = metrics.NewCounter("vm_vmselect_metric_rows_read_total")
+)
+
+func (ctx *vmselectRequestCtx) setupTfss() error {
+ tfss := ctx.tfss[:0]
+ for _, tagFilters := range ctx.sq.TagFilterss {
+ if len(tfss) < cap(tfss) {
+ tfss = tfss[:len(tfss)+1]
+ } else {
+ tfss = append(tfss, &storage.TagFilters{})
+ }
+ tfs := tfss[len(tfss)-1]
+ tfs.Reset(ctx.sq.AccountID, ctx.sq.ProjectID)
+ for i := range tagFilters {
+ tf := &tagFilters[i]
+ if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
+ return fmt.Errorf("cannot parse tag filter %s: %s", tf, err)
+ }
+ }
+ }
+ ctx.tfss = tfss
+ return nil
+}
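
The vmselect RPC protocol implemented above is a simple length-prefixed framing over the connection established by `lib/handshake`: the client sends the RPC name as a `uint64`-length-prefixed byte string, request arguments follow (raw big-endian `uint32` values for `accountID`/`projectID`, length-prefixed byte strings otherwise), and the server replies with an error string (empty on success), the payload items, and an empty string as the 'end of response' marker. The sketch below shows the client side of the `labels` RPC only; it operates on a plain `io.ReadWriter` and deliberately omits the handshake/compression layer, so treat it as an illustration of the framing rather than a drop-in client.

```go
package rpcsketch

import (
	"encoding/binary"
	"fmt"
	"io"
)

// requestLabels issues the "labels" RPC against a vmstorage connection.
// conn stands for an already-established handshake.BufferedConn; big-endian
// byte order is assumed based on the lib/encoding helpers used by the server.
func requestLabels(conn io.ReadWriter, accountID, projectID uint32) ([]string, error) {
	writeBytes := func(b []byte) error {
		var sizeBuf [8]byte
		binary.BigEndian.PutUint64(sizeBuf[:], uint64(len(b)))
		if _, err := conn.Write(sizeBuf[:]); err != nil {
			return err
		}
		_, err := conn.Write(b)
		return err
	}
	readBytes := func() ([]byte, error) {
		var sizeBuf [8]byte
		if _, err := io.ReadFull(conn, sizeBuf[:]); err != nil {
			return nil, err
		}
		buf := make([]byte, binary.BigEndian.Uint64(sizeBuf[:]))
		if _, err := io.ReadFull(conn, buf); err != nil {
			return nil, err
		}
		return buf, nil
	}

	// The RPC name is a length-prefixed byte string; see processVMSelectRequest.
	if err := writeBytes([]byte("labels")); err != nil {
		return nil, fmt.Errorf("cannot send rpcName: %s", err)
	}
	// accountID and projectID are raw big-endian uint32 values; see readUint32.
	var u32 [4]byte
	binary.BigEndian.PutUint32(u32[:], accountID)
	if _, err := conn.Write(u32[:]); err != nil {
		return nil, fmt.Errorf("cannot send accountID: %s", err)
	}
	binary.BigEndian.PutUint32(u32[:], projectID)
	if _, err := conn.Write(u32[:]); err != nil {
		return nil, fmt.Errorf("cannot send projectID: %s", err)
	}

	// The server replies with an error message first; empty means success.
	errMsg, err := readBytes()
	if err != nil {
		return nil, fmt.Errorf("cannot read error message: %s", err)
	}
	if len(errMsg) > 0 {
		return nil, fmt.Errorf("remote error: %s", errMsg)
	}

	// Labels follow as length-prefixed strings until the empty
	// 'end of response' marker.
	var labels []string
	for {
		label, err := readBytes()
		if err != nil {
			return nil, fmt.Errorf("cannot read label: %s", err)
		}
		if len(label) == 0 {
			return labels, nil
		}
		labels = append(labels, string(label))
	}
}
```

The same framing is used by `search_v2`, which streams marshaled `MetricBlock`s instead of labels before sending the empty 'end of response' marker.
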
diff --git a/deployment/docker/Makefile b/deployment/docker/Makefile
index 84537632f..a57f1f616 100644
--- a/deployment/docker/Makefile
+++ b/deployment/docker/Makefile
@@ -1,3 +1,5 @@
+# All these commands must be run from the repository root.
+
DOCKER_NAMESPACE := valyala
BUILDER_IMAGE := local/builder:go1.12.5
CERTS_IMAGE := local/certs:1.0.2
diff --git a/deployment/docker/docker-compose.yml b/deployment/docker/docker-compose.yml
new file mode 100644
index 000000000..cb1f8b3b7
--- /dev/null
+++ b/deployment/docker/docker-compose.yml
@@ -0,0 +1,68 @@
+version: '3.5'
+services:
+ prometheus:
+ container_name: prometheus
+ image: prom/prometheus:v2.3.2
+ depends_on:
+ - "vminsert"
+ - "vmselect"
+ ports:
+ - 9090:9090
+ volumes:
+ - promdata:/prometheus
+ - ./prometheus.yml:/etc/prometheus/prometheus.yml
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ - '--storage.tsdb.path=/prometheus'
+ - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+ - '--web.console.templates=/usr/share/prometheus/consoles'
+ networks:
+ - docker_net
+ restart: always
+ vmstorage:
+ container_name: vmstorage
+ image: valyala/vmstorage:heads-cluster-0-gca0d4847
+ ports:
+ - 8482:8482
+ - 8400:8400
+ - 8401:8401
+ volumes:
+ - strgdata:/storage
+ command:
+ - '--storageDataPath=/storage'
+ - '--vminsertAddr=:8401'
+ - '--vmselectAddr=:8400'
+ - '--httpListenAddr=:8482'
+ networks:
+ - docker_net
+ restart: always
+ vmselect:
+ container_name: vmselect
+ image: valyala/vmselect:heads-cluster-0-gca0d4847
+ depends_on:
+ - "vmstorage"
+ ports:
+ - 8481:8481
+ command:
+ - '--storageNode=vmstorage:8400'
+ networks:
+ - docker_net
+ restart: always
+ vminsert:
+ container_name: vminsert
+ image: valyala/vminsert:heads-cluster-0-gca0d4847
+ depends_on:
+ - "vmstorage"
+ command:
+ - '--storageNode=vmstorage:8401'
+ ports:
+ - 8480:8480
+ networks:
+ - docker_net
+ restart: always
+volumes:
+ promdata: {}
+ strgdata: {}
+networks:
+ docker_net:
+ driver: bridge
diff --git a/deployment/docker/prometheus.yml b/deployment/docker/prometheus.yml
new file mode 100644
index 000000000..dc2f7d4a0
--- /dev/null
+++ b/deployment/docker/prometheus.yml
@@ -0,0 +1,23 @@
+global:
+ scrape_interval: 10s
+ evaluation_interval: 10s
+
+remote_write:
+ - url: "http://vminsert:8480/insert/0/prometheus/"
+
+scrape_configs:
+ - job_name: 'prometheus'
+ static_configs:
+ - targets: ['prometheus:9090']
+
+ - job_name: 'vminsert'
+ static_configs:
+ - targets: ['vminsert:8480']
+
+ - job_name: 'vmselect'
+ static_configs:
+ - targets: ['vmselect:8481']
+
+ - job_name: 'vmstorage'
+ static_configs:
+ - targets: ['vmstorage:8482']
diff --git a/deployment/k8s/helm/Makefile b/deployment/k8s/helm/Makefile
new file mode 100644
index 000000000..45509957d
--- /dev/null
+++ b/deployment/k8s/helm/Makefile
@@ -0,0 +1,26 @@
+# All these commands must be run from the repository root.
+
+HELM_PROJECT=victoria-metrics
+HELM_PATH=deployment/k8s/helm/${HELM_PROJECT}
+HELM_APP_VERSION=1.0
+
+helm-init:
+ @helm init
+
+helm-install:
+ helm install $(HELM_PATH) -n $(ENV)
+
+helm-install-dev:
+ ENV=dev $(MAKE) helm-install
+
+helm-upgrade:
+ helm upgrade $(ENV) $(HELM_PATH)
+
+helm-upgrade-dev:
+ ENV=dev $(MAKE) helm-upgrade
+
+helm-delete:
+ helm del --purge $(ENV)
+
+helm-delete-dev:
+ ENV=dev $(MAKE) helm-delete
diff --git a/deployment/k8s/helm/README.md b/deployment/k8s/helm/README.md
new file mode 100644
index 000000000..cf7bf6867
--- /dev/null
+++ b/deployment/k8s/helm/README.md
@@ -0,0 +1,37 @@
+### VictoriaMetrics helm chart
+
+#### Create cluster from chart
+
+```bash
+$ ENV= make helm-install
+```
+
+For the DEV env:
+
+```bash
+$ make helm-install-dev
+```
+
+#### Upgrade cluster from chart
+
+```bash
+$ ENV= make helm-upgrade
+```
+
+For the DEV env:
+
+```bash
+$ make helm-upgrade-dev
+```
+
+#### Delete chart from cluster
+
+```bash
+$ ENV= make helm-delete
+```
+
+For the DEV env:
+
+```bash
+$ make helm-delete-dev
+```
diff --git a/deployment/k8s/helm/victoria-metrics/.helmignore b/deployment/k8s/helm/victoria-metrics/.helmignore
new file mode 100644
index 000000000..50af03172
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/.helmignore
@@ -0,0 +1,22 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/deployment/k8s/helm/victoria-metrics/Chart.yaml b/deployment/k8s/helm/victoria-metrics/Chart.yaml
new file mode 100644
index 000000000..48c2d77b2
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A Helm chart for Kubernetes
+name: victoria-metrics
+version: 0.1.0
diff --git a/deployment/k8s/helm/victoria-metrics/README.md b/deployment/k8s/helm/victoria-metrics/README.md
new file mode 100644
index 000000000..c89442c67
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/README.md
@@ -0,0 +1,8 @@
+# Victoria Metrics
+
+## TL;DR;
+
+1. Install helm chart. Check the output.
+2. Specify Remote Write URL in Prometheus.
+3. Configure Grafana's Prometheus Data Source.
+
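+The exact Remote Write and Grafana URLs for this chart are printed in the NOTES output after installation.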
diff --git a/deployment/k8s/helm/victoria-metrics/templates/NOTES.txt b/deployment/k8s/helm/victoria-metrics/templates/NOTES.txt
new file mode 100644
index 000000000..a276072d2
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/NOTES.txt
@@ -0,0 +1,76 @@
+{{ if .Values.vminsert.enabled }}
+Write API:
+
+The Victoria Metrics write API can be accessed via port {{ .Values.vminsert.service.servicePort }} on the following DNS name from within your cluster:
+{{ template "victoria-metrics.vminsert.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}
+
+Get the Victoria Metrics insert service URL by running these commands in the same shell:
+{{- if contains "NodePort" .Values.vminsert.service.type }}
+ export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "victoria-metrics.vminsert.fullname" . }})
+ export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+ echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.vminsert.service.type }}
+ NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+  You can watch its status by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "victoria-metrics.vminsert.fullname" . }}'
+
+ export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "victoria-metrics.vminsert.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+ echo http://$SERVICE_IP:{{ .Values.vminsert.service.servicePort }}
+{{- else if contains "ClusterIP" .Values.vminsert.service.type }}
+ export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Values.vminsert.name }}" -o jsonpath="{.items[0].metadata.name}")
+ kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8480
+{{- end }}
+
+You need to update your Prometheus configuration file by adding the following lines to it:
+
+prometheus.yml
+```yaml
+remote_write:
+ - url: "http:///insert/{{ .Values.vmstorage.retentionPeriod }}m/1/{{.Release.Name}}/prometheus/"
+
+```
+
+For example, inside the Kubernetes cluster:
+```yaml
+remote_write:
+ - url: "http://{{ template "victoria-metrics.vminsert.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}:{{ .Values.vminsert.service.servicePort }}/insert/{{ .Values.vmstorage.retentionPeriod }}m/1/{{.Release.Name}}/prometheus/"
+
+```
+{{- end }}
+
+{{- if .Values.vmselect.enabled }}
+Read API:
+
+The Victoria Metrics read API can be accessed via port {{ .Values.vmselect.service.servicePort }} on the following DNS name from within your cluster:
+{{ template "victoria-metrics.vmselect.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}
+
+Get the Victoria Metrics select service URL by running these commands in the same shell:
+{{- if contains "NodePort" .Values.vmselect.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "victoria-metrics.vmselect.fullname" . }})
+ export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+ echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.vmselect.service.type }}
+ NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+  You can watch its status by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "victoria-metrics.vmselect.fullname" . }}'
+
+ export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "victoria-metrics.vmselect.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+ echo http://$SERVICE_IP:{{ .Values.vmselect.service.servicePort }}
+{{- else if contains "ClusterIP" .Values.vmselect.service.type }}
+ export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ .Values.vmselect.name }}" -o jsonpath="{.items[0].metadata.name}")
+ kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8481
+{{- end }}
+
+You need to specify the select service URL in your Grafana:
+  NOTE: use the Prometheus Data Source type
+
+Input for the URL field in Grafana:
+
+```
+http:///select/{{ .Values.vmstorage.retentionPeriod }}m/1/{{.Release.Name}}/prometheus/
+```
+
+For example, inside the Kubernetes cluster:
+```
+http://{{ template "victoria-metrics.vmselect.fullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.clusterDomainSuffix }}:{{ .Values.vmselect.service.servicePort }}/select/{{ .Values.vmstorage.retentionPeriod }}m/1/{{.Release.Name}}/prometheus/"
+```
+{{- end }}
+
diff --git a/deployment/k8s/helm/victoria-metrics/templates/_helpers.tpl b/deployment/k8s/helm/victoria-metrics/templates/_helpers.tpl
new file mode 100644
index 000000000..7ed138313
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/_helpers.tpl
@@ -0,0 +1,129 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "victoria-metrics.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "victoria-metrics.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create unified labels for victoria-metrics components
+*/}}
+{{- define "victoria-metrics.common.matchLabels" -}}
+app.kubernetes.io/name: {{ include "victoria-metrics.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end -}}
+
+{{- define "victoria-metrics.common.metaLabels" -}}
+helm.sh/chart: {{ include "victoria-metrics.chart" . }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmstorage.labels" -}}
+{{ include "victoria-metrics.vmstorage.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmstorage.matchLabels" -}}
+app: {{ .Values.vmstorage.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmselect.labels" -}}
+{{ include "victoria-metrics.vmselect.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vmselect.matchLabels" -}}
+app: {{ .Values.vmselect.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.labels" -}}
+{{ include "victoria-metrics.vminsert.matchLabels" . }}
+{{ include "victoria-metrics.common.metaLabels" . }}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.matchLabels" -}}
+app: {{ .Values.vminsert.name }}
+{{ include "victoria-metrics.common.matchLabels" . }}
+{{- end -}}
+
+{{/*
+Create a fully qualified vmstorage name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vmstorage.fullname" -}}
+{{- if .Values.vmstorage.fullnameOverride -}}
+{{- .Values.vmstorage.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.server.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vmstorage.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create a fully qualified vmselect name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vmselect.fullname" -}}
+{{- if .Values.vmselect.fullnameOverride -}}
+{{- .Values.vmselect.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.vmselect.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vmselect.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create a fully qualified vminsert name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "victoria-metrics.vminsert.fullname" -}}
+{{- if .Values.vminsert.fullnameOverride -}}
+{{- .Values.vminsert.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- printf "%s-%s" .Release.Name .Values.vminsert.name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s-%s" .Release.Name $name .Values.vminsert.name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
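+{{/*
+Render one "--storageNode" flag per vmstorage replica, pointing at the replica's
+pod FQDN behind the headless vmstorage service: port 8400 is used by vmselect
+and port 8401 by vminsert (see the vmstorage container args).
+*/}}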
+{{- define "victoria-metrics.vmselect.vmstorage-pod-fqdn" -}}
+{{- $pod := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $svc := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $namespace := .Release.Namespace -}}
+{{- $dnsSuffix := .Values.clusterDomainSuffix -}}
+{{- range $i := until (.Values.vmstorage.replicaCount | int) -}}
+{{- printf "- --storageNode=%s-%d.%s.%s.svc.%s:8400\n" $pod $i $svc $namespace $dnsSuffix -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "victoria-metrics.vminsert.vmstorage-pod-fqdn" -}}
+{{- $pod := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $svc := include "victoria-metrics.vmstorage.fullname" . -}}
+{{- $namespace := .Release.Namespace -}}
+{{- $dnsSuffix := .Values.clusterDomainSuffix -}}
+{{- range $i := until (.Values.vmstorage.replicaCount | int) -}}
+{{- printf "- --storageNode=%s-%d.%s.%s.svc.%s:8401\n" $pod $i $svc $namespace $dnsSuffix -}}
+{{- end -}}
+{{- end -}}
+
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vminsert-deployment.yaml b/deployment/k8s/helm/victoria-metrics/templates/vminsert-deployment.yaml
new file mode 100644
index 000000000..1797722c9
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-deployment.yaml
@@ -0,0 +1,65 @@
+{{- if .Values.vminsert.enabled -}}
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ labels:
+ {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+ name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+ selector:
+ matchLabels:
+ {{- include "victoria-metrics.vminsert.matchLabels" . | nindent 6 }}
+ replicas: {{ .Values.vminsert.replicaCount }}
+ template:
+ metadata:
+ {{- if .Values.vminsert.podAnnotations }}
+ annotations:
+{{ toYaml .Values.vminsert.podAnnotations | indent 8 }}
+ {{- end }}
+ labels:
+ {{- include "victoria-metrics.vminsert.labels" . | nindent 8 }}
+ spec:
+{{- if .Values.vminsert.priorityClassName }}
+ priorityClassName: "{{ .Values.vminsert.priorityClassName }}"
+{{- end }}
+ containers:
+ - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vminsert.name }}
+ image: "{{ .Values.vminsert.image.repository }}:{{ .Values.vminsert.image.tag }}"
+ imagePullPolicy: "{{ .Values.vminsert.image.pullPolicy }}"
+ args:
+ {{- include "victoria-metrics.vminsert.vmstorage-pod-fqdn" . | nindent 12 }}
+ {{- range $key, $value := .Values.vminsert.extraArgs }}
+ - --{{ $key }}={{ $value }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: 8480
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 5
+ periodSeconds: 15
+ resources:
+{{ toYaml .Values.vminsert.resources | indent 12 }}
+ {{- if .Values.imagePullSecrets }}
+ imagePullSecrets:
+ {{ toYaml .Values.imagePullSecrets | indent 2 }}
+ {{- end }}
+ {{- if .Values.vminsert.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.vminsert.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.vminsert.securityContext }}
+ securityContext:
+{{ toYaml .Values.vminsert.securityContext | indent 8 }}
+ {{- end }}
+ {{- if .Values.vminsert.tolerations }}
+ tolerations:
+{{ toYaml .Values.vminsert.tolerations | indent 8 }}
+ {{- end }}
+ {{- if .Values.vminsert.affinity }}
+ affinity:
+{{ toYaml .Values.vminsert.affinity | indent 8 }}
+ {{- end }}
+{{- end }}
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vminsert-service.yaml b/deployment/k8s/helm/victoria-metrics/templates/vminsert-service.yaml
new file mode 100644
index 000000000..48c36f1f4
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vminsert-service.yaml
@@ -0,0 +1,40 @@
+{{- if .Values.vminsert.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+{{- if .Values.vminsert.service.annotations }}
+ annotations:
+{{ toYaml .Values.vminsert.service.annotations | indent 4}}
+{{- end }}
+ labels:
+ {{- include "victoria-metrics.vminsert.labels" . | nindent 4 }}
+{{- if .Values.vminsert.service.labels }}
+{{ toYaml .Values.vminsert.service.labels | indent 4}}
+{{- end }}
+ name: {{ template "victoria-metrics.vminsert.fullname" . }}
+spec:
+{{- if .Values.vminsert.service.clusterIP }}
+ clusterIP: {{ .Values.vminsert.service.clusterIP }}
+{{- end }}
+{{- if .Values.vminsert.service.externalIPs }}
+ externalIPs:
+{{ toYaml .Values.vminsert.service.externalIPs | indent 4 }}
+{{- end }}
+{{- if .Values.vminsert.service.loadBalancerIP }}
+ loadBalancerIP: {{ .Values.vminsert.service.loadBalancerIP }}
+{{- end }}
+{{- if .Values.vminsert.service.loadBalancerSourceRanges }}
+ loadBalancerSourceRanges:
+ {{- range $cidr := .Values.vminsert.service.loadBalancerSourceRanges }}
+ - {{ $cidr }}
+ {{- end }}
+{{- end }}
+ ports:
+ - name: http
+ port: {{ .Values.vminsert.service.servicePort }}
+ protocol: TCP
+ targetPort: http
+ selector:
+ {{- include "victoria-metrics.vminsert.matchLabels" . | nindent 4 }}
+ type: "{{ .Values.vminsert.service.type }}"
+{{- end }}
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vmselect-deployment.yaml b/deployment/k8s/helm/victoria-metrics/templates/vmselect-deployment.yaml
new file mode 100644
index 000000000..2d987f4b7
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-deployment.yaml
@@ -0,0 +1,72 @@
+{{- if .Values.vmselect.enabled -}}
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ labels:
+ {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+ name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+ selector:
+ matchLabels:
+ {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 6 }}
+ replicas: {{ .Values.vmselect.replicaCount }}
+ template:
+ metadata:
+ {{- if .Values.vmselect.podAnnotations }}
+ annotations:
+{{ toYaml .Values.vmselect.podAnnotations | indent 8 }}
+ {{- end }}
+ labels:
+ {{- include "victoria-metrics.vmselect.labels" . | nindent 8 }}
+ spec:
+{{- if .Values.vmselect.priorityClassName }}
+ priorityClassName: "{{ .Values.vmselect.priorityClassName }}"
+{{- end }}
+ containers:
+ - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vmselect.name }}
+ image: "{{ .Values.vmselect.image.repository }}:{{ .Values.vmselect.image.tag }}"
+ imagePullPolicy: "{{ .Values.vmselect.image.pullPolicy }}"
+ args:
+ - {{ printf "%s=%s" "--cacheDataPath" .Values.vmselect.cacheMountPath | quote}}
+ {{- include "victoria-metrics.vmselect.vmstorage-pod-fqdn" . | nindent 12 }}
+ {{- range $key, $value := .Values.vmselect.extraArgs }}
+ - --{{ $key }}={{ $value }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: 8481
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 5
+ periodSeconds: 15
+ volumeMounts:
+ - mountPath: {{ .Values.vmselect.cacheMountPath }}
+ name: cache-volume
+ resources:
+{{ toYaml .Values.vmselect.resources | indent 12 }}
+ {{- if .Values.imagePullSecrets }}
+ imagePullSecrets:
+ {{ toYaml .Values.imagePullSecrets | indent 2 }}
+ {{- end }}
+ {{- if .Values.vmselect.nodeSelector }}
+ nodeSelector:
+{{ toYaml .Values.vmselect.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.vmselect.securityContext }}
+ securityContext:
+{{ toYaml .Values.vmselect.securityContext | indent 8 }}
+ {{- end }}
+ {{- if .Values.vmselect.tolerations }}
+ tolerations:
+{{ toYaml .Values.vmselect.tolerations | indent 8 }}
+ {{- end }}
+ {{- if .Values.vmselect.affinity }}
+ affinity:
+{{ toYaml .Values.vmselect.affinity | indent 8 }}
+ {{- end }}
+ volumes:
+ - name: cache-volume
+ emptyDir: {}
+{{- end }}
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vmselect-service.yaml b/deployment/k8s/helm/victoria-metrics/templates/vmselect-service.yaml
new file mode 100644
index 000000000..f92120d44
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmselect-service.yaml
@@ -0,0 +1,40 @@
+{{- if .Values.vmselect.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+{{- if .Values.vmselect.service.annotations }}
+ annotations:
+{{ toYaml .Values.vmselect.service.annotations | indent 4}}
+{{- end }}
+ labels:
+ {{- include "victoria-metrics.vmselect.labels" . | nindent 4 }}
+{{- if .Values.vmselect.service.labels }}
+{{ toYaml .Values.vmselect.service.labels | indent 4}}
+{{- end }}
+ name: {{ template "victoria-metrics.vmselect.fullname" . }}
+spec:
+{{- if .Values.vmselect.service.clusterIP }}
+ clusterIP: {{ .Values.vmselect.service.clusterIP }}
+{{- end }}
+{{- if .Values.vmselect.service.externalIPs }}
+ externalIPs:
+{{ toYaml .Values.vmselect.service.externalIPs | indent 4 }}
+{{- end }}
+{{- if .Values.vmselect.service.loadBalancerIP }}
+ loadBalancerIP: {{ .Values.vmselect.service.loadBalancerIP }}
+{{- end }}
+{{- if .Values.vmselect.service.loadBalancerSourceRanges }}
+ loadBalancerSourceRanges:
+ {{- range $cidr := .Values.vmselect.service.loadBalancerSourceRanges }}
+ - {{ $cidr }}
+ {{- end }}
+{{- end }}
+ ports:
+ - name: http
+ port: {{ .Values.vmselect.service.servicePort }}
+ protocol: TCP
+ targetPort: http
+ selector:
+ {{- include "victoria-metrics.vmselect.matchLabels" . | nindent 4 }}
+ type: "{{ .Values.vmselect.service.type }}"
+{{- end }}
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vmstorage-service.yaml b/deployment/k8s/helm/victoria-metrics/templates/vmstorage-service.yaml
new file mode 100644
index 000000000..4b694907b
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmstorage-service.yaml
@@ -0,0 +1,32 @@
+{{- if .Values.vmstorage.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+{{- if .Values.vmstorage.service.annotations }}
+ annotations:
+{{ toYaml .Values.vmstorage.service.annotations | indent 4 }}
+{{- end }}
+ labels:
+ {{- include "victoria-metrics.vmstorage.labels" . | nindent 4 }}
+{{- if .Values.vmstorage.service.labels }}
+{{ toYaml .Values.vmstorage.service.labels | indent 4 }}
+{{- end }}
+ name: {{ template "victoria-metrics.vmstorage.fullname" . }}
+spec:
+ clusterIP: None
+ ports:
+ - port: {{ .Values.vmstorage.service.servicePort }}
+ targetPort: http
+ protocol: TCP
+ name: http
+ - port: {{ .Values.vmstorage.service.vmselectPort }}
+ targetPort: vmselect
+ protocol: TCP
+ name: vmselect
+ - port: {{ .Values.vmstorage.service.vminsertPort }}
+ targetPort: vminsert
+ protocol: TCP
+ name: vminsert
+ selector:
+ {{- include "victoria-metrics.vmstorage.matchLabels" . | nindent 4 }}
+{{- end -}}
diff --git a/deployment/k8s/helm/victoria-metrics/templates/vmstorage-statefulset.yaml b/deployment/k8s/helm/victoria-metrics/templates/vmstorage-statefulset.yaml
new file mode 100644
index 000000000..367350358
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/templates/vmstorage-statefulset.yaml
@@ -0,0 +1,167 @@
+{{- if .Values.vmstorage.enabled -}}
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+{{- if .Values.vmstorage.annotations }}
+ annotations:
+{{ toYaml .Values.vmstorage.annotations | indent 4 }}
+{{- end }}
+ labels:
+ {{- include "victoria-metrics.vmstorage.labels" . | nindent 4 }}
+ name: {{ template "victoria-metrics.vmstorage.fullname" . }}
+spec:
+ serviceName: {{ template "victoria-metrics.vmstorage.fullname" . }}
+ selector:
+ matchLabels:
+ {{- include "victoria-metrics.vmstorage.matchLabels" . | nindent 6 }}
+ replicas: {{ .Values.vmstorage.replicaCount }}
+ podManagementPolicy: {{ .Values.vmstorage.podManagementPolicy }}
+ template:
+ metadata:
+ {{- if .Values.vmstorage.podAnnotations }}
+ annotations:
+{{ toYaml .Values.vmstorage.podAnnotations | indent 8 }}
+ {{- end }}
+ labels:
+ {{- include "victoria-metrics.vmstorage.labels" . | nindent 8 }}
+ spec:
+{{- if .Values.vmstorage.affinity }}
+ affinity:
+{{ toYaml .Values.vmstorage.affinity | indent 8 }}
+{{- end }}
+{{- if .Values.vmstorage.priorityClassName }}
+ priorityClassName: "{{ .Values.vmstorage.priorityClassName }}"
+{{- end }}
+{{- if .Values.vmstorage.schedulerName }}
+ schedulerName: "{{ .Values.vmstorage.schedulerName }}"
+{{- end }}
+ containers:
+ - name: {{ template "victoria-metrics.name" . }}-{{ .Values.vmstorage.name }}
+ image: "{{ .Values.vmstorage.image.repository }}:{{ .Values.vmstorage.image.tag }}"
+ imagePullPolicy: "{{ .Values.vmstorage.image.pullPolicy }}"
+ args:
+ - {{ printf "%s=%d" "--retentionPeriod" (int .Values.vmstorage.retentionPeriod) | quote}}
+ - {{ printf "%s=%s" "--storageDataPath" .Values.vmstorage.persistentVolume.mountPath | quote}}
+ - '--vminsertAddr=:8401'
+ - '--vmselectAddr=:8400'
+ - '--httpListenAddr=:8482'
+ {{- range $key, $value := .Values.vmstorage.extraArgs }}
+ - --{{ $key }}={{ $value }}
+ {{- end }}
+ ports:
+ - name: http
+ containerPort: 8482
+ - name: vmselect
+ containerPort: 8400
+ - name: vminsert
+ containerPort: 8401
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: http
+ initialDelaySeconds: 5
+ periodSeconds: 15
+ timeoutSeconds: 5
+ livenessProbe:
+ tcpSocket:
+ port: http
+ initialDelaySeconds: 5
+ periodSeconds: 15
+ timeoutSeconds: 5
+ resources:
+{{ toYaml .Values.vmstorage.resources | indent 12 }}
+ volumeMounts:
+ - name: vmstorage-volume
+ mountPath: {{ .Values.vmstorage.persistentVolume.mountPath }}
+ subPath: {{ .Values.vmstorage.persistentVolume.subPath }}
+ {{- range .Values.vmstorage.extraHostPathMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .mountPath }}
+ subPath: {{ .subPath }}
+ readOnly: {{ .readOnly }}
+ {{- end }}
+ {{- range .Values.vmstorage.extraConfigmapMounts }}
+ - name: {{ $.Values.vmstorage.name }}-{{ .name }}
+ mountPath: {{ .mountPath }}
+ subPath: {{ .subPath }}
+ readOnly: {{ .readOnly }}
+ {{- end }}
+ {{- range .Values.vmstorage.extraSecretMounts }}
+ - name: {{ .name }}
+ mountPath: {{ .mountPath }}
+ subPath: {{ .subPath }}
+ readOnly: {{ .readOnly }}
+ {{- end }}
+ {{- if .Values.imagePullSecrets }}
+ imagePullSecrets:
+ {{ toYaml .Values.imagePullSecrets | indent 2 }}
+ {{- end }}
+ {{- if .Values.vmstorage.nodeSelector }}
+      nodeSelector:
+{{ toYaml .Values.vmstorage.nodeSelector | indent 8 }}
+ {{- end }}
+ {{- if .Values.vmstorage.securityContext }}
+      securityContext:
+{{ toYaml .Values.vmstorage.securityContext | indent 8 }}
+ {{- end }}
+ {{- if .Values.vmstorage.tolerations }}
+      tolerations:
+{{ toYaml .Values.vmstorage.tolerations | indent 8 }}
+ {{- end }}
+ terminationGracePeriodSeconds: {{ .Values.vmstorage.terminationGracePeriodSeconds }}
+ volumes:
+ {{- range .Values.vmstorage.extraHostPathMounts }}
+ - name: {{ .name }}
+ hostPath:
+ path: {{ .hostPath }}
+ {{- end }}
+ {{- range .Values.vmstorage.extraConfigmapMounts }}
+ - name: {{ $.Values.vmstorage.name }}-{{ .name }}
+ configMap:
+ name: {{ .configMap }}
+ {{- end }}
+ {{- range .Values.vmstorage.extraSecretMounts }}
+ - name: {{ .name }}
+ secret:
+ secretName: {{ .secretName }}
+ {{- end }}
+{{- if .Values.vmstorage.persistentVolume.enabled }}
+ volumeClaimTemplates:
+ - metadata:
+ name: vmstorage-volume
+ {{- if .Values.vmstorage.persistentVolume.annotations }}
+ annotations:
+{{ toYaml .Values.vmstorage.persistentVolume.annotations | indent 10 }}
+ {{- end }}
+ spec:
+ accessModes:
+{{ toYaml .Values.vmstorage.persistentVolume.accessModes | indent 10 }}
+ resources:
+ requests:
+ storage: "{{ .Values.vmstorage.persistentVolume.size }}"
+ {{- if .Values.vmstorage.persistentVolume.storageClass }}
+ {{- if (eq "-" .Values.vmstorage.persistentVolume.storageClass) }}
+ storageClassName: ""
+ {{- else }}
+ storageClassName: "{{ .Values.vmstorage.persistentVolume.storageClass }}"
+ {{- end }}
+ {{- end }}
+{{- else }}
+ - name: vmstorage-volume
+ emptyDir: {}
+{{- end }}
+{{- end }}
diff --git a/deployment/k8s/helm/victoria-metrics/values.yaml b/deployment/k8s/helm/victoria-metrics/values.yaml
new file mode 100644
index 000000000..28fa09c93
--- /dev/null
+++ b/deployment/k8s/helm/victoria-metrics/values.yaml
@@ -0,0 +1,213 @@
+# Default values for victoria-metrics.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+## Your k8s cluster domain suffix, used for pods' FQDN
+## Ref: https://kubernetes.io/docs/tasks/administer-cluster/dns-custom-nameservers/
+##
+clusterDomainSuffix: cluster.local
+
+vmselect:
+ enabled: true
+ name: vmselect
+ image:
+ repository: valyala/vmselect
+ tag: heads-cluster-0-gca0d4847
+ pullPolicy: IfNotPresent
+ priorityClassName: ""
+ extraArgs: {}
+
+ ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
+ ##
+ tolerations: []
+ # - key: "key"
+ # operator: "Equal|Exists"
+ # value: "value"
+ # effect: "NoSchedule|PreferNoSchedule"
+
+ ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
+ ##
+ nodeSelector: {}
+ podAnnotations:
+ prometheus.io/scrape: "true"
+ replicaCount: 2
+ resources: {}
+ # limits:
+ # cpu: 50m
+ # memory: 64Mi
+ # requests:
+ # cpu: 50m
+ # memory: 64Mi
+ securityContext: {}
+ ## Root folder for cache
+ ##
+ cacheMountPath: /cache
+ service:
+ annotations: {}
+ labels: {}
+ clusterIP: ""
+ ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
+ ##
+ externalIPs: []
+ loadBalancerIP: ""
+ loadBalancerSourceRanges: []
+ servicePort: 8481
+ type: ClusterIP
+
+vminsert:
+ enabled: true
+ name: vminsert
+ image:
+ repository: valyala/vminsert
+ tag: heads-cluster-0-gca0d4847
+ pullPolicy: IfNotPresent
+ priorityClassName: ""
+ extraArgs: {}
+
+ ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
+ ##
+ tolerations: []
+ # - key: "key"
+ # operator: "Equal|Exists"
+ # value: "value"
+ # effect: "NoSchedule|PreferNoSchedule"
+
+ ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
+ ##
+ nodeSelector: {}
+ podAnnotations:
+ prometheus.io/scrape: "true"
+ replicaCount: 2
+ resources: {}
+ # limits:
+ # cpu: 50m
+ # memory: 64Mi
+ # requests:
+ # cpu: 50m
+ # memory: 64Mi
+ securityContext: {}
+ service:
+ annotations: {}
+ labels: {}
+ clusterIP: ""
+ ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
+ ##
+ externalIPs: []
+ loadBalancerIP: ""
+ loadBalancerSourceRanges: []
+ servicePort: 8480
+ type: ClusterIP
+
+vmstorage:
+ enabled: true
+ name: vmstorage
+ image:
+ repository: valyala/vmstorage
+ tag: heads-cluster-0-gca0d4847
+ pullPolicy: IfNotPresent
+ priorityClassName: ""
+ fullnameOverride:
+  ## Data retention period in months
+ ##
+ retentionPeriod: 1
+ ## Additional vmstorage container arguments
+ ##
+ extraArgs: {}
+
+ ## Additional vmstorage hostPath mounts
+ ##
+ extraHostPathMounts: []
+ # - name: certs-dir
+ # mountPath: /etc/kubernetes/certs
+ # subPath: ""
+ # hostPath: /etc/kubernetes/certs
+ # readOnly: true
+
+ extraConfigmapMounts: []
+ # - name: certs-configmap
+ # mountPath: /certs
+ # subPath: ""
+ # configMap: certs-configmap
+ # readOnly: true
+
+  ## Additional vmstorage secret mounts
+  ## Defines additional mounts with secrets. Secrets must be manually created in the namespace.
+ extraSecretMounts: []
+ # - name: secret-files
+ # mountPath: /etc/secrets
+ # subPath: ""
+ # secretName: secret-files
+ # readOnly: true
+
+ ## Node tolerations for server scheduling to nodes with taints
+ ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
+ ##
+ tolerations: []
+ # - key: "key"
+ # operator: "Equal|Exists"
+ # value: "value"
+ # effect: "NoSchedule|PreferNoSchedule"
+
+ ## Ref: https://kubernetes.io/docs/user-guide/node-selection/
+ ##
+ nodeSelector: {}
+
+ ## Pod affinity
+ ##
+ affinity: {}
+
+ ## Use an alternate scheduler, e.g. "stork".
+ ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
+ ##
+ # schedulerName:
+
+ persistentVolume:
+ ## If true, vmstorage will create/use a Persistent Volume Claim
+ ## If false, use emptyDir
+ ##
+ enabled: true
+
+ ## Must match those of existing PV or dynamic provisioner
+ ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/
+ ##
+ accessModes:
+ - ReadWriteOnce
+ annotations: {}
+
+ ## Requires vmstorage.persistentVolume.enabled: true
+ ## If defined, PVC must be created manually before volume will be bound
+ existingClaim: ""
+
+ ## Vmstorage data Persistent Volume mount root path
+ ##
+ mountPath: /storage
+ size: 8Gi
+ subPath: ""
+
+  podAnnotations:
+    prometheus.io/scrape: "true"
+ replicaCount: 2
+ podManagementPolicy: OrderedReady
+
+ ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
+ ##
+ resources: {}
+ # limits:
+ # cpu: 500m
+ # memory: 512Mi
+ # requests:
+ # cpu: 500m
+ # memory: 512Mi
+
+ ## Security context to be added to server pods
+ ##
+ securityContext: {}
+ service:
+ annotations: {}
+ labels: {}
+ servicePort: 8482
+ vmselectPort: 8400
+ vminsertPort: 8401
+ terminationGracePeriodSeconds: 60
diff --git a/go.mod b/go.mod
index cc0a71ebb..2272042bf 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ require (
github.com/VictoriaMetrics/metrics v1.4.0
github.com/cespare/xxhash/v2 v2.0.1-0.20190104013014-3767db7a7e18
github.com/golang/snappy v0.0.1
+ github.com/lithammer/go-jump-consistent-hash v1.0.0
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/valyala/fastjson v1.4.1
github.com/valyala/gozstd v1.5.0
diff --git a/go.sum b/go.sum
index 934851e73..8a6b282ed 100644
--- a/go.sum
+++ b/go.sum
@@ -20,6 +20,8 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/cpuid v0.0.0-20180405133222-e7e905edc00e/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/lithammer/go-jump-consistent-hash v1.0.0 h1:TmRnbmkUcGJzfiCXhy/D1FFtGLYEQfGWawHffhsTevI=
+github.com/lithammer/go-jump-consistent-hash v1.0.0/go.mod h1:Snz99O1UkmvgsOV76Jm7Zu4sokENziqvUCbPztFABIU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
diff --git a/lib/auth/auth.go b/lib/auth/auth.go
new file mode 100644
index 000000000..7056eb0b4
--- /dev/null
+++ b/lib/auth/auth.go
@@ -0,0 +1,35 @@
+package auth
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// Token contains settings for request processing
+type Token struct {
+ ProjectID uint32
+ AccountID uint32
+}
+
+// NewToken returns new Token for the given authToken
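+//
+// authToken must have the form "accountID" or "accountID:projectID",
+// e.g. "42:7" yields AccountID=42 and ProjectID=7; ProjectID defaults to 0 when omitted.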
+func NewToken(authToken string) (*Token, error) {
+ tmp := strings.Split(authToken, ":")
+ if len(tmp) > 2 {
+ return nil, fmt.Errorf("unexpected number of items in authToken %q; got %d; want 1 or 2", authToken, len(tmp))
+ }
+ var at Token
+ accountID, err := strconv.Atoi(tmp[0])
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse accountID from %q: %s", tmp[0], err)
+ }
+ at.AccountID = uint32(accountID)
+ if len(tmp) > 1 {
+ projectID, err := strconv.Atoi(tmp[1])
+ if err != nil {
+ return nil, fmt.Errorf("cannot parse projectID from %q: %s", tmp[1], err)
+ }
+ at.ProjectID = uint32(projectID)
+ }
+ return &at, nil
+}
diff --git a/lib/consts/consts.go b/lib/consts/consts.go
new file mode 100644
index 000000000..3b16e56f9
--- /dev/null
+++ b/lib/consts/consts.go
@@ -0,0 +1,4 @@
+package consts
+
+// MaxInsertPacketSize is the maximum packet size in bytes vminsert may send to vmstorage.
+const MaxInsertPacketSize = 100 * 1024 * 1024
diff --git a/lib/handshake/buffered_conn.go b/lib/handshake/buffered_conn.go
new file mode 100644
index 000000000..774adf036
--- /dev/null
+++ b/lib/handshake/buffered_conn.go
@@ -0,0 +1,80 @@
+package handshake
+
+import (
+ "bufio"
+ "io"
+ "net"
+
+ "github.com/valyala/gozstd"
+)
+
+type bufferedWriter interface {
+ Write(p []byte) (int, error)
+ Flush() error
+}
+
+// BufferedConn is a net.Conn with Flush support.
+type BufferedConn struct {
+ net.Conn
+
+ br io.Reader
+ bw bufferedWriter
+}
+
+const bufferSize = 64 * 1024
+
+// newBufferedConn returns buffered connection with the given compression level.
+func newBufferedConn(c net.Conn, compressionLevel int, isReadCompressed bool) *BufferedConn {
+ bc := &BufferedConn{
+ Conn: c,
+ }
+ if compressionLevel <= 0 {
+ bc.bw = bufio.NewWriterSize(c, bufferSize)
+ } else {
+ bc.bw = gozstd.NewWriterLevel(c, compressionLevel)
+ }
+ if !isReadCompressed {
+ bc.br = bufio.NewReaderSize(c, bufferSize)
+ } else {
+ bc.br = gozstd.NewReader(c)
+ }
+ return bc
+}
+
+// Read reads up to len(p) from bc to p.
+func (bc *BufferedConn) Read(p []byte) (int, error) {
+ return bc.br.Read(p)
+}
+
+// Write writes p to bc.
+//
+// Do not forget to call Flush if needed.
+func (bc *BufferedConn) Write(p []byte) (int, error) {
+ return bc.bw.Write(p)
+}
+
+// Close closes bc.
+func (bc *BufferedConn) Close() error {
+ // Close the Conn at first. It is expected that all the required data
+ // is already flushed to the Conn.
+ err := bc.Conn.Close()
+ bc.Conn = nil
+
+ if zr, ok := bc.br.(*gozstd.Reader); ok {
+ zr.Release()
+ }
+ bc.br = nil
+
+ if zw, ok := bc.bw.(*gozstd.Writer); ok {
+ // Do not call zw.Close(), since we already closed the underlying conn.
+ zw.Release()
+ }
+ bc.bw = nil
+
+ return err
+}
+
+// Flush flushes internal write buffers to the underlying conn.
+func (bc *BufferedConn) Flush() error {
+ return bc.bw.Flush()
+}
diff --git a/lib/handshake/handshake.go b/lib/handshake/handshake.go
new file mode 100644
index 000000000..0a71bdd1a
--- /dev/null
+++ b/lib/handshake/handshake.go
@@ -0,0 +1,170 @@
+package handshake
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "time"
+)
+
+const (
+ vminsertHello = "vminsert.01"
+ vmselectHello = "vmselect.01"
+
+ successResponse = "ok"
+)
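+
+// The handshake sequence implemented by genericClient/genericServer below is:
+// the client sends its hello string and the server replies "ok";
+// the client sends a one-byte isCompressed flag and the server replies "ok";
+// the server then sends its own isCompressed flag and the client replies "ok".
+// Both sides finally wrap the connection into a BufferedConn with the
+// negotiated compression settings.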
+
+// Func must perform handshake on the given c using the given compressionLevel.
+//
+// It must return BufferedConn wrapper for c on successful handshake.
+type Func func(c net.Conn, compressionLevel int) (*BufferedConn, error)
+
+// VMInsertClient performs client-side handshake for vminsert protocol.
+//
+// compressionLevel is the level used for compression of the data sent
+// to the server.
+// compressionLevel <= 0 means 'no compression'
+func VMInsertClient(c net.Conn, compressionLevel int) (*BufferedConn, error) {
+ return genericClient(c, vminsertHello, compressionLevel)
+}
+
+// VMInsertServer performs server-side handshake for vminsert protocol.
+//
+// compressionLevel is the level used for compression of the data sent
+// to the client.
+// compressionLevel <= 0 means 'no compression'
+func VMInsertServer(c net.Conn, compressionLevel int) (*BufferedConn, error) {
+ return genericServer(c, vminsertHello, compressionLevel)
+}
+
+// VMSelectClient performs client-side handshake for vmselect protocol.
+//
+// compressionLevel is the level used for compression of the data sent
+// to the server.
+// compressionLevel <= 0 means 'no compression'
+func VMSelectClient(c net.Conn, compressionLevel int) (*BufferedConn, error) {
+ return genericClient(c, vmselectHello, compressionLevel)
+}
+
+// VMSelectServer performs server-side handshake for vmselect protocol.
+//
+// compressionLevel is the level used for compression of the data sent
+// to the client.
+// compressionLevel <= 0 means 'no compression'
+func VMSelectServer(c net.Conn, compressionLevel int) (*BufferedConn, error) {
+ return genericServer(c, vmselectHello, compressionLevel)
+}
+
+func genericServer(c net.Conn, msg string, compressionLevel int) (*BufferedConn, error) {
+ if err := readMessage(c, msg); err != nil {
+ return nil, fmt.Errorf("cannot read hello: %s", err)
+ }
+ if err := writeMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot write success response on hello: %s", err)
+ }
+ isRemoteCompressed, err := readIsCompressed(c)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read isCompressed flag: %s", err)
+ }
+ if err := writeMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot write success response on isCompressed: %s", err)
+ }
+ if err := writeIsCompressed(c, compressionLevel > 0); err != nil {
+ return nil, fmt.Errorf("cannot write isCompressed flag: %s", err)
+ }
+ if err := readMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot read success response on isCompressed: %s", err)
+ }
+ bc := newBufferedConn(c, compressionLevel, isRemoteCompressed)
+ return bc, nil
+}
+
+func genericClient(c net.Conn, msg string, compressionLevel int) (*BufferedConn, error) {
+ if err := writeMessage(c, msg); err != nil {
+ return nil, fmt.Errorf("cannot write hello: %s", err)
+ }
+ if err := readMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot read success response after sending hello: %s", err)
+ }
+ if err := writeIsCompressed(c, compressionLevel > 0); err != nil {
+ return nil, fmt.Errorf("cannot write isCompressed flag: %s", err)
+ }
+ if err := readMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot read success response on isCompressed: %s", err)
+ }
+ isRemoteCompressed, err := readIsCompressed(c)
+ if err != nil {
+ return nil, fmt.Errorf("cannot read isCompressed flag: %s", err)
+ }
+ if err := writeMessage(c, successResponse); err != nil {
+ return nil, fmt.Errorf("cannot write success response on isCompressed: %s", err)
+ }
+ bc := newBufferedConn(c, compressionLevel, isRemoteCompressed)
+ return bc, nil
+}
+
+func writeIsCompressed(c net.Conn, isCompressed bool) error {
+ var buf [1]byte
+ if isCompressed {
+ buf[0] = 1
+ }
+ return writeMessage(c, string(buf[:]))
+}
+
+func readIsCompressed(c net.Conn) (bool, error) {
+ buf, err := readData(c, 1)
+ if err != nil {
+ return false, err
+ }
+ isCompressed := (buf[0] != 0)
+ return isCompressed, nil
+}
+
+func writeMessage(c net.Conn, msg string) error {
+ if err := c.SetWriteDeadline(time.Now().Add(time.Second)); err != nil {
+ return fmt.Errorf("cannot set write deadline: %s", err)
+ }
+ if _, err := io.WriteString(c, msg); err != nil {
+ return fmt.Errorf("cannot write %q to server: %s", msg, err)
+ }
+ if fc, ok := c.(flusher); ok {
+ if err := fc.Flush(); err != nil {
+ return fmt.Errorf("cannot flush %q to server: %s", msg, err)
+ }
+ }
+ if err := c.SetWriteDeadline(zeroTime); err != nil {
+ return fmt.Errorf("cannot reset write deadline: %s", err)
+ }
+ return nil
+}
+
+type flusher interface {
+ Flush() error
+}
+
+func readMessage(c net.Conn, msg string) error {
+ buf, err := readData(c, len(msg))
+ if err != nil {
+ return err
+ }
+ if string(buf) != msg {
+ return fmt.Errorf("unexpected message obtained; got %q; want %q", buf, msg)
+ }
+ return nil
+}
+
+func readData(c net.Conn, dataLen int) ([]byte, error) {
+ if err := c.SetReadDeadline(time.Now().Add(time.Second)); err != nil {
+ return nil, fmt.Errorf("cannot set read deadline: %s", err)
+ }
+ data := make([]byte, dataLen)
+ if _, err := io.ReadFull(c, data); err != nil {
+ return nil, fmt.Errorf("cannot read message with size %d: %s", dataLen, err)
+ }
+ if err := c.SetReadDeadline(zeroTime); err != nil {
+ return nil, fmt.Errorf("cannot reset read deadline: %s", err)
+ }
+ return data, nil
+}
+
+var zeroTime time.Time
diff --git a/lib/handshake/handshake_test.go b/lib/handshake/handshake_test.go
new file mode 100644
index 000000000..4d4d9b229
--- /dev/null
+++ b/lib/handshake/handshake_test.go
@@ -0,0 +1,61 @@
+package handshake
+
+import (
+ "fmt"
+ "net"
+ "testing"
+ "time"
+)
+
+func TestVMInsertHandshake(t *testing.T) {
+ testHandshake(t, VMInsertClient, VMInsertServer)
+}
+
+func TestVMSelectHandshake(t *testing.T) {
+ testHandshake(t, VMSelectClient, VMSelectServer)
+}
+
+func testHandshake(t *testing.T, clientFunc, serverFunc Func) {
+ t.Helper()
+
+ c, s := net.Pipe()
+ ch := make(chan error, 1)
+ go func() {
+ bcs, err := serverFunc(s, 3)
+ if err != nil {
+ ch <- fmt.Errorf("error on outer handshake: %s", err)
+ return
+ }
+ bcc, err := clientFunc(bcs, 3)
+ if err != nil {
+ ch <- fmt.Errorf("error on inner handshake: %s", err)
+ return
+ }
+ if bcc == nil {
+ ch <- fmt.Errorf("expecting non-nil conn")
+ return
+ }
+ ch <- nil
+ }()
+
+ bcc, err := clientFunc(c, 0)
+ if err != nil {
+ t.Fatalf("error on outer handshake: %s", err)
+ }
+ bcs, err := serverFunc(bcc, 0)
+ if err != nil {
+ t.Fatalf("error on inner handshake: %s", err)
+ }
+ if bcs == nil {
+ t.Fatalf("expecting non-nil conn")
+ }
+
+ select {
+ case <-time.After(5 * time.Second):
+ t.Fatalf("timeout")
+ case err := <-ch:
+ if err != nil {
+ t.Fatalf("unexpected error on the server side: %s", err)
+ }
+ }
+}
diff --git a/lib/httpserver/httpserver.go b/lib/httpserver/httpserver.go
index c03ac83d3..d8da38f88 100644
--- a/lib/httpserver/httpserver.go
+++ b/lib/httpserver/httpserver.go
@@ -5,7 +5,6 @@ import (
"compress/gzip"
"context"
"crypto/tls"
- "flag"
"fmt"
"io"
"net"
@@ -22,17 +21,6 @@ import (
"github.com/VictoriaMetrics/metrics"
)
-var (
- tlsEnable = flag.Bool("tls", false, "Whether to enable TLS (aka HTTPS) for incoming requests. tlsCertFile and tlsKeyFile must be set if tls=true")
- tlsCertFile = flag.String("tlsCertFile", "", "Path to file with TLS certificate. Used only if tls=true. Prefer ECDSA certs instead of RSA certs, since RSA certs are slow")
- tlsKeyFile = flag.String("tlsKeyFile", "", "Path to file with TLS key. Used only if tls=true")
-
- httpAuthUsername = flag.String("httpAuth.username", "", "Username for HTTP Basic Auth. The authentication is disabled if empty. See also -httpAuth.password")
- httpAuthPassword = flag.String("httpAuth.password", "", "Password for HTTP Basic Auth. The authentication is disabled -httpAuth.username is empty")
- metricsAuthKey = flag.String("metricsAuthKey", "", "Auth key for /metrics. It overrides httpAuth settings")
- pprofAuthKey = flag.String("pprofAuthKey", "", "Auth key for /debug/pprof. It overrides httpAuth settings")
-)
-
var (
servers = make(map[string]*http.Server)
serversLock sync.Mutex
@@ -52,29 +40,13 @@ type RequestHandler func(w http.ResponseWriter, r *http.Request) bool
// charges a lot for the egress traffic. The compression may be disabled
// by calling DisableResponseCompression before writing the first byte to w.
func Serve(addr string, rh RequestHandler) {
- scheme := "http"
- if *tlsEnable {
- scheme = "https"
- }
- logger.Infof("starting http server at %s://%s/", scheme, addr)
- logger.Infof("pprof handlers are exposed at %s://%s/debug/pprof/", scheme, addr)
- lnTmp, err := netutil.NewTCPListener(scheme, addr)
+ logger.Infof("starting http server at http://%s/", addr)
+ logger.Infof("pprof handlers are exposed at http://%s/debug/pprof/", addr)
+ ln, err := netutil.NewTCPListener("http", addr)
if err != nil {
- logger.Fatalf("cannot start http server at %s: %s", addr, err)
- }
- setNetworkTimeouts(lnTmp)
- ln := net.Listener(lnTmp)
-
- if *tlsEnable {
- cert, err := tls.LoadX509KeyPair(*tlsCertFile, *tlsKeyFile)
- if err != nil {
- logger.Fatalf("cannot load TLS cert from tlsCertFile=%q, tlsKeyFile=%q: %s", *tlsCertFile, *tlsKeyFile, err)
- }
- cfg := &tls.Config{
- Certificates: []tls.Certificate{cert},
- }
- ln = tls.NewListener(ln, cfg)
+ logger.Panicf("FATAL: cannot start http server at %s: %s", addr, err)
}
+ setNetworkTimeouts(ln)
serveWithListener(addr, ln, rh)
}
@@ -151,9 +123,6 @@ var metricsHandlerDuration = metrics.NewSummary(`vm_http_request_duration_second
func handlerWrapper(w http.ResponseWriter, r *http.Request, rh RequestHandler) {
requestsTotal.Inc()
- if !checkAuth(w, r) {
- return
- }
switch r.URL.Path {
case "/health":
w.Header().Set("Content-Type", "text/plain")
@@ -177,7 +146,6 @@ func handlerWrapper(w http.ResponseWriter, r *http.Request, rh RequestHandler) {
pprofHandler(r.URL.Path[len("/debug/pprof/"):], w, r)
return
}
-
if rh(w, r) {
return
}
@@ -188,41 +156,6 @@ func handlerWrapper(w http.ResponseWriter, r *http.Request, rh RequestHandler) {
}
}
-func checkAuth(w http.ResponseWriter, r *http.Request) bool {
- path := r.URL.Path
- if path == "/metrics" && len(*metricsAuthKey) > 0 {
- authKey := r.FormValue("authKey")
- if *metricsAuthKey == authKey {
- return true
- }
- http.Error(w, "The provided authKey doesn't match -metricsAuthKey", http.StatusUnauthorized)
- return false
- }
- if strings.HasPrefix(path, "/debug/pprof/") && len(*pprofAuthKey) > 0 {
- authKey := r.FormValue("authKey")
- if *pprofAuthKey == authKey {
- return true
- }
- http.Error(w, "The provided authKey doesn't match -pprofAuthKey", http.StatusUnauthorized)
- return false
- }
- return checkBasicAuth(w, r)
-}
-
-func checkBasicAuth(w http.ResponseWriter, r *http.Request) bool {
- if len(*httpAuthUsername) == 0 {
- // HTTP Basic Auth is disabled.
- return true
- }
- username, password, ok := r.BasicAuth()
- if ok && username == *httpAuthUsername && password == *httpAuthPassword {
- return true
- }
- w.Header().Set("WWW-Authenticate", `Basic realm="VictoriaMetrics"`)
- http.Error(w, "", http.StatusUnauthorized)
- return false
-}
-
func maybeGzipResponseWriter(w http.ResponseWriter, r *http.Request) http.ResponseWriter {
ae := r.Header.Get("Accept-Encoding")
if ae == "" {
diff --git a/lib/httpserver/path.go b/lib/httpserver/path.go
new file mode 100644
index 000000000..833c0a6b4
--- /dev/null
+++ b/lib/httpserver/path.go
@@ -0,0 +1,64 @@
+package httpserver
+
+import (
+ "fmt"
+ "strings"
+)
+
+// Path contains the following path structure:
+// /{prefix}/{authToken}/{suffix}
+//
+// It is compatible with SaaS version.
+type Path struct {
+ Prefix string
+ AuthToken string
+ Suffix string
+}
+
+// ParsePath parses the given path.
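+//
+// For example, "/insert/0/prometheus/" is parsed into
+// Prefix="insert", AuthToken="0" and Suffix="prometheus/".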
+func ParsePath(path string) (*Path, error) {
+ // The path must have the following form:
+ // /{prefix}/{authToken}/{suffix}
+ //
+ // - prefix must contain `select`, `insert` or `delete`.
+ // - authToken contains `accountID[:projectID]`, where projectID is optional.
+ // - suffix contains arbitrary suffix.
+ //
+ // prefix must be used for the routing to the appropriate service
+ // in the cluster - either vminsert or vmselect.
+ s := skipPrefixSlashes(path)
+ n := strings.IndexByte(s, '/')
+ if n < 0 {
+ return nil, fmt.Errorf("cannot find {prefix}")
+ }
+ prefix := s[:n]
+
+ s = skipPrefixSlashes(s[n+1:])
+ n = strings.IndexByte(s, '/')
+ if n < 0 {
+ return nil, fmt.Errorf("cannot find {authToken}")
+ }
+ authToken := s[:n]
+
+ s = skipPrefixSlashes(s[n+1:])
+
+	// Substitute double slashes with single slashes in the path, since such slashes
+	// may appear due to improper copy-pasting of the url.
+ suffix := strings.Replace(s, "//", "/", -1)
+
+ p := &Path{
+ Prefix: prefix,
+ AuthToken: authToken,
+ Suffix: suffix,
+ }
+ return p, nil
+}
+
+// skipPrefixSlashes removes leading slashes, which may appear due to
+// improper copy-pasting of the url.
+func skipPrefixSlashes(s string) string {
+ for len(s) > 0 && s[0] == '/' {
+ s = s[1:]
+ }
+ return s
+}
diff --git a/lib/netutil/conn_pool.go b/lib/netutil/conn_pool.go
new file mode 100644
index 000000000..aa3c63e13
--- /dev/null
+++ b/lib/netutil/conn_pool.go
@@ -0,0 +1,76 @@
+package netutil
+
+import (
+ "fmt"
+ "sync"
+
+ "github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
+)
+
+// ConnPool is a connection pool with ZSTD-compressed connections.
+type ConnPool struct {
+ mu sync.Mutex
+ d *TCPDialer
+
+ name string
+ handshakeFunc handshake.Func
+ compressionLevel int
+
+ conns []*handshake.BufferedConn
+}
+
+// NewConnPool creates a new connection pool for the given addr.
+//
+// Name is used in exported metrics.
+// handshakeFunc is used for handshaking after the connection is established.
+// The compression is disabled if compressionLevel <= 0.
+func NewConnPool(name, addr string, handshakeFunc handshake.Func, compressionLevel int) *ConnPool {
+ return &ConnPool{
+ d: NewTCPDialer(name, addr),
+
+ name: name,
+ handshakeFunc: handshakeFunc,
+ compressionLevel: compressionLevel,
+ }
+}
+
+// Addr returns the address where connections are established.
+func (cp *ConnPool) Addr() string {
+ return cp.d.addr
+}
+
+// Get returns free connection from the pool.
+func (cp *ConnPool) Get() (*handshake.BufferedConn, error) {
+ var bc *handshake.BufferedConn
+ cp.mu.Lock()
+ if len(cp.conns) > 0 {
+ bc = cp.conns[len(cp.conns)-1]
+ cp.conns[len(cp.conns)-1] = nil
+ cp.conns = cp.conns[:len(cp.conns)-1]
+ }
+ cp.mu.Unlock()
+ if bc != nil {
+ return bc, nil
+ }
+
+ // Pool is empty. Create new connection.
+ c, err := cp.d.Dial()
+ if err != nil {
+ return nil, fmt.Errorf("cannot dial %s: %s", cp.d.Addr(), err)
+ }
+ if bc, err = cp.handshakeFunc(c, cp.compressionLevel); err != nil {
+ err = fmt.Errorf("cannot perform %q handshake with server %q: %s", cp.name, cp.d.Addr(), err)
+ _ = c.Close()
+ return nil, err
+ }
+ return bc, nil
+}
+
+// Put puts bc back to the pool.
+//
+// Do not put broken and closed connections to the pool!
+func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
+ cp.mu.Lock()
+ cp.conns = append(cp.conns, bc)
+ cp.mu.Unlock()
+}
diff --git a/lib/netutil/tcpdialer.go b/lib/netutil/tcpdialer.go
new file mode 100644
index 000000000..ad3bfdc97
--- /dev/null
+++ b/lib/netutil/tcpdialer.go
@@ -0,0 +1,64 @@
+package netutil
+
+import (
+ "fmt"
+ "net"
+ "time"
+
+ "github.com/VictoriaMetrics/metrics"
+)
+
+// NewTCPDialer returns new dialer for dialing the given addr.
+//
+// The name is used in metric tags for the returned dialer.
+// The name must be unique among dialers.
+func NewTCPDialer(name, addr string) *TCPDialer {
+ d := &TCPDialer{
+ d: &net.Dialer{
+ Timeout: time.Second,
+ KeepAlive: time.Second,
+ },
+
+ addr: addr,
+
+ dials: metrics.NewCounter(fmt.Sprintf(`vm_tcpdialer_dials_total{name=%q, addr=%q}`, name, addr)),
+ dialErrors: metrics.NewCounter(fmt.Sprintf(`vm_tcpdialer_errors_total{name=%q, addr=%q, type="dial"}`, name, addr)),
+ }
+ d.connMetrics.init("vm_tcpdialer", name, addr)
+ return d
+}
+
+// TCPDialer is used for dialing the addr passed to NewTCPDialer.
+//
+// It also gathers various stats for dialed connections.
+type TCPDialer struct {
+ d *net.Dialer
+
+ addr string
+
+ dials *metrics.Counter
+ dialErrors *metrics.Counter
+
+ connMetrics
+}
+
+// Dial dials the addr passed to NewTCPDialer.
+func (d *TCPDialer) Dial() (net.Conn, error) {
+ d.dials.Inc()
+ c, err := d.d.Dial("tcp4", d.addr)
+ if err != nil {
+ d.dialErrors.Inc()
+ return nil, err
+ }
+ d.conns.Inc()
+ sc := &statConn{
+ Conn: c,
+ cm: &d.connMetrics,
+ }
+ return sc, err
+}
+
+// Addr returns the address the dialer dials to.
+func (d *TCPDialer) Addr() string {
+ return d.addr
+}
diff --git a/lib/storage/block_header_test.go b/lib/storage/block_header_test.go
index ea5cdd669..cba1b992d 100644
--- a/lib/storage/block_header_test.go
+++ b/lib/storage/block_header_test.go
@@ -11,7 +11,7 @@ func TestMarshaledBlockHeaderSize(t *testing.T) {
// This test makes sure marshaled format isn't changed.
// If this test breaks then the storage format has been changed,
// so it may become incompatible with the previously written data.
- expectedSize := 81
+ expectedSize := 89
if marshaledBlockHeaderSize != expectedSize {
t.Fatalf("unexpected marshaledBlockHeaderSize; got %d; want %d", marshaledBlockHeaderSize, expectedSize)
}
diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go
index c1150a1e1..967539cc3 100644
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -60,6 +60,9 @@ type indexDB struct {
// Cache for fast MetricID -> MetricName lookup.
metricNameCache *fastcache.Cache
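+
+	// Per-(accountID, projectID) generation prefixes for tagCache keys.
+	// Bumping a prefix in invalidateTagCache invalidates cached tag filter
+	// results for that (accountID, projectID) only.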
+ tagCachePrefixes map[accountProjectKey]uint64
+ tagCachePrefixesLock sync.RWMutex
+
indexSearchPool sync.Pool
// An inmemory map[uint64]struct{} of deleted metricIDs.
@@ -78,6 +81,12 @@ type indexDB struct {
mustDrop uint64
}
+// accountProjectKey is used for maps keyed by (AccountID, ProjectID).
+type accountProjectKey struct {
+ AccountID uint32
+ ProjectID uint32
+}
+
// openIndexDB opens index db from the given path with the given caches.
func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache) (*indexDB, error) {
tb, err := mergeset.OpenTable(path)
@@ -99,6 +108,8 @@ func openIndexDB(path string, metricIDCache, metricNameCache *fastcache.Cache) (
tagCache: tagCache,
metricIDCache: metricIDCache,
metricNameCache: metricNameCache,
+
+ tagCachePrefixes: make(map[accountProjectKey]uint64),
}
is := db.getIndexSearch()
@@ -240,6 +251,10 @@ func (db *indexDB) putToTagCache(tsids []TSID, key []byte) {
}
func (db *indexDB) getFromMetricIDCache(dst *TSID, metricID uint64) error {
+ // There is no need in prefixing the key with (accountID, projectID),
+ // since metricID is globally unique across all (accountID, projectID) values.
+ // See getUniqueUint64.
+
// There is no need in checking for deleted metricIDs here, since they
// must be checked by the caller.
buf := (*[unsafe.Sizeof(*dst)]byte)(unsafe.Pointer(dst))
@@ -262,6 +277,10 @@ func (db *indexDB) putToMetricIDCache(metricID uint64, tsid *TSID) {
}
func (db *indexDB) getMetricNameFromCache(dst []byte, metricID uint64) []byte {
+ // There is no need in prefixing the key with (accountID, projectID),
+ // since metricID is globally unique across all (accountID, projectID) values.
+ // See getUniqueUint64.
+
// There is no need in checking for deleted metricIDs here, since they
// must be checked by the caller.
key := (*[unsafe.Sizeof(metricID)]byte)(unsafe.Pointer(&metricID))
@@ -273,13 +292,28 @@ func (db *indexDB) putMetricNameToCache(metricID uint64, metricName []byte) {
db.metricNameCache.Set(key[:], metricName)
}
-func marshalTagFiltersKey(dst []byte, tfss []*TagFilters) []byte {
- prefix := atomic.LoadUint64(&tagFiltersKeyGen)
+func (db *indexDB) marshalTagFiltersKey(dst []byte, tfss []*TagFilters) []byte {
+ if len(tfss) == 0 {
+ return nil
+ }
+ k := accountProjectKey{
+ AccountID: tfss[0].accountID,
+ ProjectID: tfss[0].projectID,
+ }
+ db.tagCachePrefixesLock.RLock()
+ prefix := db.tagCachePrefixes[k]
+ db.tagCachePrefixesLock.RUnlock()
+ if prefix == 0 {
+		// Create the missing prefix.
+		// It is OK if multiple concurrent goroutines call invalidateTagCache
+		// for the same (accountID, projectID).
+ prefix = db.invalidateTagCache(k.AccountID, k.ProjectID)
+ }
dst = encoding.MarshalUint64(dst, prefix)
for _, tfs := range tfss {
dst = append(dst, 0) // separator between tfs groups.
for i := range tfs.tfs {
- dst = tfs.tfs[i].Marshal(dst)
+ dst = tfs.tfs[i].MarshalNoAccountIDProjectID(dst)
}
}
return dst
@@ -317,13 +351,21 @@ func unmarshalTSIDs(dst []TSID, src []byte) ([]TSID, error) {
return dst, nil
}
-func (db *indexDB) invalidateTagCache() {
+func (db *indexDB) invalidateTagCache(accountID, projectID uint32) uint64 {
// This function must be fast, since it is called each
// time new timeseries is added.
- atomic.AddUint64(&tagFiltersKeyGen, 1)
+ prefix := atomic.AddUint64(&tagCacheKeyPrefix, 1)
+ k := accountProjectKey{
+ AccountID: accountID,
+ ProjectID: projectID,
+ }
+ db.tagCachePrefixesLock.Lock()
+ db.tagCachePrefixes[k] = prefix
+ db.tagCachePrefixesLock.Unlock()
+ return prefix
}
-var tagFiltersKeyGen uint64
+var tagCacheKeyPrefix uint64
// getTSIDByNameNoCreate fills the dst with TSID for the given metricName.
//
@@ -425,8 +467,9 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error {
return fmt.Errorf("cannot create indexes: %s", err)
}
- // Invalidate tag cache, since it doesn't contain tags for the created mn -> TSID mapping.
- db.invalidateTagCache()
+ // Invalidate tag cache for the given (AccountID, ProjectID), since
+ // it doesn't contain tags for the created mn -> TSID mapping.
+ _ = db.invalidateTagCache(mn.AccountID, mn.ProjectID)
return nil
}
@@ -449,6 +492,8 @@ func (db *indexDB) generateTSID(dst *TSID, metricName []byte, mn *MetricName) er
// The TSID wan't found in the external storage.
// Generate it locally.
+ dst.AccountID = mn.AccountID
+ dst.ProjectID = mn.ProjectID
dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup)
if len(mn.Tags) > 0 {
dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value))
@@ -474,19 +519,19 @@ func (db *indexDB) createIndexes(tsid *TSID, mn *MetricName) error {
items.Next()
// Create MetricID -> MetricName index.
- items.B = marshalCommonPrefix(items.B, nsPrefixMetricIDToMetricName)
+ items.B = marshalCommonPrefix(items.B, nsPrefixMetricIDToMetricName, mn.AccountID, mn.ProjectID)
items.B = encoding.MarshalUint64(items.B, tsid.MetricID)
items.B = mn.Marshal(items.B)
items.Next()
// Create MetricID -> TSID index.
- items.B = marshalCommonPrefix(items.B, nsPrefixMetricIDToTSID)
+ items.B = marshalCommonPrefix(items.B, nsPrefixMetricIDToTSID, mn.AccountID, mn.ProjectID)
items.B = encoding.MarshalUint64(items.B, tsid.MetricID)
items.B = tsid.Marshal(items.B)
items.Next()
commonPrefix := kbPool.Get()
- commonPrefix.B = marshalCommonPrefix(commonPrefix.B[:0], nsPrefixTagToMetricID)
+ commonPrefix.B = marshalCommonPrefix(commonPrefix.B[:0], nsPrefixTagToMetricID, mn.AccountID, mn.ProjectID)
// Create MetricGroup -> MetricID index.
items.B = append(items.B, commonPrefix.B...)
@@ -543,14 +588,14 @@ func putIndexItems(ii *indexItems) {
var indexItemsPool sync.Pool
-// SearchTagKeys returns all the tag keys.
-func (db *indexDB) SearchTagKeys(maxTagKeys int) ([]string, error) {
+// SearchTagKeys returns all the tag keys for the given accountID, projectID.
+func (db *indexDB) SearchTagKeys(accountID, projectID uint32, maxTagKeys int) ([]string, error) {
// TODO: cache results?
tks := make(map[string]struct{})
is := db.getIndexSearch()
- err := is.searchTagKeys(tks, maxTagKeys)
+ err := is.searchTagKeys(accountID, projectID, tks, maxTagKeys)
db.putIndexSearch(is)
if err != nil {
return nil, err
@@ -558,7 +603,7 @@ func (db *indexDB) SearchTagKeys(maxTagKeys int) ([]string, error) {
ok := db.doExtDB(func(extDB *indexDB) {
is := extDB.getIndexSearch()
- err = is.searchTagKeys(tks, maxTagKeys)
+ err = is.searchTagKeys(accountID, projectID, tks, maxTagKeys)
extDB.putIndexSearch(is)
})
if ok && err != nil {
@@ -574,11 +619,11 @@ func (db *indexDB) SearchTagKeys(maxTagKeys int) ([]string, error) {
return keys, nil
}
-func (is *indexSearch) searchTagKeys(tks map[string]struct{}, maxTagKeys int) error {
+func (is *indexSearch) searchTagKeys(accountID, projectID uint32, tks map[string]struct{}, maxTagKeys int) error {
ts := &is.ts
kb := &is.kb
dmis := is.db.getDeletedMetricIDs()
- commonPrefix := marshalCommonPrefix(nil, nsPrefixTagToMetricID)
+ commonPrefix := marshalCommonPrefix(nil, nsPrefixTagToMetricID, accountID, projectID)
ts.Seek(commonPrefix)
for len(tks) < maxTagKeys && ts.NextItem() {
item := ts.Item
@@ -626,11 +671,11 @@ func (is *indexSearch) searchTagKeys(tks map[string]struct{}, maxTagKeys int) er
}
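
The tenant-scoped lookups above (searchTagKeys, SearchTagValues, getSeriesCount and friends) all follow one pattern: build the common prefix for (accountID, projectID), Seek to it, and stop as soon as an item no longer carries that prefix, so a scan can never leak into another tenant's key range. A minimal stand-in for the mergeset iterator, using a sorted string slice purely for illustration:

package main

import (
	"fmt"
	"sort"
	"strings"
)

// scanPrefix emulates the Seek + NextItem loop over a sorted key space:
// it returns exactly the keys that share the given prefix.
func scanPrefix(sortedKeys []string, prefix string) []string {
	// Seek: position at the first key >= prefix.
	i := sort.SearchStrings(sortedKeys, prefix)
	var out []string
	for ; i < len(sortedKeys); i++ {
		if !strings.HasPrefix(sortedKeys[i], prefix) {
			break // left the tenant's key range, stop scanning
		}
		out = append(out, sortedKeys[i])
	}
	return out
}

func main() {
	// Keys are namespaced as "<account>/<project>/<tagKey>" for illustration;
	// the real index uses the binary prefix built by marshalCommonPrefix.
	keys := []string{"1/1/instance", "1/1/job", "1/2/job", "2/1/instance"}
	sort.Strings(keys)
	fmt.Println(scanPrefix(keys, "1/1/")) // [1/1/instance 1/1/job]
}
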
// SearchTagValues returns all the tag values for the given tagKey
-func (db *indexDB) SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
+func (db *indexDB) SearchTagValues(accountID, projectID uint32, tagKey []byte, maxTagValues int) ([]string, error) {
// TODO: cache results?
kb := kbPool.Get()
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricID)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricID, accountID, projectID)
kb.B = marshalTagValue(kb.B, tagKey)
tvs := make(map[string]struct{})
@@ -712,13 +757,13 @@ func (is *indexSearch) searchTagValues(tvs map[string]struct{}, prefix []byte, m
return nil
}
-// GetSeriesCount returns the approximate number of unique timeseries in the db.
+// GetSeriesCount returns the approximate number of unique timeseries for the given (accountID, projectID).
//
// It includes the deleted series too and may count the same series
// up to two times - in db and extDB.
-func (db *indexDB) GetSeriesCount() (uint64, error) {
+func (db *indexDB) GetSeriesCount(accountID, projectID uint32) (uint64, error) {
is := db.getIndexSearch()
- n, err := getSeriesCount(&is.ts, &is.kb)
+ n, err := getSeriesCount(accountID, projectID, &is.ts, &is.kb)
db.putIndexSearch(is)
if err != nil {
return 0, err
@@ -727,7 +772,7 @@ func (db *indexDB) GetSeriesCount() (uint64, error) {
var nExt uint64
ok := db.doExtDB(func(extDB *indexDB) {
is := extDB.getIndexSearch()
- nExt, err = getSeriesCount(&is.ts, &is.kb)
+ nExt, err = getSeriesCount(accountID, projectID, &is.ts, &is.kb)
extDB.putIndexSearch(is)
})
if ok && err != nil {
@@ -738,9 +783,9 @@ func (db *indexDB) GetSeriesCount() (uint64, error) {
// searchMetricName appends metric name for the given metricID to dst
// and returns the result.
-func (db *indexDB) searchMetricName(dst []byte, metricID uint64) ([]byte, error) {
+func (db *indexDB) searchMetricName(dst []byte, metricID uint64, accountID, projectID uint32) ([]byte, error) {
is := db.getIndexSearch()
- dst, err := is.searchMetricName(dst, metricID)
+ dst, err := is.searchMetricName(dst, metricID, accountID, projectID)
db.putIndexSearch(is)
if err != io.EOF {
@@ -750,7 +795,7 @@ func (db *indexDB) searchMetricName(dst []byte, metricID uint64) ([]byte, error)
// Try searching in the external indexDB.
if db.doExtDB(func(extDB *indexDB) {
is := extDB.getIndexSearch()
- dst, err = is.searchMetricName(dst, metricID)
+ dst, err = is.searchMetricName(dst, metricID, accountID, projectID)
extDB.putIndexSearch(is)
}) {
return dst, err
@@ -771,6 +816,8 @@ func (db *indexDB) DeleteTSIDs(tfss []*TagFilters) (int, error) {
if len(tfss) == 0 {
return 0, nil
}
+ accountID := tfss[0].accountID
+ projectID := tfss[0].projectID
// Obtain metricIDs to delete.
is := db.getIndexSearch()
@@ -802,7 +849,7 @@ func (db *indexDB) DeleteTSIDs(tfss []*TagFilters) (int, error) {
db.updateDeletedMetricIDs(metricIDs)
// Reset TagFilters -> TSIDS cache, since it may contain deleted TSIDs.
- db.invalidateTagCache()
+ _ = db.invalidateTagCache(accountID, projectID)
// Delete TSIDs in the extDB.
if db.doExtDB(func(extDB *indexDB) {
@@ -872,7 +919,7 @@ func (db *indexDB) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int)
tfKeyBuf := tagFiltersKeyBufPool.Get()
defer tagFiltersKeyBufPool.Put(tfKeyBuf)
- tfKeyBuf.B = marshalTagFiltersKey(tfKeyBuf.B[:0], tfss)
+ tfKeyBuf.B = db.marshalTagFiltersKey(tfKeyBuf.B[:0], tfss)
tsids, ok := db.getFromTagCache(tfKeyBuf.B)
if ok {
// Fast path - tsids found in the cache.
@@ -959,7 +1006,7 @@ func (is *indexSearch) getTSIDByMetricName(dst *TSID, metricName []byte) error {
return io.EOF
}
-func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, error) {
+func (is *indexSearch) searchMetricName(dst []byte, metricID uint64, accountID, projectID uint32) ([]byte, error) {
metricName := is.db.getMetricNameFromCache(dst, metricID)
if len(metricName) > len(dst) {
return metricName, nil
@@ -967,7 +1014,7 @@ func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, er
ts := &is.ts
kb := &is.kb
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToMetricName)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToMetricName, accountID, projectID)
kb.B = encoding.MarshalUint64(kb.B, metricID)
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
if err == io.EOF {
@@ -1021,6 +1068,8 @@ func (is *indexSearch) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics
// Obtain TSID values for the given metricIDs.
tsids := make([]TSID, len(metricIDs))
i := 0
+ accountID := tfss[0].accountID
+ projectID := tfss[0].projectID
for _, metricID := range metricIDs {
// Try obtaining TSIDs from db.tsidCache. This is much faster
// than scanning the mergeset if it contains a lot of metricIDs.
@@ -1034,7 +1083,7 @@ func (is *indexSearch) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics
if err != io.EOF {
return nil, err
}
- if err := is.getTSIDByMetricID(tsid, metricID); err != nil {
+ if err := is.getTSIDByMetricID(&tsids[i], metricID, accountID, projectID); err != nil {
if err == io.EOF {
// Cannot find TSID for the given metricID.
// This may be the case on incomplete indexDB
@@ -1054,12 +1103,12 @@ func (is *indexSearch) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics
return tsids, nil
}
-func (is *indexSearch) getTSIDByMetricID(dst *TSID, metricID uint64) error {
+func (is *indexSearch) getTSIDByMetricID(dst *TSID, metricID uint64, accountID, projectID uint32) error {
// There is no need in checking for deleted metricIDs here, since they
// must be checked by the caller.
ts := &is.ts
kb := &is.kb
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID, accountID, projectID)
kb.B = encoding.MarshalUint64(kb.B, metricID)
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
if err == io.EOF {
@@ -1078,9 +1127,9 @@ func (is *indexSearch) getTSIDByMetricID(dst *TSID, metricID uint64) error {
return nil
}
-func getSeriesCount(ts *mergeset.TableSearch, kb *bytesutil.ByteBuffer) (uint64, error) {
+func getSeriesCount(accountID, projectID uint32, ts *mergeset.TableSearch, kb *bytesutil.ByteBuffer) (uint64, error) {
var n uint64
- kb.B = append(kb.B[:0], nsPrefixMetricIDToTSID)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixMetricIDToTSID, accountID, projectID)
ts.Seek(kb.B)
for ts.NextItem() {
if !bytes.HasPrefix(ts.Item, kb.B) {
@@ -1097,7 +1146,7 @@ func getSeriesCount(ts *mergeset.TableSearch, kb *bytesutil.ByteBuffer) (uint64,
// searchMetricIDsMapByMetricNameMatch matches metricName values for the given srcMetricIDs against tfs
// and adds matching metrics to metricIDs.
-func (is *indexSearch) searchMetricIDsMapByMetricNameMatch(metricIDs, srcMetricIDs map[uint64]struct{}, tfs []*tagFilter) error {
+func (is *indexSearch) searchMetricIDsMapByMetricNameMatch(metricIDs, srcMetricIDs map[uint64]struct{}, tfs []*tagFilter, accountID, projectID uint32) error {
// sort srcMetricIDs in order to speed up Seek below.
sortedMetricIDs := make([]uint64, 0, len(srcMetricIDs))
for metricID := range srcMetricIDs {
@@ -1111,7 +1160,7 @@ func (is *indexSearch) searchMetricIDsMapByMetricNameMatch(metricIDs, srcMetricI
defer PutMetricName(mn)
for _, metricID := range sortedMetricIDs {
var err error
- metricName.B, err = is.searchMetricName(metricName.B[:0], metricID)
+ metricName.B, err = is.searchMetricName(metricName.B[:0], metricID, accountID, projectID)
if err != nil {
return fmt.Errorf("cannot find metricName by metricID %d: %s", metricID, err)
}
@@ -1174,8 +1223,7 @@ func (is *indexSearch) getTagFilterWithMinMetricIDsMap(tfs *TagFilters, maxMetri
}
func matchTagFilters(mn *MetricName, tfs []*tagFilter, kb *bytesutil.ByteBuffer) (bool, error) {
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricID)
-
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricID, mn.AccountID, mn.ProjectID)
for _, tf := range tfs {
if len(tf.key) == 0 {
// Match against mn.MetricGroup.
@@ -1322,7 +1370,7 @@ func (is *indexSearch) searchMetricIDsMap(metricIDs map[uint64]struct{}, tfs *Ta
// Allow fetching up to 20*maxMetrics metrics for the given time range
// in the hope these metricIDs will be filtered out by other filters below.
maxTimeRangeMetrics := 20 * maxMetrics
- metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1)
+ metricIDsForTimeRange, err := is.getMetricIDsForTimeRange(tr, maxTimeRangeMetrics+1, tfs.accountID, tfs.projectID)
if err == errMissingMetricIDsForDate {
// Give up.
for metricID := range minMetricIDs {
@@ -1364,7 +1412,7 @@ func (is *indexSearch) searchMetricIDsMap(metricIDs map[uint64]struct{}, tfs *Ta
for i, tf := range tfsPostponed {
mIDs, err := is.intersectMetricIDsMapForTagFilter(tf, minMetricIDs)
if err == errFallbackToMetricNameMatch {
- return is.searchMetricIDsMapByMetricNameMatch(metricIDs, minMetricIDs, tfsPostponed[i:])
+ return is.searchMetricIDsMapByMetricNameMatch(metricIDs, minMetricIDs, tfsPostponed[i:], tfs.accountID, tfs.projectID)
}
if err != nil {
return err
@@ -1537,7 +1585,7 @@ var errFallbackToMetricNameMatch = errors.New("fall back to searchMetricIDsMapBy
var errMissingMetricIDsForDate = errors.New("missing metricIDs for date")
-func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (map[uint64]struct{}, error) {
+func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int, accountID, projectID uint32) (map[uint64]struct{}, error) {
if tr.isZero() {
return nil, errMissingMetricIDsForDate
}
@@ -1549,7 +1597,7 @@ func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (m
}
metricIDs := make(map[uint64]struct{}, maxMetrics)
for minDate <= maxDate {
- if err := is.getMetricIDsForDate(uint64(minDate), metricIDs, maxMetrics); err != nil {
+ if err := is.getMetricIDsForDate(uint64(minDate), metricIDs, maxMetrics, accountID, projectID); err != nil {
return nil, err
}
minDate++
@@ -1557,9 +1605,9 @@ func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (m
return metricIDs, nil
}
-func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
+func (db *indexDB) storeDateMetricID(date, metricID uint64, accountID, projectID uint32) error {
is := db.getIndexSearch()
- ok, err := is.hasDateMetricID(date, metricID)
+ ok, err := is.hasDateMetricID(date, metricID, accountID, projectID)
db.putIndexSearch(is)
if err != nil {
return err
@@ -1571,7 +1619,7 @@ func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
// Slow path: create (date, metricID) entry.
items := getIndexItems()
- items.B = marshalCommonPrefix(items.B[:0], nsPrefixDateToMetricID)
+ items.B = marshalCommonPrefix(items.B[:0], nsPrefixDateToMetricID, accountID, projectID)
items.B = encoding.MarshalUint64(items.B, date)
items.B = encoding.MarshalUint64(items.B, metricID)
items.Next()
@@ -1580,10 +1628,10 @@ func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
return err
}
-func (is *indexSearch) hasDateMetricID(date, metricID uint64) (bool, error) {
+func (is *indexSearch) hasDateMetricID(date, metricID uint64, accountID, projectID uint32) (bool, error) {
ts := &is.ts
kb := &is.kb
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID, accountID, projectID)
kb.B = encoding.MarshalUint64(kb.B, date)
kb.B = encoding.MarshalUint64(kb.B, metricID)
if err := ts.FirstItemWithPrefix(kb.B); err != nil {
@@ -1598,10 +1646,10 @@ func (is *indexSearch) hasDateMetricID(date, metricID uint64) (bool, error) {
return true, nil
}
-func (is *indexSearch) getMetricIDsForDate(date uint64, metricIDs map[uint64]struct{}, maxMetrics int) error {
+func (is *indexSearch) getMetricIDsForDate(date uint64, metricIDs map[uint64]struct{}, maxMetrics int, accountID, projectID uint32) error {
ts := &is.ts
kb := &is.kb
- kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID)
+ kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID, accountID, projectID)
kb.B = encoding.MarshalUint64(kb.B, date)
ts.Seek(kb.B)
items := 0
@@ -1733,8 +1781,10 @@ func getUniqueUint64() uint64 {
// between VictoriaMetrics restarts.
var uniqueUint64 = uint64(time.Now().UnixNano())
-func marshalCommonPrefix(dst []byte, nsPrefix byte) []byte {
+func marshalCommonPrefix(dst []byte, nsPrefix byte, accountID, projectID uint32) []byte {
dst = append(dst, nsPrefix)
+ dst = encoding.MarshalUint32(dst, accountID)
+ dst = encoding.MarshalUint32(dst, projectID)
return dst
}
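
marshalCommonPrefix is the single place where the tenant scope enters every index key: one namespace byte followed by accountID and projectID. Because all of a tenant's entries share this 9-byte prefix, prefix scans stay inside the tenant automatically. A self-contained sketch of that layout; big-endian encoding is assumed here (so lexicographic key order follows numeric order), and the nsPrefixTagToMetricID value below is a placeholder rather than the repo's constant:

package main

import (
	"encoding/binary"
	"fmt"
)

// marshalCommonPrefix sketches the per-tenant key prefix:
// <nsPrefix byte><accountID uint32><projectID uint32>.
func marshalCommonPrefix(dst []byte, nsPrefix byte, accountID, projectID uint32) []byte {
	var b [4]byte
	dst = append(dst, nsPrefix)
	binary.BigEndian.PutUint32(b[:], accountID)
	dst = append(dst, b[:]...)
	binary.BigEndian.PutUint32(b[:], projectID)
	dst = append(dst, b[:]...)
	return dst
}

func main() {
	const nsPrefixTagToMetricID = 2 // placeholder value, not the repo's constant
	prefix := marshalCommonPrefix(nil, nsPrefixTagToMetricID, 123, 456)
	fmt.Printf("% x\n", prefix) // 02 00 00 00 7b 00 00 01 c8
}
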
diff --git a/lib/storage/index_db_test.go b/lib/storage/index_db_test.go
index 071859119..ba694b717 100644
--- a/lib/storage/index_db_test.go
+++ b/lib/storage/index_db_test.go
@@ -263,6 +263,8 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
for i := 0; i < 4e2+1; i++ {
var mn MetricName
+ mn.AccountID = uint32((i + 2) % accountsCount)
+ mn.ProjectID = uint32((i + 1) % projectsCount)
// Init MetricGroup.
mn.MetricGroup = []byte(fmt.Sprintf("metricGroup_%d\x00\x01\x02", i%metricGroups))
@@ -282,6 +284,12 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
if err := is.GetOrCreateTSIDByName(&tsid, metricName); err != nil {
return nil, nil, fmt.Errorf("unexpected error when creating tsid for mn:\n%s: %s", &mn, err)
}
+ if tsid.AccountID != mn.AccountID {
+ return nil, nil, fmt.Errorf("unexpected TSID.AccountID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.AccountID, mn.AccountID, &mn, &tsid)
+ }
+ if tsid.ProjectID != mn.ProjectID {
+ return nil, nil, fmt.Errorf("unexpected TSID.ProjectID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.ProjectID, mn.ProjectID, &mn, &tsid)
+ }
mns = append(mns, mn)
tsids = append(tsids, tsid)
@@ -302,15 +310,23 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
return false
}
- timeseriesCounters := make(map[uint64]bool)
+ allKeys := make(map[accountProjectKey]map[string]bool)
+ timeseriesCounters := make(map[accountProjectKey]map[uint64]bool)
var tsidCopy TSID
var metricNameCopy []byte
- allKeys := make(map[string]bool)
for i := range mns {
mn := &mns[i]
tsid := &tsids[i]
- tc := timeseriesCounters
+ apKey := accountProjectKey{
+ AccountID: tsid.AccountID,
+ ProjectID: tsid.ProjectID,
+ }
+ tc := timeseriesCounters[apKey]
+ if tc == nil {
+ tc = make(map[uint64]bool)
+ timeseriesCounters[apKey] = tc
+ }
tc[tsid.MetricID] = true
mn.sortTags()
@@ -330,7 +346,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
// Search for metric name for the given metricID.
var err error
- metricNameCopy, err = db.searchMetricName(metricNameCopy[:0], tsidCopy.MetricID)
+ metricNameCopy, err = db.searchMetricName(metricNameCopy[:0], tsidCopy.MetricID, tsidCopy.AccountID, tsidCopy.ProjectID)
if err != nil {
return fmt.Errorf("error in searchMetricName: %s", err)
}
@@ -339,7 +355,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Try searching metric name for non-existent MetricID.
- buf, err := db.searchMetricName(nil, 1)
+ buf, err := db.searchMetricName(nil, 1, mn.AccountID, mn.ProjectID)
if err != io.EOF {
return fmt.Errorf("expecting io.EOF error when searching for non-existing metricID; got %v", err)
}
@@ -348,37 +364,44 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Test SearchTagValues
- tvs, err := db.SearchTagValues(nil, 1e5)
+ tvs, err := db.SearchTagValues(mn.AccountID, mn.ProjectID, nil, 1e5)
if err != nil {
return fmt.Errorf("error in SearchTagValues for __name__: %s", err)
}
if !hasValue(tvs, mn.MetricGroup) {
return fmt.Errorf("SearchTagValues couldn't find %q; found %q", mn.MetricGroup, tvs)
}
+ apKeys := allKeys[apKey]
+ if apKeys == nil {
+ apKeys = make(map[string]bool)
+ allKeys[apKey] = apKeys
+ }
for i := range mn.Tags {
tag := &mn.Tags[i]
- tvs, err := db.SearchTagValues(tag.Key, 1e5)
+ tvs, err := db.SearchTagValues(mn.AccountID, mn.ProjectID, tag.Key, 1e5)
if err != nil {
return fmt.Errorf("error in SearchTagValues for __name__: %s", err)
}
if !hasValue(tvs, tag.Value) {
return fmt.Errorf("SearchTagValues couldn't find %q=%q; found %q", tag.Key, tag.Value, tvs)
}
- allKeys[string(tag.Key)] = true
+ apKeys[string(tag.Key)] = true
}
}
// Test SearchTagKeys
- tks, err := db.SearchTagKeys(1e5)
- if err != nil {
- return fmt.Errorf("error in SearchTagKeys: %s", err)
- }
- if !hasValue(tks, nil) {
- return fmt.Errorf("cannot find __name__ in %q", tks)
- }
- for key := range allKeys {
- if !hasValue(tks, []byte(key)) {
- return fmt.Errorf("cannot find %q in %q", key, tks)
+ for k, apKeys := range allKeys {
+ tks, err := db.SearchTagKeys(k.AccountID, k.ProjectID, 1e5)
+ if err != nil {
+ return fmt.Errorf("error in SearchTagKeys: %s", err)
+ }
+ if !hasValue(tks, nil) {
+ return fmt.Errorf("cannot find __name__ in %q", tks)
+ }
+ for key := range apKeys {
+ if !hasValue(tks, []byte(key)) {
+ return fmt.Errorf("cannot find %q in %q", key, tks)
+ }
}
}
@@ -388,7 +411,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
tsid := &tsids[i]
// Search without regexps.
- tfs := NewTagFilters()
+ tfs := NewTagFilters(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, mn.MetricGroup, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for MetricGroup: %s", err)
}
@@ -434,7 +457,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Search with regexps.
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, mn.MetricGroup, false, true); err != nil {
return fmt.Errorf("cannot create regexp tag filter for MetricGroup: %s", err)
}
@@ -472,7 +495,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Search with filter matching zero results.
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("non-existing-key"), []byte("foobar"), false, false); err != nil {
return fmt.Errorf("cannot add non-existing key: %s", err)
}
@@ -493,8 +516,8 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
continue
}
- // Search with empty filter. It should match all the results.
- tfs.Reset()
+ // Search with empty filter. It should match all the results for (accountID, projectID).
+ tfs.Reset(mn.AccountID, mn.ProjectID)
tsidsFound, err = db.searchTSIDs([]*TagFilters{tfs}, TimeRange{}, 1e5)
if err != nil {
return fmt.Errorf("cannot search for common prefix: %s", err)
@@ -504,7 +527,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Search with empty metricGroup. It should match zero results.
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, nil, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for empty metricGroup: %s", err)
}
@@ -517,11 +540,11 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
}
// Search with multiple tfss
- tfs1 := NewTagFilters()
+ tfs1 := NewTagFilters(mn.AccountID, mn.ProjectID)
if err := tfs1.Add(nil, nil, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for empty metricGroup: %s", err)
}
- tfs2 := NewTagFilters()
+ tfs2 := NewTagFilters(mn.AccountID, mn.ProjectID)
if err := tfs2.Add(nil, mn.MetricGroup, false, false); err != nil {
return fmt.Errorf("cannot create tag filter for MetricGroup: %s", err)
}
@@ -539,7 +562,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
return fmt.Errorf("cannot search for nil tfss: %s", err)
}
if len(tsidsFound) != 0 {
- return fmt.Errorf("unexpected non-empty tsids fround for nil tfss; found %d tsids", len(tsidsFound))
+ return fmt.Errorf("unexpected non-empty tsids fround for nil tfss")
}
}
@@ -557,6 +580,8 @@ func testHasTSID(tsids []TSID, tsid *TSID) bool {
func TestMatchTagFilters(t *testing.T) {
var mn MetricName
+ mn.AccountID = 123
+ mn.ProjectID = 456
mn.MetricGroup = append(mn.MetricGroup, "foobar_metric"...)
for i := 0; i < 5; i++ {
key := fmt.Sprintf("key %d", i)
@@ -565,8 +590,8 @@ func TestMatchTagFilters(t *testing.T) {
}
var bb bytesutil.ByteBuffer
- var tfs TagFilters
- tfs.Reset()
+ // Verify tag filters for different account / project
+ tfs := NewTagFilters(mn.AccountID, mn.ProjectID+1)
if err := tfs.Add(nil, []byte("foobar_metric"), false, false); err != nil {
t.Fatalf("cannot add filter: %s", err)
}
@@ -574,12 +599,36 @@ func TestMatchTagFilters(t *testing.T) {
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
+ if ok {
+ t.Fatalf("Tag filters shouldn't match for invalid projectID")
+ }
+ tfs.Reset(mn.AccountID+1, mn.ProjectID)
+ if err := tfs.Add(nil, []byte("foobar_metric"), false, false); err != nil {
+ t.Fatalf("cannot add filter: %s", err)
+ }
+ ok, err = matchTagFilters(&mn, toTFPointers(tfs.tfs), &bb)
+ if err != nil {
+ t.Fatalf("unexpected error: %s", err)
+ }
+ if ok {
+ t.Fatalf("Tag filters shouldn't match for invalid accountID")
+ }
+
+ // Correct AccountID, ProjectID
+ tfs.Reset(mn.AccountID, mn.ProjectID)
+ if err := tfs.Add(nil, []byte("foobar_metric"), false, false); err != nil {
+ t.Fatalf("cannot add filter: %s", err)
+ }
+ ok, err = matchTagFilters(&mn, toTFPointers(tfs.tfs), &bb)
+ if err != nil {
+ t.Fatalf("unexpected error: %s", err)
+ }
if !ok {
t.Fatalf("should match")
}
// Empty tag filters should match.
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
ok, err = matchTagFilters(&mn, toTFPointers(tfs.tfs), &bb)
if err != nil {
t.Fatalf("unexpected error: %s", err)
@@ -589,7 +638,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Negative match by MetricGroup
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("foobar"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -600,7 +649,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("obar.+"), false, true); err != nil {
t.Fatalf("cannot add regexp, no negative filter: %s", err)
}
@@ -611,7 +660,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("foobar_metric"), true, false); err != nil {
t.Fatalf("cannot add no regexp, negative filter: %s", err)
}
@@ -622,7 +671,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("foob.+metric"), true, true); err != nil {
t.Fatalf("cannot add regexp, negative filter: %s", err)
}
@@ -635,7 +684,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Positive match by MetricGroup
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("foobar_metric"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -646,7 +695,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("foobar.+etric"), false, true); err != nil {
t.Fatalf("cannot add regexp, no negative filter: %s", err)
}
@@ -657,7 +706,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("obar_metric"), true, false); err != nil {
t.Fatalf("cannot add no regexp, negative filter: %s", err)
}
@@ -668,7 +717,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add(nil, []byte("ob.+metric"), true, true); err != nil {
t.Fatalf("cannot add regexp, negative filter: %s", err)
}
@@ -681,7 +730,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Negative match by non-existing tag
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("non-existing-tag"), []byte("foobar"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -692,7 +741,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("non-existing-tag"), []byte("obar.+"), false, true); err != nil {
t.Fatalf("cannot add regexp, no negative filter: %s", err)
}
@@ -703,7 +752,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("non-existing-tag"), []byte("foobar_metric"), true, false); err != nil {
t.Fatalf("cannot add no regexp, negative filter: %s", err)
}
@@ -714,7 +763,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("non-existing-tag"), []byte("foob.+metric"), true, true); err != nil {
t.Fatalf("cannot add regexp, negative filter: %s", err)
}
@@ -727,7 +776,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Negative match by existing tag
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 0"), []byte("foobar"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -738,7 +787,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 1"), []byte("obar.+"), false, true); err != nil {
t.Fatalf("cannot add regexp, no negative filter: %s", err)
}
@@ -749,7 +798,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 2"), []byte("value 2"), true, false); err != nil {
t.Fatalf("cannot add no regexp, negative filter: %s", err)
}
@@ -760,7 +809,7 @@ func TestMatchTagFilters(t *testing.T) {
if ok {
t.Fatalf("Shouldn't match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 3"), []byte("v.+lue 3"), true, true); err != nil {
t.Fatalf("cannot add regexp, negative filter: %s", err)
}
@@ -773,7 +822,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Positive match by existing tag
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 0"), []byte("value 0"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -784,7 +833,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 1"), []byte(".+lue 1"), false, true); err != nil {
t.Fatalf("cannot add regexp, no negative filter: %s", err)
}
@@ -795,7 +844,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 2"), []byte("value 3"), true, false); err != nil {
t.Fatalf("cannot add no regexp, negative filter: %s", err)
}
@@ -806,7 +855,7 @@ func TestMatchTagFilters(t *testing.T) {
if !ok {
t.Fatalf("Should match")
}
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 3"), []byte("v.+lue 2"), true, true); err != nil {
t.Fatalf("cannot add regexp, negative filter: %s", err)
}
@@ -819,7 +868,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Positive match by multiple tags and MetricGroup
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
if err := tfs.Add([]byte("key 0"), []byte("value 0"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
}
@@ -853,7 +902,7 @@ func TestMatchTagFilters(t *testing.T) {
}
// Negative match by multiple tags and MetricGroup
- tfs.Reset()
+ tfs.Reset(mn.AccountID, mn.ProjectID)
// Positive matches
if err := tfs.Add([]byte("key 0"), []byte("value 0"), false, false); err != nil {
t.Fatalf("cannot add no regexp, no negative filter: %s", err)
diff --git a/lib/storage/index_db_timing_test.go b/lib/storage/index_db_timing_test.go
index e830a3777..dbf312e1a 100644
--- a/lib/storage/index_db_timing_test.go
+++ b/lib/storage/index_db_timing_test.go
@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"strconv"
+ "sync/atomic"
"testing"
"github.com/VictoriaMetrics/fastcache"
@@ -28,12 +29,15 @@ func BenchmarkIndexDBAddTSIDs(b *testing.B) {
}
}()
+ var goroutineID uint32
+
b.ReportAllocs()
b.SetBytes(recordsPerLoop)
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
var mn MetricName
var tsid TSID
+ mn.AccountID = atomic.AddUint32(&goroutineID, 1)
// The most common tags.
mn.Tags = []Tag{
@@ -105,6 +109,8 @@ func BenchmarkIndexDBSearchTSIDs(b *testing.B) {
is := db.getIndexSearch()
defer db.putIndexSearch(is)
for i := 0; i < recordsCount; i++ {
+ mn.AccountID = uint32(i % accountsCount)
+ mn.ProjectID = uint32(i % projectsCount)
mn.sortTags()
metricName = mn.Marshal(metricName[:0])
if err := is.GetOrCreateTSIDByName(&tsid, metricName); err != nil {
@@ -124,7 +130,9 @@ func BenchmarkIndexDBSearchTSIDs(b *testing.B) {
tfss := []*TagFilters{&tfs}
i := 0
for pb.Next() {
- tfs.Reset()
+ accountID := uint32(i % accountsCount)
+ projectID := uint32(i % projectsCount)
+ tfs.Reset(accountID, projectID)
for j := range tags {
if err := tfs.Add(tags[j].Key, tags[j].Value, false, false); err != nil {
panic(fmt.Errorf("BUG: unexpected error: %s", err))
@@ -178,6 +186,8 @@ func BenchmarkIndexDBGetTSIDs(b *testing.B) {
is := db.getIndexSearch()
defer db.putIndexSearch(is)
for i := 0; i < recordsCount; i++ {
+ mn.AccountID = uint32(i % accountsCount)
+ mn.ProjectID = uint32(i % projectsCount)
mn.sortTags()
metricName = mn.Marshal(metricName[:0])
if err := is.GetOrCreateTSIDByName(&tsid, metricName); err != nil {
@@ -196,6 +206,8 @@ func BenchmarkIndexDBGetTSIDs(b *testing.B) {
defer db.putIndexSearch(is)
for pb.Next() {
for i := 0; i < recordsPerLoop; i++ {
+ mnLocal.AccountID = uint32(i % accountsCount)
+ mnLocal.ProjectID = uint32(i % projectsCount)
mnLocal.sortTags()
metricNameLocal = mnLocal.Marshal(metricNameLocal[:0])
if err := is.GetOrCreateTSIDByName(&tsidLocal, metricNameLocal); err != nil {
diff --git a/lib/storage/metaindex_row_test.go b/lib/storage/metaindex_row_test.go
index f55e755de..2f62f513d 100644
--- a/lib/storage/metaindex_row_test.go
+++ b/lib/storage/metaindex_row_test.go
@@ -11,6 +11,7 @@ func TestMetaindexRowReset(t *testing.T) {
var mr metaindexRow
mr.TSID.MetricID = 234
+ mr.TSID.AccountID = 342
mr.BlockHeadersCount = 1323
mr.MinTimestamp = -234
mr.MaxTimestamp = 8989
diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go
index 15dcaf265..2d727f05b 100644
--- a/lib/storage/metric_name.go
+++ b/lib/storage/metric_name.go
@@ -113,6 +113,9 @@ func unmarshalTagValue(dst, src []byte) ([]byte, []byte, error) {
// MetricName reperesents a metric name.
type MetricName struct {
+ AccountID uint32
+ ProjectID uint32
+
MetricGroup []byte
// Tags are optional. They must be sorted by tag Key for canonical view.
@@ -139,12 +142,16 @@ var mnPool sync.Pool
// Reset resets the mn.
func (mn *MetricName) Reset() {
+ mn.AccountID = 0
+ mn.ProjectID = 0
mn.MetricGroup = mn.MetricGroup[:0]
mn.Tags = mn.Tags[:0]
}
// CopyFrom copies src to mn.
func (mn *MetricName) CopyFrom(src *MetricName) {
+ mn.AccountID = src.AccountID
+ mn.ProjectID = src.ProjectID
if cap(mn.MetricGroup) > 0 {
mn.MetricGroup = append(mn.MetricGroup[:0], src.MetricGroup...)
mn.Tags = copyTags(mn.Tags[:0], src.Tags)
@@ -316,7 +323,7 @@ func (mn *MetricName) String() string {
tags = append(tags, fmt.Sprintf("%q=%q", t.Key, t.Value))
}
tagsStr := strings.Join(tags, ", ")
- return fmt.Sprintf("MetricGroup=%q, tags=[%s]", mn.MetricGroup, tagsStr)
+ return fmt.Sprintf("AccountID=%d, ProjectID=%d, MetricGroup=%q, tags=[%s]", mn.AccountID, mn.ProjectID, mn.MetricGroup, tagsStr)
}
// Marshal appends marshaled mn to dst and returns the result.
@@ -325,7 +332,7 @@ func (mn *MetricName) String() string {
func (mn *MetricName) Marshal(dst []byte) []byte {
// Calculate the required size and pre-allocate space in dst
dstLen := len(dst)
- requiredSize := len(mn.MetricGroup) + 1
+ requiredSize := 8 + len(mn.MetricGroup) + 1
for i := range mn.Tags {
tag := &mn.Tags[i]
requiredSize += len(tag.Key) + len(tag.Value) + 2
@@ -333,16 +340,22 @@ func (mn *MetricName) Marshal(dst []byte) []byte {
dst = bytesutil.Resize(dst, requiredSize)
dst = dst[:dstLen]
- // Marshal MetricGroup
+ dst = encoding.MarshalUint32(dst, mn.AccountID)
+ dst = encoding.MarshalUint32(dst, mn.ProjectID)
dst = marshalTagValue(dst, mn.MetricGroup)
-
- // Marshal tags.
dst = marshalTags(dst, mn.Tags)
return dst
}
// Unmarshal unmarshals mn from src.
func (mn *MetricName) Unmarshal(src []byte) error {
+ if len(src) < 8 {
+ return fmt.Errorf("too short src: %d bytes; must be at least % bytes", len(src), 8)
+ }
+ mn.AccountID = encoding.UnmarshalUint32(src)
+ mn.ProjectID = encoding.UnmarshalUint32(src[4:])
+ src = src[8:]
+
// Unmarshal MetricGroup.
var err error
src, mn.MetricGroup, err = unmarshalTagValue(mn.MetricGroup[:0], src)
@@ -393,10 +406,10 @@ const maxLabelsPerTimeseries = 30
// MarshalMetricNameRaw marshals labels to dst and returns the result.
//
// The result must be unmarshaled with MetricName.unmarshalRaw
-func MarshalMetricNameRaw(dst []byte, labels []prompb.Label) []byte {
+func MarshalMetricNameRaw(dst []byte, accountID, projectID uint32, labels []prompb.Label) []byte {
// Calculate the required space for dst.
dstLen := len(dst)
- dstSize := dstLen
+ dstSize := dstLen + 8
for i := range labels {
if i >= maxLabelsPerTimeseries {
break
@@ -422,6 +435,8 @@ func MarshalMetricNameRaw(dst []byte, labels []prompb.Label) []byte {
dst = bytesutil.Resize(dst, dstSize)[:dstLen]
// Marshal labels to dst.
+ dst = encoding.MarshalUint32(dst, accountID)
+ dst = encoding.MarshalUint32(dst, projectID)
for i := range labels {
if i >= maxLabelsPerTimeseries {
break
@@ -437,6 +452,13 @@ func MarshalMetricNameRaw(dst []byte, labels []prompb.Label) []byte {
return dst
}
+// MarshalMetricLabelRaw marshals label to dst.
+func MarshalMetricLabelRaw(dst []byte, label *prompb.Label) []byte {
+ dst = marshalBytesFast(dst, label.Name)
+ dst = marshalBytesFast(dst, label.Value)
+ return dst
+}
+
// marshalRaw marshals mn to dst and returns the result.
//
// The results may be unmarshaled with MetricName.unmarshalRaw.
@@ -444,6 +466,8 @@ func MarshalMetricNameRaw(dst []byte, labels []prompb.Label) []byte {
// This function is for testing purposes. MarshalMetricNameRaw must be used
// in prod instead.
func (mn *MetricName) marshalRaw(dst []byte) []byte {
+ dst = encoding.MarshalUint32(dst, mn.AccountID)
+ dst = encoding.MarshalUint32(dst, mn.ProjectID)
dst = marshalBytesFast(dst, nil)
dst = marshalBytesFast(dst, mn.MetricGroup)
@@ -459,6 +483,16 @@ func (mn *MetricName) marshalRaw(dst []byte) []byte {
// unmarshalRaw unmarshals mn encoded with MarshalMetricNameRaw.
func (mn *MetricName) unmarshalRaw(src []byte) error {
mn.Reset()
+ if len(src) < 4 {
+ return fmt.Errorf("not enough data for decoding accountID; got %d bytes; %X; want at least 4 bytes", len(src), src)
+ }
+ mn.AccountID = encoding.UnmarshalUint32(src)
+ src = src[4:]
+ if len(src) < 4 {
+ return fmt.Errorf("not enough data for decoding projectID; got %d bytes; %X; want at least 4 bytes", len(src), src)
+ }
+ mn.ProjectID = encoding.UnmarshalUint32(src)
+ src = src[4:]
for len(src) > 0 {
tail, key, err := unmarshalBytesFast(src)
if err != nil {
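
Because MarshalMetricNameRaw and marshalRaw now lead with the 8-byte tenant header, two identical label sets under different (accountID, projectID) pairs always produce different raw names, so caches and maps keyed by the raw name cannot mix tenants. A toy sketch of that effect; rawName and its single-byte length prefix are illustrative only, not the real encoding:

package main

import (
	"encoding/binary"
	"fmt"
)

// rawName mimics the shape of MarshalMetricNameRaw: tenant header first,
// then the labels. A single-byte length per label is enough for this sketch.
func rawName(accountID, projectID uint32, labels ...string) string {
	var b [8]byte
	binary.BigEndian.PutUint32(b[:4], accountID)
	binary.BigEndian.PutUint32(b[4:], projectID)
	dst := append([]byte(nil), b[:]...)
	for _, l := range labels {
		dst = append(dst, byte(len(l)))
		dst = append(dst, l...)
	}
	return string(dst)
}

func main() {
	seen := map[string]bool{}
	seen[rawName(1, 1, "__name__", "http_requests_total")] = true
	seen[rawName(2, 1, "__name__", "http_requests_total")] = true
	fmt.Println(len(seen)) // 2: same labels, different tenants, distinct keys
}
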
diff --git a/lib/storage/metric_name_test.go b/lib/storage/metric_name_test.go
index 65f2100f0..c518ee984 100644
--- a/lib/storage/metric_name_test.go
+++ b/lib/storage/metric_name_test.go
@@ -38,6 +38,8 @@ func TestMetricNameMarshalUnmarshal(t *testing.T) {
for i := 0; i < 10; i++ {
for tagsCount := 0; tagsCount < 10; tagsCount++ {
var mn MetricName
+ mn.AccountID = uint32(i)
+ mn.ProjectID = uint32(i + 1)
for j := 0; j < tagsCount; j++ {
key := fmt.Sprintf("key_%d_%d_\x00\x01\x02", i, j)
value := fmt.Sprintf("\x02\x00\x01value_%d_%d", i, j)
@@ -80,6 +82,8 @@ func TestMetricNameMarshalUnmarshalRaw(t *testing.T) {
for i := 0; i < 10; i++ {
for tagsCount := 0; tagsCount < 10; tagsCount++ {
var mn MetricName
+ mn.AccountID = uint32(i)
+ mn.ProjectID = uint32(tagsCount)
for j := 0; j < tagsCount; j++ {
key := fmt.Sprintf("key_%d_%d_\x00\x01\x02", i, j)
value := fmt.Sprintf("\x02\x00\x01value_%d_%d", i, j)
diff --git a/lib/storage/raw_row.go b/lib/storage/raw_row.go
index 3d58c8e5e..7b506af08 100644
--- a/lib/storage/raw_row.go
+++ b/lib/storage/raw_row.go
@@ -59,6 +59,18 @@ func (rrs *rawRowsSort) Less(i, j int) bool {
// Slow path - compare TSIDs.
// Manually inline TSID.Less here, since the compiler doesn't inline it yet :(
+ if ta.AccountID < tb.AccountID {
+ return true
+ }
+ if ta.AccountID > tb.AccountID {
+ return false
+ }
+ if ta.ProjectID < tb.ProjectID {
+ return true
+ }
+ if ta.ProjectID > tb.ProjectID {
+ return false
+ }
if ta.MetricGroupID < tb.MetricGroupID {
return true
}
diff --git a/lib/storage/search.go b/lib/storage/search.go
index f460f6426..468824f28 100644
--- a/lib/storage/search.go
+++ b/lib/storage/search.go
@@ -158,7 +158,7 @@ func (s *Search) NextMetricBlock() bool {
for s.ts.NextBlock() {
tsid := &s.ts.Block.bh.TSID
var err error
- s.MetricBlock.MetricName, err = s.storage.searchMetricName(s.MetricBlock.MetricName[:0], tsid.MetricID)
+ s.MetricBlock.MetricName, err = s.storage.searchMetricName(s.MetricBlock.MetricName[:0], tsid.MetricID, tsid.AccountID, tsid.ProjectID)
if err != nil {
if err == io.EOF {
// Missing metricName for tsid.MetricID. Increment error counter and skip it.
@@ -182,6 +182,8 @@ func (s *Search) NextMetricBlock() bool {
// SearchQuery is used for sending search queries from vmselect to vmstorage.
type SearchQuery struct {
+ AccountID uint32
+ ProjectID uint32
MinTimestamp int64
MaxTimestamp int64
TagFilterss [][]TagFilter
@@ -263,8 +265,8 @@ func (tf *TagFilter) Unmarshal(src []byte) ([]byte, error) {
// String returns string representation of the search query.
func (sq *SearchQuery) String() string {
var bb bytesutil.ByteBuffer
- fmt.Fprintf(&bb, "MinTimestamp=%s, MaxTimestamp=%s, TagFilters=[\n",
- timestampToTime(sq.MinTimestamp), timestampToTime(sq.MaxTimestamp))
+ fmt.Fprintf(&bb, "AccountID=%d, ProjectID=%d, MinTimestamp=%s, MaxTimestamp=%s, TagFilters=[\n",
+ sq.AccountID, sq.ProjectID, timestampToTime(sq.MinTimestamp), timestampToTime(sq.MaxTimestamp))
for _, tagFilters := range sq.TagFilterss {
for _, tf := range tagFilters {
fmt.Fprintf(&bb, "%s", tf.String())
@@ -277,6 +279,8 @@ func (sq *SearchQuery) String() string {
// Marshal appends marshaled sq to dst and returns the result.
func (sq *SearchQuery) Marshal(dst []byte) []byte {
+ dst = encoding.MarshalUint32(dst, sq.AccountID)
+ dst = encoding.MarshalUint32(dst, sq.ProjectID)
dst = encoding.MarshalVarInt64(dst, sq.MinTimestamp)
dst = encoding.MarshalVarInt64(dst, sq.MaxTimestamp)
dst = encoding.MarshalVarUint64(dst, uint64(len(sq.TagFilterss)))
@@ -291,6 +295,18 @@ func (sq *SearchQuery) Marshal(dst []byte) []byte {
// Unmarshal unmarshals sq from src and returns the tail.
func (sq *SearchQuery) Unmarshal(src []byte) ([]byte, error) {
+ if len(src) < 4 {
+ return src, fmt.Errorf("cannot unmarshal AccountID: too short src len: %d; must be at least %d bytes", len(src), 4)
+ }
+ sq.AccountID = encoding.UnmarshalUint32(src)
+ src = src[4:]
+
+ if len(src) < 4 {
+ return src, fmt.Errorf("cannot unmarshal ProjectID: too short src len: %d; must be at least %d bytes", len(src), 4)
+ }
+ sq.ProjectID = encoding.UnmarshalUint32(src)
+ src = src[4:]
+
tail, minTs, err := encoding.UnmarshalVarInt64(src)
if err != nil {
return src, fmt.Errorf("cannot unmarshal MinTimestamp: %s", err)
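
SearchQuery now starts with two fixed 4-byte tenant fields ahead of the variable-length part, so vmstorage can decode and validate the tenant before any varint parsing. The sketch below mirrors that field order with stdlib varints; the repo's encoding package may encode the timestamps differently, so treat the exact byte layout here as an assumption:

package main

import (
	"encoding/binary"
	"fmt"
)

type searchHeader struct {
	AccountID    uint32
	ProjectID    uint32
	MinTimestamp int64
	MaxTimestamp int64
}

// marshal writes the fixed tenant fields first, then the varint timestamps.
func (h *searchHeader) marshal(dst []byte) []byte {
	var b [4]byte
	binary.BigEndian.PutUint32(b[:], h.AccountID)
	dst = append(dst, b[:]...)
	binary.BigEndian.PutUint32(b[:], h.ProjectID)
	dst = append(dst, b[:]...)
	var vb [binary.MaxVarintLen64]byte
	n := binary.PutVarint(vb[:], h.MinTimestamp)
	dst = append(dst, vb[:n]...)
	n = binary.PutVarint(vb[:], h.MaxTimestamp)
	return append(dst, vb[:n]...)
}

// unmarshal checks the fixed-size part before touching the varints.
func (h *searchHeader) unmarshal(src []byte) ([]byte, error) {
	if len(src) < 8 {
		return src, fmt.Errorf("too short src: %d bytes; want at least 8", len(src))
	}
	h.AccountID = binary.BigEndian.Uint32(src)
	h.ProjectID = binary.BigEndian.Uint32(src[4:])
	src = src[8:]
	minTs, n := binary.Varint(src)
	if n <= 0 {
		return src, fmt.Errorf("cannot unmarshal MinTimestamp")
	}
	h.MinTimestamp = minTs
	src = src[n:]
	maxTs, n := binary.Varint(src)
	if n <= 0 {
		return src, fmt.Errorf("cannot unmarshal MaxTimestamp")
	}
	h.MaxTimestamp = maxTs
	return src[n:], nil
}

func main() {
	in := searchHeader{AccountID: 1, ProjectID: 2, MinTimestamp: -3600000, MaxTimestamp: 3600000}
	var out searchHeader
	tail, err := out.unmarshal(in.marshal(nil))
	fmt.Println(out, len(tail), err) // {1 2 -3600000 3600000} 0 <nil>
}
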
diff --git a/lib/storage/search_test.go b/lib/storage/search_test.go
index f0bdfd4bd..4a0f1f432 100644
--- a/lib/storage/search_test.go
+++ b/lib/storage/search_test.go
@@ -38,6 +38,12 @@ func TestSearchQueryMarshalUnmarshal(t *testing.T) {
if len(tail) > 0 {
t.Fatalf("unexpected tail left after SearchQuery unmarshaling; tail (len=%d): %q", len(tail), tail)
}
+ if sq2.AccountID != sq1.AccountID {
+ t.Fatalf("unexpected AccountID; got %d; want %d", sq2.AccountID, sq1.AccountID)
+ }
+ if sq2.ProjectID != sq1.ProjectID {
+ t.Fatalf("unexpected ProjectID; got %d; want %d", sq2.ProjectID, sq1.ProjectID)
+ }
if sq1.MinTimestamp != sq2.MinTimestamp {
t.Fatalf("unexpected MinTimestamp; got %d; want %d", sq2.MinTimestamp, sq1.MinTimestamp)
}
@@ -99,6 +105,7 @@ func TestSearch(t *testing.T) {
startTimestamp -= startTimestamp % (1e3 * 3600 * 24)
blockRowsCount := 0
for i := 0; i < rowsCount; i++ {
+ mn.AccountID = uint32(i % accountsCount)
mn.MetricGroup = []byte(fmt.Sprintf("metric_%d", i%metricGroupsCount))
mr := &mrs[i]
@@ -162,7 +169,7 @@ func testSearch(st *Storage, tr TimeRange, mrs []MetricRow, accountsCount int) e
var s Search
for i := 0; i < 10; i++ {
// Prepare TagFilters for search.
- tfs := NewTagFilters()
+ tfs := NewTagFilters(uint32(i%accountsCount), 0)
metricGroupRe := fmt.Sprintf(`metric_\d*%d%d`, i, i)
if err := tfs.Add(nil, []byte(metricGroupRe), false, true); err != nil {
return fmt.Errorf("cannot add metricGroupRe=%q: %s", metricGroupRe, err)
diff --git a/lib/storage/storage.go b/lib/storage/storage.go
index b68d5db56..f809996d1 100644
--- a/lib/storage/storage.go
+++ b/lib/storage/storage.go
@@ -461,26 +461,26 @@ func (s *Storage) DeleteMetrics(tfss []*TagFilters) (int, error) {
// searchMetricName appends metric name for the given metricID to dst
// and returns the result.
-func (s *Storage) searchMetricName(dst []byte, metricID uint64) ([]byte, error) {
- return s.idb().searchMetricName(dst, metricID)
+func (s *Storage) searchMetricName(dst []byte, metricID uint64, accountID, projectID uint32) ([]byte, error) {
+ return s.idb().searchMetricName(dst, metricID, accountID, projectID)
}
-// SearchTagKeys searches for tag keys
-func (s *Storage) SearchTagKeys(maxTagKeys int) ([]string, error) {
- return s.idb().SearchTagKeys(maxTagKeys)
+// SearchTagKeys searches for tag keys for the given (accountID, projectID).
+func (s *Storage) SearchTagKeys(accountID, projectID uint32, maxTagKeys int) ([]string, error) {
+ return s.idb().SearchTagKeys(accountID, projectID, maxTagKeys)
}
-// SearchTagValues searches for tag values for the given tagKey
-func (s *Storage) SearchTagValues(tagKey []byte, maxTagValues int) ([]string, error) {
- return s.idb().SearchTagValues(tagKey, maxTagValues)
+// SearchTagValues searches for tag values for the given tagKey in (accountID, projectID).
+func (s *Storage) SearchTagValues(accountID, projectID uint32, tagKey []byte, maxTagValues int) ([]string, error) {
+ return s.idb().SearchTagValues(accountID, projectID, tagKey, maxTagValues)
}
-// GetSeriesCount returns the approximate number of unique time series.
+// GetSeriesCount returns the approximate number of unique time series for the given (accountID, projectID).
//
// It includes the deleted series too and may count the same series
// up to two times - in db and extDB.
-func (s *Storage) GetSeriesCount() (uint64, error) {
- return s.idb().GetSeriesCount()
+func (s *Storage) GetSeriesCount(accountID, projectID uint32) (uint64, error) {
+ return s.idb().GetSeriesCount(accountID, projectID)
}
// MetricRow is a metric to insert into storage.
@@ -507,15 +507,19 @@ func (mr *MetricRow) String() string {
if err := mn.unmarshalRaw(mr.MetricNameRaw); err == nil {
metricName = mn.String()
}
- return fmt.Sprintf("MetricName=%s, Timestamp=%d, Value=%f\n",
- metricName, mr.Timestamp, mr.Value)
+ return fmt.Sprintf("MetricName=%s, Timestamp=%d, Value=%f\n", metricName, mr.Timestamp, mr.Value)
}
// Marshal appends marshaled mr to dst and returns the result.
func (mr *MetricRow) Marshal(dst []byte) []byte {
- dst = encoding.MarshalBytes(dst, mr.MetricNameRaw)
- dst = encoding.MarshalUint64(dst, uint64(mr.Timestamp))
- dst = encoding.MarshalUint64(dst, math.Float64bits(mr.Value))
+ return MarshalMetricRow(dst, mr.MetricNameRaw, mr.Timestamp, mr.Value)
+}
+
+// MarshalMetricRow marshals MetricRow data to dst and returns the result.
+func MarshalMetricRow(dst []byte, metricNameRaw []byte, timestamp int64, value float64) []byte {
+ dst = encoding.MarshalBytes(dst, metricNameRaw)
+ dst = encoding.MarshalUint64(dst, uint64(timestamp))
+ dst = encoding.MarshalUint64(dst, math.Float64bits(value))
return dst
}
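
MarshalMetricRow pins down the per-row wire layout: the length-prefixed raw metric name, the timestamp as a uint64, and the value as its IEEE-754 bit pattern. A stdlib-only sketch of that shape follows; the uvarint length prefix is an assumption, since the repo's encoding.MarshalBytes may use a different length encoding:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// marshalRow sketches the MetricRow wire layout:
// <metricNameRaw with length prefix><timestamp uint64><value as float64 bits>.
func marshalRow(dst []byte, metricNameRaw []byte, timestamp int64, value float64) []byte {
	var lenBuf [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(lenBuf[:], uint64(len(metricNameRaw)))
	dst = append(dst, lenBuf[:n]...)
	dst = append(dst, metricNameRaw...)
	var b [8]byte
	binary.BigEndian.PutUint64(b[:], uint64(timestamp))
	dst = append(dst, b[:]...)
	binary.BigEndian.PutUint64(b[:], math.Float64bits(value))
	return append(dst, b[:]...)
}

func main() {
	row := marshalRow(nil, []byte("metric_1"), 1561200000000, 42.5)
	fmt.Println(len(row)) // 1 + 8 + 8 + 8 = 25 bytes for this name
}
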
@@ -688,7 +692,7 @@ func (s *Storage) updateDateMetricIDCache(rows []rawRow, errors []error) []error
// It is OK if the (date, metricID) entry is added multiple times to db
// by concurrent goroutines.
s.dateMetricIDCache.Set(keyBuf, nil)
- if err := idb.storeDateMetricID(date, metricID); err != nil {
+ if err := idb.storeDateMetricID(date, metricID, r.TSID.AccountID, r.TSID.ProjectID); err != nil {
errors = append(errors, err)
continue
}
diff --git a/lib/storage/storage_test.go b/lib/storage/storage_test.go
index 5dc2b1dd6..989e47864 100644
--- a/lib/storage/storage_test.go
+++ b/lib/storage/storage_test.go
@@ -194,7 +194,7 @@ func TestStorageDeleteMetrics(t *testing.T) {
}
// Verify no tag keys exist
- tks, err := s.SearchTagKeys(1e5)
+ tks, err := s.SearchTagKeys(0, 0, 1e5)
if err != nil {
t.Fatalf("error in SearchTagKeys at the start: %s", err)
}
@@ -245,7 +245,7 @@ func TestStorageDeleteMetrics(t *testing.T) {
})
// Verify no more tag keys exist
- tks, err = s.SearchTagKeys(1e5)
+ tks, err = s.SearchTagKeys(0, 0, 1e5)
if err != nil {
t.Fatalf("error in SearchTagKeys after the test: %s", err)
}
@@ -264,12 +264,16 @@ func testStorageDeleteMetrics(s *Storage, workerNum int) error {
const metricsCount = 30
workerTag := []byte(fmt.Sprintf("workerTag_%d", workerNum))
+ accountID := uint32(workerNum)
+ projectID := uint32(123)
tksAll := make(map[string]bool)
tksAll[""] = true // __name__
for i := 0; i < metricsCount; i++ {
var mrs []MetricRow
var mn MetricName
+ mn.AccountID = accountID
+ mn.ProjectID = projectID
job := fmt.Sprintf("job_%d_%d", i, workerNum)
instance := fmt.Sprintf("instance_%d_%d", i, workerNum)
mn.Tags = []Tag{
@@ -301,7 +305,7 @@ func testStorageDeleteMetrics(s *Storage, workerNum int) error {
s.debugFlush()
// Verify tag values exist
- tvs, err := s.SearchTagValues(workerTag, 1e5)
+ tvs, err := s.SearchTagValues(accountID, projectID, workerTag, 1e5)
if err != nil {
return fmt.Errorf("error in SearchTagValues before metrics removal: %s", err)
}
@@ -310,7 +314,7 @@ func testStorageDeleteMetrics(s *Storage, workerNum int) error {
}
// Verify tag keys exist
- tks, err := s.SearchTagKeys(1e5)
+ tks, err := s.SearchTagKeys(accountID, projectID, 1e5)
if err != nil {
return fmt.Errorf("error in SearchTagKeys before metrics removal: %s", err)
}
@@ -333,7 +337,7 @@ func testStorageDeleteMetrics(s *Storage, workerNum int) error {
return n
}
for i := 0; i < metricsCount; i++ {
- tfs := NewTagFilters()
+ tfs := NewTagFilters(accountID, projectID)
if err := tfs.Add(nil, []byte("metric_.+"), false, true); err != nil {
return fmt.Errorf("cannot add regexp tag filter: %s", err)
}
@@ -366,14 +370,14 @@ func testStorageDeleteMetrics(s *Storage, workerNum int) error {
}
// Make sure no more metrics left for the given workerNum
- tfs := NewTagFilters()
+ tfs := NewTagFilters(accountID, projectID)
if err := tfs.Add(nil, []byte(fmt.Sprintf("metric_.+_%d", workerNum)), false, true); err != nil {
return fmt.Errorf("cannot add regexp tag filter for worker metrics: %s", err)
}
if n := metricBlocksCount(tfs); n != 0 {
return fmt.Errorf("expecting zero metric blocks after deleting all the metrics; got %d blocks", n)
}
- tvs, err = s.SearchTagValues(workerTag, 1e5)
+ tvs, err = s.SearchTagValues(accountID, projectID, workerTag, 1e5)
if err != nil {
return fmt.Errorf("error in SearchTagValues after all the metrics are removed: %s", err)
}
@@ -451,6 +455,8 @@ func testStorageAddRows(s *Storage) error {
{[]byte("instance"), []byte("1.2.3.4")},
}
for j := 0; j < rowsPerAdd; j++ {
+ mn.AccountID = uint32(rand.Intn(2))
+ mn.ProjectID = uint32(rand.Intn(3))
mn.MetricGroup = []byte(fmt.Sprintf("metric_%d", rand.Intn(100)))
metricNameRaw := mn.marshalRaw(nil)
timestamp := rand.Int63n(1e10)
@@ -581,6 +587,8 @@ func testStorageAddMetrics(s *Storage, workerNum int) error {
{[]byte("instance"), []byte("1.2.3.4")},
}
for i := 0; i < rowsCount; i++ {
+ mn.AccountID = 123
+ mn.ProjectID = uint32(i % 3)
mn.MetricGroup = []byte(fmt.Sprintf("metric_%d_%d", workerNum, rand.Intn(10)))
metricNameRaw := mn.marshalRaw(nil)
timestamp := rand.Int63n(1e10)
diff --git a/lib/storage/storage_timing_test.go b/lib/storage/storage_timing_test.go
index abdd5f46b..3cc195cc0 100644
--- a/lib/storage/storage_timing_test.go
+++ b/lib/storage/storage_timing_test.go
@@ -44,6 +44,8 @@ func benchmarkStorageAddRows(b *testing.B, rowsPerBatch int) {
for pb.Next() {
offset := int(atomic.AddUint64(&globalOffset, uint64(rowsPerBatch)))
for i := 0; i < rowsPerBatch; i++ {
+ mn.AccountID = uint32(i)
+ mn.ProjectID = uint32(i % 3)
mr := &mrs[i]
mr.MetricNameRaw = mn.marshalRaw(mr.MetricNameRaw[:0])
mr.Timestamp = int64(offset + i)
diff --git a/lib/storage/tag_filters.go b/lib/storage/tag_filters.go
index 66c1c5bd5..bccb966d0 100644
--- a/lib/storage/tag_filters.go
+++ b/lib/storage/tag_filters.go
@@ -16,17 +16,22 @@ import (
// TagFilters represents filters used for filtering tags.
type TagFilters struct {
+ accountID uint32
+ projectID uint32
+
tfs []tagFilter
// Common prefix for all the tag filters.
- // Contains encoded nsPrefixTagToMetricID.
+ // Contains encoded nsPrefixTagToMetricID + accountID + projectID
commonPrefix []byte
}
-// NewTagFilters returns new TagFilters.
-func NewTagFilters() *TagFilters {
+// NewTagFilters returns new TagFilters for the given accountID and projectID.
+func NewTagFilters(accountID, projectID uint32) *TagFilters {
return &TagFilters{
- commonPrefix: marshalCommonPrefix(nil, nsPrefixTagToMetricID),
+ accountID: accountID,
+ projectID: projectID,
+ commonPrefix: marshalCommonPrefix(nil, nsPrefixTagToMetricID, accountID, projectID),
}
}
@@ -69,16 +74,19 @@ func (tfs *TagFilters) Add(key, value []byte, isNegative, isRegexp bool) error {
// String returns human-readable value for tfs.
func (tfs *TagFilters) String() string {
var bb bytes.Buffer
+ fmt.Fprintf(&bb, "AccountID=%d, ProjectID=%d", tfs.accountID, tfs.projectID)
for i := range tfs.tfs {
fmt.Fprintf(&bb, ", %s", tfs.tfs[i].String())
}
return bb.String()
}
-// Reset resets the tf
-func (tfs *TagFilters) Reset() {
+// Reset resets the tf for the given accountID and projectID
+func (tfs *TagFilters) Reset(accountID, projectID uint32) {
+ tfs.accountID = accountID
+ tfs.projectID = projectID
tfs.tfs = tfs.tfs[:0]
- tfs.commonPrefix = marshalCommonPrefix(tfs.commonPrefix[:0], nsPrefixTagToMetricID)
+ tfs.commonPrefix = marshalCommonPrefix(tfs.commonPrefix[:0], nsPrefixTagToMetricID, accountID, projectID)
}
// tagFilter represents a filter used for filtering tags.
@@ -88,7 +96,7 @@ type tagFilter struct {
isNegative bool
isRegexp bool
- // Prefix always contains {nsPrefixTagToMetricID, key}.
+ // Prefix always contains {nsPrefixTagToMetricID, AccountID, ProjectID, key}.
// Additionally it contains:
// - value ending with tagSeparatorChar if !isRegexp.
// - non-regexp prefix if isRegexp.
@@ -110,9 +118,9 @@ func (tf *tagFilter) String() string {
return bb.String()
}
-// Marshal appends marshaled tf to dst
+// MarshalNoAccountIDProjectID appends marshaled tf to dst
// and returns the result.
-func (tf *tagFilter) Marshal(dst []byte) []byte {
+func (tf *tagFilter) MarshalNoAccountIDProjectID(dst []byte) []byte {
dst = marshalTagValue(dst, tf.key)
dst = marshalTagValue(dst, tf.value)
diff --git a/lib/storage/tag_filters_test.go b/lib/storage/tag_filters_test.go
index 74dc705a3..897ea10ff 100644
--- a/lib/storage/tag_filters_test.go
+++ b/lib/storage/tag_filters_test.go
@@ -403,7 +403,7 @@ func testGetRegexpPrefix(t *testing.T, s, expectedPrefix, expectedSuffix string)
}
func TestTagFiltersAddEmpty(t *testing.T) {
- tfs := NewTagFilters()
+ tfs := NewTagFilters(0, 0)
mustAdd := func(key, value []byte, isNegative, isRegexp bool) {
t.Helper()
@@ -437,7 +437,7 @@ func TestTagFiltersAddEmpty(t *testing.T) {
expectTagFilter(2, ".+", false, true)
// Empty regexp filters
- tfs.Reset()
+ tfs.Reset(0, 0)
mustAdd([]byte("foo"), []byte(".*"), false, true)
if len(tfs.tfs) != 0 {
t.Fatalf("unexpectedly added empty regexp filter %s", &tfs.tfs[0])
@@ -450,7 +450,7 @@ func TestTagFiltersAddEmpty(t *testing.T) {
expectTagFilter(2, "foo||bar", true, true)
// Verify that otner filters are added normally.
- tfs.Reset()
+ tfs.Reset(0, 0)
mustAdd(nil, []byte("foobar"), false, false)
if len(tfs.tfs) != 1 {
t.Fatalf("missing added filter")
diff --git a/lib/storage/tsid.go b/lib/storage/tsid.go
index 210787a3b..c8d99ae59 100644
--- a/lib/storage/tsid.go
+++ b/lib/storage/tsid.go
@@ -14,9 +14,17 @@ import (
// grouping of related metrics.
// It is OK if their meaning differ from their naming.
type TSID struct {
+ // AccountID is the id of the registered account.
+ AccountID uint32
+
+ // ProjectID is the id of the project.
+ //
+ // The ProjectID must be unique for the given AccountID.
+ ProjectID uint32
+
// MetricGroupID is the id of metric group inside the given project.
//
- // MetricGroupID must be unique.
+ // MetricGroupID must be unique for the given (AccountID, ProjectID).
//
// Metric group contains metrics with the identical name like
// 'memory_usage', 'http_requests', but with different
@@ -32,7 +40,7 @@ type TSID struct {
// JobID is the id of an individual job (aka service)
// for the given project.
//
- // JobID must be unique.
+ // JobID must be unique for the given (AccountID, ProjectID).
//
// Service may consist of multiple instances.
// See https://prometheus.io/docs/concepts/jobs_instances/ for details.
@@ -41,7 +49,7 @@ type TSID struct {
// InstanceID is the id of an instance (aka process)
// for the given project.
//
- // InstanceID must be unique.
+ // InstanceID must be unique for the given (AccountID, ProjectID).
//
// See https://prometheus.io/docs/concepts/jobs_instances/ for details.
InstanceID uint32
@@ -61,6 +69,8 @@ var marshaledTSIDSize = func() int {
// Marshal appends marshaled t to dst and returns the result.
func (t *TSID) Marshal(dst []byte) []byte {
+ dst = encoding.MarshalUint32(dst, t.AccountID)
+ dst = encoding.MarshalUint32(dst, t.ProjectID)
dst = encoding.MarshalUint64(dst, t.MetricGroupID)
dst = encoding.MarshalUint32(dst, t.JobID)
dst = encoding.MarshalUint32(dst, t.InstanceID)
@@ -74,6 +84,10 @@ func (t *TSID) Unmarshal(src []byte) ([]byte, error) {
return nil, fmt.Errorf("too short src; got %d bytes; want %d bytes", len(src), marshaledTSIDSize)
}
+ t.AccountID = encoding.UnmarshalUint32(src)
+ src = src[4:]
+ t.ProjectID = encoding.UnmarshalUint32(src)
+ src = src[4:]
t.MetricGroupID = encoding.UnmarshalUint64(src)
src = src[8:]
t.JobID = encoding.UnmarshalUint32(src)
@@ -93,6 +107,18 @@ func (t *TSID) Less(b *TSID) bool {
return false
}
+ if t.AccountID < b.AccountID {
+ return true
+ }
+ if t.AccountID > b.AccountID {
+ return false
+ }
+ if t.ProjectID < b.ProjectID {
+ return true
+ }
+ if t.ProjectID > b.ProjectID {
+ return false
+ }
if t.MetricGroupID < b.MetricGroupID {
return true
}
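
Taken together, the marshaling and ordering changes mean a `TSID` now serializes to 32 bytes (two extra `uint32` fields in front of the old layout) and sorts by tenant first. An illustrative round-trip with arbitrary field values, using only the methods shown in this diff:

```go
// Illustrative only; field values are arbitrary.
package storage

import "fmt"

func exampleTSIDRoundTrip() error {
	a := TSID{
		AccountID:     1,
		ProjectID:     2,
		MetricGroupID: 3,
		JobID:         4,
		InstanceID:    5,
		MetricID:      6,
	}
	buf := a.Marshal(nil) // 32 bytes now: AccountID and ProjectID come first
	var b TSID
	tail, err := b.Unmarshal(buf)
	if err != nil {
		return fmt.Errorf("cannot unmarshal TSID: %s", err)
	}
	if len(tail) != 0 || b != a {
		return fmt.Errorf("unexpected round-trip result: got %+v; want %+v", b, a)
	}

	// Less compares AccountID, then ProjectID, before any other field,
	// so all series belonging to one tenant are laid out contiguously.
	c := a
	c.AccountID++
	c.MetricGroupID = 0 // still sorts after a, because AccountID wins
	if !a.Less(&c) {
		return fmt.Errorf("a=%+v must be less than c=%+v", a, c)
	}
	return nil
}
```
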
diff --git a/lib/storage/tsid_test.go b/lib/storage/tsid_test.go
index 7ceb6e2b1..20ca82502 100644
--- a/lib/storage/tsid_test.go
+++ b/lib/storage/tsid_test.go
@@ -13,7 +13,7 @@ func TestMarshaledTSIDSize(t *testing.T) {
// This test makes sure marshaled format isn't changed.
// If this test breaks then the storage format has been changed,
// so it may become incompatible with the previously written data.
- expectedSize := 24
+ expectedSize := 32
if marshaledTSIDSize != expectedSize {
t.Fatalf("unexpected marshaledTSIDSize; got %d; want %d", marshaledTSIDSize, expectedSize)
}
@@ -28,8 +28,27 @@ func TestTSIDLess(t *testing.T) {
t.Fatalf("t2=%v cannot be less than t1=%v", &t2, &t1)
}
- t1.MetricID = 124
- t2.MetricID = 126
+ t2.MetricID = 345
+ t1.AccountID = 123
+ if t1.Less(&t2) {
+ t.Fatalf("t1=%v cannot be less than t2=%v", &t1, &t2)
+ }
+ if !t2.Less(&t1) {
+ t.Fatalf("t2=%v must be less than t1=%v", &t2, &t1)
+ }
+
+ t2 = t1
+ t2.MetricID = 123
+ t1.ProjectID = 8473
+ if t1.Less(&t2) {
+ t.Fatalf("t1=%v cannot be less than t2=%v", &t1, &t2)
+ }
+ if !t2.Less(&t1) {
+ t.Fatalf("t2=%v must be less than t1=%v", &t2, &t1)
+ }
+
+ t2 = t1
+ t2.MetricID = 123
t1.MetricGroupID = 847
if t1.Less(&t2) {
t.Fatalf("t1=%v cannot be less than t2=%v", &t1, &t2)
diff --git a/vendor/github.com/lithammer/go-jump-consistent-hash/.travis.yml b/vendor/github.com/lithammer/go-jump-consistent-hash/.travis.yml
new file mode 100644
index 000000000..a6d922c79
--- /dev/null
+++ b/vendor/github.com/lithammer/go-jump-consistent-hash/.travis.yml
@@ -0,0 +1,11 @@
+language: go
+
+go:
+ - 1.0
+ - 1.1
+ - 1.2
+ - 1.3
+ - 1.4
+ - tip
+
+sudo: false
diff --git a/vendor/github.com/lithammer/go-jump-consistent-hash/LICENSE b/vendor/github.com/lithammer/go-jump-consistent-hash/LICENSE
new file mode 100644
index 000000000..9cc753370
--- /dev/null
+++ b/vendor/github.com/lithammer/go-jump-consistent-hash/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Peter Renström
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/lithammer/go-jump-consistent-hash/README.md b/vendor/github.com/lithammer/go-jump-consistent-hash/README.md
new file mode 100644
index 000000000..0f3a833fa
--- /dev/null
+++ b/vendor/github.com/lithammer/go-jump-consistent-hash/README.md
@@ -0,0 +1,22 @@
+# Jump Consistent Hash
+
+[![Build Status](https://travis-ci.org/renstrom/go-jump-consistent-hash.svg?branch=master)](https://travis-ci.org/renstrom/go-jump-consistent-hash)
+[![Godoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/renstrom/go-jump-consistent-hash)
+
+Go implementation of the jump consistent hash algorithm[1] by John Lamping and Eric Veach.
+
+[1] http://arxiv.org/pdf/1406.2294v1.pdf
+
+## Usage
+
+```go
+import jump "github.com/renstrom/go-jump-consistent-hash"
+
+func main() {
+ jump.Hash(256, 1024) // 520
+}
+```
+
+## License
+
+MIT
diff --git a/vendor/github.com/lithammer/go-jump-consistent-hash/doc.go b/vendor/github.com/lithammer/go-jump-consistent-hash/doc.go
new file mode 100644
index 000000000..309751588
--- /dev/null
+++ b/vendor/github.com/lithammer/go-jump-consistent-hash/doc.go
@@ -0,0 +1,131 @@
+// Example
+//
+// jump.Hash(256, 1024) // 520
+//
+// Reference C++ implementation[1]
+//
+// int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) {
+// int64_t b = -1, j = 0;
+// while (j < num_buckets) {
+// b = j;
+// key = key * 2862933555777941757ULL + 1;
+// j = (b + 1) * (double(1LL << 31) / double((key >> 33) + 1));
+// }
+// return b;
+// }
+//
+// Explanation of the algorithm
+//
+// Jump consistent hash works by computing when its output changes as the
+// number of buckets increases. Let ch(key, num_buckets) be the consistent hash
+// for the key when there are num_buckets buckets. Clearly, for any key, k,
+// ch(k, 1) is 0, since there is only the one bucket. In order for the
+// consistent hash function to be balanced, ch(k, 2) will have to stay at 0 for
+// half the keys, k, while it will have to jump to 1 for the other half. In
+// general, ch(k, n+1) has to stay the same as ch(k, n) for n/(n+1) of the
+// keys, and jump to n for the other 1/(n+1) of the keys.
+//
+// Here are examples of the consistent hash values for three keys, k1, k2, and
+// k3, as num_buckets goes up:
+//
+// │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │ 12 │ 13 │ 14
+// ───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────
+// k1 │ 0 │ 0 │ 2 │ 2 │ 4 │ 4 │ 4 │ 4 │ 4 │ 4 │ 4 │ 4 │ 4 │ 4
+// ───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────
+// k2 │ 0 │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ 7 │ 7 │ 7 │ 7 │ 7 │ 7 │ 7
+// ───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────
+// k3 │ 0 │ 1 │ 1 │ 1 │ 1 │ 5 │ 5 │ 7 │ 7 │ 7 │ 10 │ 10 │ 10 │ 10
+//
+// A linear time algorithm can be defined by using the formula for the
+// probability of ch(key, j) jumping when j increases. It essentially walks
+// across a row of this table. Given a key and number of buckets, the algorithm
+// considers each successive bucket, j, from 1 to num_buckets-1, and uses
+// ch(key, j) to compute ch(key, j+1). At each bucket, j, it decides whether to
+// keep ch(k, j+1) the same as ch(k, j), or to jump its value to j. In order to
+// jump for the right fraction of keys, it uses a pseudorandom number
+// generator with the key as its seed. To jump for 1/(j+1) of keys, it
+// generates a uniform random number between 0.0 and 1.0, and jumps if the
+// value is less than 1/(j+1). At the end of the loop, it has computed
+// ch(k, num_buckets), which is the desired answer. In code:
+//
+// int ch(int key, int num_buckets) {
+// random.seed(key);
+// int b = 0; // This will track ch(key,j+1).
+// for (int j = 1; j < num_buckets; j++) {
+// if (random.next() < 1.0 / (j + 1)) b = j;
+// }
+// return b;
+// }
+//
+// We can convert this to a logarithmic time algorithm by exploiting that
+// ch(key, j+1) is usually unchanged as j increases, only jumping occasionally.
+// The algorithm will only compute the destinations of jumps, the j’s for
+// which ch(key, j+1) ≠ ch(key, j). Also notice that for these j’s, ch(key,
+// j+1) = j. To develop the algorithm, we will treat ch(key, j) as a random
+// variable, so that we can use the notation for random variables to analyze
+// the fractions of keys for which various propositions are true. That will
+// lead us to a closed form expression for a pseudorandom variable whose value
+// gives the destination of the next jump.
+//
+// Suppose that the algorithm is tracking the bucket numbers of the jumps for a
+// particular key, k. And suppose that b was the destination of the last jump,
+// that is, ch(k, b) ≠ ch(k, b+1), and ch(k, b+1) = b. Now, we want to find the
+// next jump, the smallest j such that ch(k, j+1) ≠ ch(k, b+1), or
+// equivalently, the largest j such that ch(k, j) = ch(k, b+1). We will make a
+// pseudorandom variable whose value is that j. To get a probabilistic
+// constraint on j, note that for any bucket number, i, we have j ≥ i if and
+// only if the consistent hash hasn’t changed by i, that is, if and only if
+// ch(k, i) = ch(k, b+1). Hence, the distribution of j must satisfy
+//
+// P(j ≥ i) = P( ch(k, i) = ch(k, b+1) )
+//
+// Fortunately, it is easy to compute that probability. Notice that since P(
+// ch(k, 10) = ch(k, 11) ) is 10/11, and P( ch(k, 11) = ch(k, 12) ) is 11/12,
+// then P( ch(k, 10) = ch(k, 12) ) is 10/11 * 11/12 = 10/12. In general, if n ≥
+// m, P( ch(k, n) = ch(k, m) ) = m / n. Thus for any i > b,
+//
+// P(j ≥ i) = P( ch(k, i) = ch(k, b+1) ) = (b+1) / i .
+//
+// Now, we generate a pseudorandom variable, r, (depending on k and j) that is
+// uniformly distributed between 0 and 1. Since we want P(j ≥ i) = (b+1) / i,
+// we set P(j ≥ i) iff r ≤ (b+1) / i. Solving the inequality for i yields P(j ≥
+// i) iff i ≤ (b+1) / r. Since i is a lower bound on j, j will equal the
+// largest i for which P(j ≥ i), thus the largest i satisfying i ≤ (b+1) / r.
+// Thus, by the definition of the floor function, j = floor((b+1) / r).
+//
+// Using this formula, jump consistent hash finds ch(key, num_buckets) by
+// choosing successive jump destinations until it finds a position at or past
+// num_buckets. It then knows that the previous jump destination is the answer.
+//
+// int ch(int key, int num_buckets) {
+// random.seed(key);
+// int b = -1; // bucket number before the previous jump
+// int j = 0; // bucket number before the current jump
+// while (j < num_buckets) {
+// b = j;
+// r = random.next();
+// j = floor((b + 1) / r);
+// }
+// return b;
+// }
+//
+// To turn this into the actual code of figure 1, we need to implement random.
+// We want it to be fast, and yet to also have well distributed successive
+// values. We use a 64bit linear congruential generator; the particular
+// multiplier we use produces random numbers that are especially well
+// distributed in higher dimensions (i.e., when successive random values are
+// used to form tuples). We use the key as the seed. (For keys that don’t fit
+// into 64 bits, a 64 bit hash of the key should be used.) The congruential
+// generator updates the seed on each iteration, and the code derives a double
+// from the current seed. Tests show that this generator has good speed and
+// distribution.
+//
+// It is worth noting that unlike the algorithm of Karger et al., jump
+// consistent hash does not require the key to be hashed if it is already an
+// integer. This is because jump consistent hash has an embedded pseudorandom
+// number generator that essentially rehashes the key on every iteration. The
+// hash is not especially good (i.e., linear congruential), but since it is
+// applied repeatedly, additional hashing of the input key is not necessary.
+//
+// [1] http://arxiv.org/pdf/1406.2294v1.pdf
+package jump
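
For reference, the linear-time algorithm sketched in the comment above translates almost verbatim to Go. The snippet below is illustrative only (it is not part of the vendored package), and because it uses math/rand instead of the 64-bit LCG, it will not reproduce the exact bucket assignments of jump.Hash below; it only demonstrates the jump rule.

```go
// Not part of the vendored package: a direct transcription of the
// linear-time reference algorithm, for illustration.
package main

import (
	"fmt"
	"math/rand"
)

func linearConsistentHash(key int64, numBuckets int) int {
	r := rand.New(rand.NewSource(key)) // stands in for random.seed(key)
	b := 0                             // tracks ch(key, j+1)
	for j := 1; j < numBuckets; j++ {
		// Jump to bucket j for 1/(j+1) of all keys.
		if r.Float64() < 1.0/float64(j+1) {
			b = j
		}
	}
	return b
}

func main() {
	// The bucket for a fixed key only changes occasionally as the
	// number of buckets grows, as in the table above.
	for _, n := range []int{1, 2, 4, 8, 16, 32} {
		fmt.Printf("ch(42, %d) = %d\n", n, linearConsistentHash(42, n))
	}
}
```
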
diff --git a/vendor/github.com/lithammer/go-jump-consistent-hash/jump.go b/vendor/github.com/lithammer/go-jump-consistent-hash/jump.go
new file mode 100644
index 000000000..fb62c665e
--- /dev/null
+++ b/vendor/github.com/lithammer/go-jump-consistent-hash/jump.go
@@ -0,0 +1,19 @@
+package jump
+
+// Hash takes a 64 bit key and the number of buckets. It outputs a bucket
+// number in the range [0, buckets).
+func Hash(key uint64, buckets int32) int32 {
+ var b, j int64
+
+ if buckets <= 0 {
+ buckets = 1
+ }
+
+ for j < int64(buckets) {
+ b = j
+ key = key*2862933555777941757 + 1
+ j = int64(float64(b+1) * (float64(int64(1)<<31) / float64((key>>33)+1)))
+ }
+
+ return int32(b)
+}
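
The diff does not show where the cluster code calls this package, so the routing sketch below is an assumption: hashing a raw metric name with the already-vendored xxhash and mapping the result onto N storage nodes. `pickStorageNode` and the choice of routing key are hypothetical.

```go
// Hypothetical usage sketch; the real call site and routing key are not
// shown in this diff.
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
	jump "github.com/lithammer/go-jump-consistent-hash"
)

// pickStorageNode maps a series to one of storageNodeCount nodes.
func pickStorageNode(metricNameRaw []byte, storageNodeCount int) int {
	h := xxhash.Sum64(metricNameRaw)
	return int(jump.Hash(h, int32(storageNodeCount)))
}

func main() {
	nodes := []string{"vmstorage-0:8400", "vmstorage-1:8400", "vmstorage-2:8400"}
	idx := pickStorageNode([]byte(`http_requests_total{job="api"}`), len(nodes))
	fmt.Println("route series to", nodes[idx])
	// Growing the cluster to len(nodes)+1 moves only ~1/(len(nodes)+1)
	// of the series to the new node; the rest keep their assignment.
}
```

Compared to ring-based consistent hashing, jump hash keeps no per-node state and is branch-cheap, at the cost of only supporting bucket counts that grow or shrink at the end of the range.
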
diff --git a/vendor/modules.txt b/vendor/modules.txt
index c1942d131..fb074a664 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -6,6 +6,8 @@ github.com/VictoriaMetrics/metrics
github.com/cespare/xxhash/v2
# github.com/golang/snappy v0.0.1
github.com/golang/snappy
+# github.com/lithammer/go-jump-consistent-hash v1.0.0
+github.com/lithammer/go-jump-consistent-hash
# github.com/valyala/bytebufferpool v1.0.0
github.com/valyala/bytebufferpool
# github.com/valyala/fastjson v1.4.1