Mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git, synced 2025-03-11 15:34:56 +00:00
{app/vmstorage,app/vmselect}: add API to get list of existing tenants (#3348)
* {app/vmstorage,app/vmselect}: add API to get list of existing tenants
* app/vmselect: fix error message
* {app/vmstorage,app/vmselect}: fix error messages
* app/vmselect: change log level for error handling
* wip

Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
Parent: 693d68f307
Commit: e407e7243a
15 changed files with 501 additions and 16 deletions
@@ -57,6 +57,10 @@ when different tenants have different amounts of data and different query load.

- VictoriaMetrics doesn't support querying multiple tenants in a single request.

- The list of registered tenants can be obtained via the `http://<vmselect>:8481/admin/tenants` URL. See [these docs](#url-format).

- VictoriaMetrics exposes various per-tenant statistics via metrics - see [these docs](https://docs.victoriametrics.com/PerTenantStatistic.html).

See also [multitenancy via labels](#multitenancy-via-labels).

@@ -328,6 +332,9 @@ See [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html
  Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
  be used on a regular basis, since it carries non-zero overhead.

- URL for listing [tenants](#multitenancy) with ingested data on the given time range: `http://<vmselect>:8481/admin/tenants?start=...&end=...`.
  The `start` and `end` query args are optional. If they are missing, then all the tenants with at least one sample stored in VictoriaMetrics are returned. A client sketch follows this hunk.

- URL for accessing [vmalert's](https://docs.victoriametrics.com/vmalert.html) UI: `http://<vmselect>:8481/select/<accountID>/prometheus/vmalert/`.
  This URL works only when the `-vmalert.proxyURL` flag is set. See more about vmalert [here](#vmalert).

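To make the new endpoint concrete, here is a hedged client sketch (not part of this commit). The vmselect host and the start/end timestamp values are placeholder assumptions; the endpoint path, the optional `start`/`end` args, the response shape, and the `accountID:projectID` tenant format all come from this diff.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

type tenantsResponse struct {
	Status string   `json:"status"`
	Data   []string `json:"data"` // each entry has the form "accountID:projectID"
}

func main() {
	// start/end are optional; omit them to list every tenant with at least one sample.
	resp, err := http.Get("http://vmselect:8481/admin/tenants?start=1667865600&end=1668038400")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var tr tenantsResponse
	if err := json.NewDecoder(resp.Body).Decode(&tr); err != nil {
		panic(err)
	}
	fmt.Println(tr.Status, tr.Data) // e.g.: success [0:0 1:0 42:7]
}
```
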
@@ -42,6 +42,11 @@ func (api *vmstorageAPI) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQu
	return bi, nil
}

func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
	dl := searchutils.DeadlineFromTimestamp(deadline)
	return netstorage.Tenants(qt, tr, dl)
}

func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
	denyPartialResponse := searchutils.GetDenyPartialResponse(nil)
	dl := searchutils.DeadlineFromTimestamp(deadline)

@@ -248,7 +248,16 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
		}
		return true
	}

	if path == "/admin/tenants" {
		tenantsRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.Tenants(qt, startTime, w, r); err != nil {
			tenantsErrors.Inc()
			httpserver.Errorf(w, r, "error getting tenants: %s", err)
			return true
		}
		return true
	}
	p, err := httpserver.ParsePath(path)
	if err != nil {
		httpserver.Errorf(w, r, "cannot parse path %q: %s", path, err)

@@ -735,6 +744,9 @@ var (
	buildInfoRequests      = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/buildinfo"}`)
	queryExemplarsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/query_exemplars"}`)

	tenantsRequests = metrics.NewCounter(`vm_http_requests_total{path="/admin/tenants"}`)
	tenantsErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/admin/tenants"}`)

	httpRequests         = tenantmetrics.NewCounterMap(`vm_tenant_select_requests_total`)
	httpRequestsDuration = tenantmetrics.NewCounterMap(`vm_tenant_select_requests_duration_ms_total`)
)

@@ -882,6 +882,58 @@ func LabelValues(qt *querytracer.Tracer, denyPartialResponse bool, labelName str
	return labelValues, isPartial, nil
}

// Tenants returns tenants until the given deadline.
func Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) {
	qt = qt.NewChild("get tenants on timeRange=%s", &tr)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		tenants []string
		err     error
	}
	sns := getStorageNodes()
	// Deny partial responses when obtaining the list of tenants, since a partial list of tenants makes little sense.
	snr := startStorageNodesRequest(qt, sns, true, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.tenantsRequests.Inc()
		tenants, err := sn.getTenants(qt, tr, deadline)
		if err != nil {
			sn.tenantsErrors.Inc()
			err = fmt.Errorf("cannot get tenants from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			tenants: tenants,
			err:     err,
		}
	})

	// Collect results
	var tenants []string
	_, err := snr.collectResults(partialLabelValuesResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		tenants = append(tenants, nr.tenants...)
		return nil
	})
	qt.Printf("get %d tenants before de-duplication", len(tenants))
	if err != nil {
		return nil, fmt.Errorf("cannot fetch tenants from vmstorage nodes: %w", err)
	}

	// Deduplicate tenants
	tenants = deduplicateStrings(tenants)
	qt.Printf("get %d unique tenants after de-duplication", len(tenants))

	sort.Strings(tenants)
	qt.Printf("sort %d tenants", len(tenants))
	return tenants, nil
}

// GraphiteTagValues returns tag values for the given tagName until the given deadline.
func GraphiteTagValues(qt *querytracer.Tracer, accountID, projectID uint32, denyPartialResponse bool, tagName, filter string, limit int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get graphite tag values for tagName=%s, filter=%s, limit=%d", tagName, filter, limit)

@@ -1637,6 +1689,12 @@ type storageNode struct {

	// The number of read metric rows.
	metricRowsRead *metrics.Counter

	// The number of list-tenants requests to storageNode.
	tenantsRequests *metrics.Counter

	// The number of list-tenants errors from storageNode.
	tenantsErrors *metrics.Counter
}

func (sn *storageNode) registerMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, deadline searchutils.Deadline) error {

@@ -1697,6 +1755,22 @@ func (sn *storageNode) getLabelValues(qt *querytracer.Tracer, labelName string,
	return labelValues, nil
}

func (sn *storageNode) getTenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) {
	var tenants []string
	f := func(bc *handshake.BufferedConn) error {
		result, err := sn.getTenantsOnConn(bc, tr)
		if err != nil {
			return err
		}
		tenants = result
		return nil
	}
	if err := sn.execOnConnWithPossibleRetry(qt, "tenants_v1", f, deadline); err != nil {
		return nil, err
	}
	return tenants, nil
}

func (sn *storageNode) getTagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string,
	delimiter byte, maxSuffixes int, deadline searchutils.Deadline) ([]string, error) {
	var suffixes []string

@@ -2002,6 +2076,7 @@ func (sn *storageNode) getLabelNamesOnConn(bc *handshake.BufferedConn, requestDa
}

const maxLabelValueSize = 16 * 1024 * 1024
const maxTenantValueSize = 16 * 1024 * 1024 // TODO: calc 'uint32:uint32'
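An aside on the TODO above: a tenant is rendered as `accountID:projectID` with both IDs being `uint32` values, so the longest possible tenant string is `4294967295:4294967295`, i.e. 21 bytes. The 16 MiB limit copied from `maxLabelValueSize` is therefore a very loose upper bound rather than the calculated one the TODO asks for.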

func (sn *storageNode) getLabelValuesOnConn(bc *handshake.BufferedConn, labelName string, requestData []byte, maxLabelValues int) ([]string, error) {
	// Send the request to sn.

@@ -2051,6 +2126,39 @@ func readLabelValues(buf []byte, bc *handshake.BufferedConn) ([]string, []byte,
	}
}

func (sn *storageNode) getTenantsOnConn(bc *handshake.BufferedConn, tr storage.TimeRange) ([]string, error) {
	if err := writeTimeRange(bc, tr); err != nil {
		return nil, err
	}
	if err := bc.Flush(); err != nil {
		return nil, fmt.Errorf("cannot flush request to conn: %w", err)
	}

	// Read response error.
	buf, err := readBytes(nil, bc, maxErrorMessageSize)
	if err != nil {
		return nil, fmt.Errorf("cannot read error message: %w", err)
	}
	if len(buf) > 0 {
		return nil, newErrRemote(buf)
	}

	// Read response
	var tenants []string
	for {
		var err error
		buf, err = readBytes(buf[:0], bc, maxTenantValueSize)
		if err != nil {
			return nil, fmt.Errorf("cannot read tenant #%d: %w", len(tenants), err)
		}
		if len(buf) == 0 {
			// Reached the end of the response
			return tenants, nil
		}
		tenants = append(tenants, string(buf))
	}
}
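Taken together with `processTenants` on the vmstorage side (shown later in this diff), the `tenants_v1` wire exchange is: an empty error string, then one string per tenant, then an empty string as the end-of-response marker. Below is a self-contained sketch of that exchange over an in-memory buffer; the 8-byte length-prefixed codec is a hypothetical stand-in for the real `handshake.BufferedConn` framing, which this diff does not show.

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// writeString / readString: hypothetical framing (8-byte big-endian length
// prefix); the real readBytes/writeString framing lives outside this diff.
func writeString(w *bytes.Buffer, s string) {
	var lenBuf [8]byte
	binary.BigEndian.PutUint64(lenBuf[:], uint64(len(s)))
	w.Write(lenBuf[:])
	w.WriteString(s)
}

func readString(r io.Reader) (string, error) {
	var lenBuf [8]byte
	if _, err := io.ReadFull(r, lenBuf[:]); err != nil {
		return "", err
	}
	b := make([]byte, binary.BigEndian.Uint64(lenBuf[:]))
	if _, err := io.ReadFull(r, b); err != nil {
		return "", err
	}
	return string(b), nil
}

func main() {
	var conn bytes.Buffer

	// Server side (processTenants): empty error message, tenants, end marker.
	writeString(&conn, "")
	for _, tenant := range []string{"0:0", "1:0", "42:7"} {
		writeString(&conn, tenant)
	}
	writeString(&conn, "")

	// Client side (getTenantsOnConn): read the error string first.
	if errMsg, err := readString(&conn); err != nil || errMsg != "" {
		panic(fmt.Sprintf("remote error: %q (%v)", errMsg, err))
	}
	// Then read tenants until the empty end-of-response marker.
	var tenants []string
	for {
		s, err := readString(&conn)
		if err != nil {
			panic(err)
		}
		if s == "" {
			break
		}
		tenants = append(tenants, s)
	}
	fmt.Println(tenants) // [0:0 1:0 42:7]
}
```
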

func (sn *storageNode) getTagValueSuffixesOnConn(bc *handshake.BufferedConn, accountID, projectID uint32,
	tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte, maxSuffixes int) ([]string, error) {
	// Send the request to sn.

@@ -2480,6 +2588,8 @@ func initStorageNodes(addrs []string) *storageNodesBucket {
			searchMetricNamesErrors: ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="searchMetricNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
			searchRequests:          ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
			searchErrors:            ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
			tenantsRequests:         ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="tenants", type="rpcClient", name="vmselect", addr=%q}`, addr)),
			tenantsErrors:           ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="tenants", type="rpcClient", name="vmselect", addr=%q}`, addr)),

			metricBlocksRead: ms.NewCounter(fmt.Sprintf(`vm_metric_blocks_read_total{name="vmselect", addr=%q}`, addr)),
			metricRowsRead:   ms.NewCounter(fmt.Sprintf(`vm_metric_rows_read_total{name="vmselect", addr=%q}`, addr)),

@@ -508,6 +508,36 @@ var httpClient = &http.Client{
	Timeout: time.Second * 5,
}

// Tenants processes /admin/tenants request.
func Tenants(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	deadline := searchutils.GetDeadlineForStatusRequest(r, startTime)
	start, err := searchutils.GetTime(r, "start", 0)
	if err != nil {
		return err
	}
	ct := startTime.UnixNano() / 1e6
	end, err := searchutils.GetTime(r, "end", ct)
	if err != nil {
		return err
	}
	tr := storage.TimeRange{
		MinTimestamp: start,
		MaxTimestamp: end,
	}
	tenants, err := netstorage.Tenants(qt, tr, deadline)
	if err != nil {
		return err
	}
	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	WriteTenantsResponse(bw, tenants, qt)
	if err := bw.Flush(); err != nil {
		return fmt.Errorf("cannot flush tenants to remote client: %w", err)
	}
	return nil
}

// LabelValuesHandler processes /api/v1/label/<labelName>/values request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values

app/vmselect/prometheus/tenants_response.qtpl (new file, 24 lines)

@@ -0,0 +1,24 @@
{% stripspace %}

{% import (
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
) %}

TenantsResponse generates response for /admin/tenants.
{% func TenantsResponse(tenants []string, qt *querytracer.Tracer) %}
{
	"status":"success",
	"data":[
		{% for i, tenant := range tenants %}
			{%q= tenant %}
			{% if i+1 < len(tenants) %},{% endif %}
		{% endfor %}
	]
	{% code
		qt.Printf("generate response for %d tenants", len(tenants))
		qt.Done()
	%}
	{%= dumpQueryTrace(qt) %}
}
{% endfunc %}
{% endstripspace %}
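For reference, a sketch of what this template renders: assuming query tracing is disabled (so `dumpQueryTrace` contributes nothing, an assumption about its behavior outside this diff), two hypothetical tenants produce a body like

```
{"status":"success","data":["0:0","1:42"]}
```

with all inter-token whitespace removed by `stripspace`.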

app/vmselect/prometheus/tenants_response.qtpl.go (new file, 80 lines)

@@ -0,0 +1,80 @@
// Code generated by qtc from "tenants_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.

//line app/vmselect/prometheus/tenants_response.qtpl:3
package prometheus

//line app/vmselect/prometheus/tenants_response.qtpl:3
import (
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
)

// TenantsResponse generates response for /admin/tenants.

//line app/vmselect/prometheus/tenants_response.qtpl:8
import (
	qtio422016 "io"

	qt422016 "github.com/valyala/quicktemplate"
)

//line app/vmselect/prometheus/tenants_response.qtpl:8
var (
	_ = qtio422016.Copy
	_ = qt422016.AcquireByteBuffer
)

//line app/vmselect/prometheus/tenants_response.qtpl:8
func StreamTenantsResponse(qw422016 *qt422016.Writer, tenants []string, qt *querytracer.Tracer) {
//line app/vmselect/prometheus/tenants_response.qtpl:8
	qw422016.N().S(`{"status":"success","data":[`)
//line app/vmselect/prometheus/tenants_response.qtpl:12
	for i, tenant := range tenants {
//line app/vmselect/prometheus/tenants_response.qtpl:13
		qw422016.N().Q(tenant)
//line app/vmselect/prometheus/tenants_response.qtpl:14
		if i+1 < len(tenants) {
//line app/vmselect/prometheus/tenants_response.qtpl:14
			qw422016.N().S(`,`)
//line app/vmselect/prometheus/tenants_response.qtpl:14
		}
//line app/vmselect/prometheus/tenants_response.qtpl:15
	}
//line app/vmselect/prometheus/tenants_response.qtpl:15
	qw422016.N().S(`]`)
//line app/vmselect/prometheus/tenants_response.qtpl:18
	qt.Printf("generate response for %d tenants", len(tenants))
	qt.Done()

//line app/vmselect/prometheus/tenants_response.qtpl:21
	streamdumpQueryTrace(qw422016, qt)
//line app/vmselect/prometheus/tenants_response.qtpl:21
	qw422016.N().S(`}`)
//line app/vmselect/prometheus/tenants_response.qtpl:23
}

//line app/vmselect/prometheus/tenants_response.qtpl:23
func WriteTenantsResponse(qq422016 qtio422016.Writer, tenants []string, qt *querytracer.Tracer) {
//line app/vmselect/prometheus/tenants_response.qtpl:23
	qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/tenants_response.qtpl:23
	StreamTenantsResponse(qw422016, tenants, qt)
//line app/vmselect/prometheus/tenants_response.qtpl:23
	qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/tenants_response.qtpl:23
}

//line app/vmselect/prometheus/tenants_response.qtpl:23
func TenantsResponse(tenants []string, qt *querytracer.Tracer) string {
//line app/vmselect/prometheus/tenants_response.qtpl:23
	qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/tenants_response.qtpl:23
	WriteTenantsResponse(qb422016, tenants, qt)
//line app/vmselect/prometheus/tenants_response.qtpl:23
	qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/tenants_response.qtpl:23
	qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/tenants_response.qtpl:23
	return qs422016
//line app/vmselect/prometheus/tenants_response.qtpl:23
}

@@ -116,6 +116,10 @@ func (api *vmstorageAPI) SeriesCount(qt *querytracer.Tracer, accountID, projectI
	return api.s.GetSeriesCount(accountID, projectID, deadline)
}

func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
	return api.s.SearchTenants(qt, tr, deadline)
}

func (api *vmstorageAPI) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
	tr := sq.GetTimeRange()
	maxMetrics := getMaxMetrics(sq)

@@ -15,6 +15,7 @@ The following tip changes can be tested by building VictoriaMetrics components f

## tip

* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add `http://<vmselect>:8481/admin/tenants` API endpoint for returning a list of registered tenants. See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) for details.
* FEATURE: [VictoriaMetrics enterprise](https://docs.victoriametrics.com/enterprise.html): add `-storageNode.filter` command-line flag for filtering the [discovered vmstorage nodes](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#automatic-vmstorage-discovery) with arbitrary regular expressions. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3353).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow using numeric values with `K`, `Ki`, `M`, `Mi`, `G`, `Gi`, `T` and `Ti` suffixes inside MetricsQL queries. For example, `8Ki` equals `8*1024`, while `8.2M` equals `8.2*1000*1000`.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_normalize](https://docs.victoriametrics.com/MetricsQL.html#range_normalize) function for normalizing multiple time series into the `[0...1]` value range. This function is useful for correlation analysis of time series with distinct value ranges. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3167).

@@ -61,6 +61,10 @@ when different tenants have different amounts of data and different query load.

- VictoriaMetrics doesn't support querying multiple tenants in a single request.

- The list of registered tenants can be obtained via the `http://<vmselect>:8481/admin/tenants` URL. See [these docs](#url-format).

- VictoriaMetrics exposes various per-tenant statistics via metrics - see [these docs](https://docs.victoriametrics.com/PerTenantStatistic.html).

See also [multitenancy via labels](#multitenancy-via-labels).

@@ -332,6 +336,9 @@ See [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html
  Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
  be used on a regular basis, since it carries non-zero overhead.

- URL for listing [tenants](#multitenancy) with ingested data on the given time range: `http://<vmselect>:8481/admin/tenants?start=...&end=...`.
  The `start` and `end` query args are optional. If they are missing, then all the tenants with at least one sample stored in VictoriaMetrics are returned.

- URL for accessing [vmalert's](https://docs.victoriametrics.com/vmalert.html) UI: `http://<vmselect>:8481/select/<accountID>/prometheus/vmalert/`.
  This URL works only when the `-vmalert.proxyURL` flag is set. See more about vmalert [here](#vmalert).

@@ -947,6 +947,134 @@ func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricI
	return nil
}

// SearchTenants returns all tenants on the given tr.
func (db *indexDB) SearchTenants(qt *querytracer.Tracer, tr TimeRange, deadline uint64) ([]string, error) {
	qt = qt.NewChild("search for tenants on timeRange=%s", &tr)
	defer qt.Done()
	tenants := make(map[string]struct{})
	qtChild := qt.NewChild("search for tenants in the current indexdb")
	is := db.getIndexSearch(0, 0, deadline)
	err := is.searchTenantsOnTimeRange(qtChild, tenants, tr)
	db.putIndexSearch(is)
	qtChild.Donef("found %d tenants", len(tenants))
	if err != nil {
		return nil, err
	}
	ok := db.doExtDB(func(extDB *indexDB) {
		qtChild := qt.NewChild("search for tenants in the previous indexdb")
		tenantsLen := len(tenants)
		is := extDB.getIndexSearch(0, 0, deadline)
		err = is.searchTenantsOnTimeRange(qtChild, tenants, tr)
		extDB.putIndexSearch(is)
		qtChild.Donef("found %d additional tenants", len(tenants)-tenantsLen)
	})
	if ok && err != nil {
		return nil, err
	}

	tenantsList := make([]string, 0, len(tenants))
	for tenant := range tenants {
		tenantsList = append(tenantsList, tenant)
	}
	// Do not sort tenants, since they must be sorted by vmselect.
	qt.Printf("found %d tenants in the current and the previous indexdb", len(tenantsList))
	return tenantsList, nil
}

func (is *indexSearch) searchTenantsOnTimeRange(qt *querytracer.Tracer, tenants map[string]struct{}, tr TimeRange) error {
	minDate := uint64(tr.MinTimestamp) / msecPerDay
	maxDate := uint64(tr.MaxTimestamp-1) / msecPerDay
	if maxDate == 0 || minDate > maxDate || maxDate-minDate > maxDaysForPerDaySearch {
		qtChild := qt.NewChild("search for tenants in global index")
		err := is.searchTenantsOnDate(qtChild, tenants, 0)
		qtChild.Done()
		return err
	}
	var mu sync.Mutex
	wg := getWaitGroup()
	var errGlobal error
	qt = qt.NewChild("parallel search for tenants on timeRange=%s", &tr)
	for date := minDate; date <= maxDate; date++ {
		wg.Add(1)
		qtChild := qt.NewChild("search for tenants on date=%s", dateToString(date))
		go func(date uint64) {
			defer func() {
				qtChild.Done()
				wg.Done()
			}()
			tenantsLocal := make(map[string]struct{})
			isLocal := is.db.getIndexSearch(0, 0, is.deadline)
			err := isLocal.searchTenantsOnDate(qtChild, tenantsLocal, date)
			is.db.putIndexSearch(isLocal)
			mu.Lock()
			defer mu.Unlock()
			if errGlobal != nil {
				return
			}
			if err != nil {
				errGlobal = err
				return
			}
			for tenant := range tenantsLocal {
				tenants[tenant] = struct{}{}
			}
		}(date)
	}
	wg.Wait()
	putWaitGroup(wg)
	qt.Done()
	return errGlobal
}
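The per-day loop above is a recurring VictoriaMetrics concurrency pattern: one goroutine per day, each filling a private set, merged under a mutex with the first error winning. A stripped-down, runnable sketch of just that pattern (`searchDay` is a hypothetical stand-in for `searchTenantsOnDate`):

```go
package main

import (
	"fmt"
	"sync"
)

// searchDay is a hypothetical stand-in for searchTenantsOnDate: it returns
// the set of tenants seen on one day.
func searchDay(date uint64) (map[string]struct{}, error) {
	return map[string]struct{}{fmt.Sprintf("%d:0", date%3): {}}, nil
}

func main() {
	tenants := make(map[string]struct{})
	var (
		mu        sync.Mutex
		wg        sync.WaitGroup
		errGlobal error
	)
	for date := uint64(100); date <= 105; date++ {
		wg.Add(1)
		go func(date uint64) {
			defer wg.Done()
			local, err := searchDay(date) // private set: no locking while searching
			mu.Lock()
			defer mu.Unlock()
			if errGlobal != nil {
				return // another day already failed; keep the first error
			}
			if err != nil {
				errGlobal = err
				return
			}
			for t := range local {
				tenants[t] = struct{}{} // merge under the mutex
			}
		}(date)
	}
	wg.Wait()
	if errGlobal != nil {
		panic(errGlobal)
	}
	fmt.Println(len(tenants), "unique tenants") // 3 unique tenants
}
```
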

func (is *indexSearch) searchTenantsOnDate(qt *querytracer.Tracer, tenants map[string]struct{}, date uint64) error {
	loopsPaceLimiter := 0
	ts := &is.ts
	kb := &is.kb
	is.accountID = 0
	is.projectID = 0
	kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date)
	_, prefixNeeded, _, _, err := unmarshalCommonPrefix(kb.B)
	if err != nil {
		logger.Panicf("BUG: cannot unmarshal common prefix from %q: %s", kb.B, err)
	}
	ts.Seek(kb.B)
	for ts.NextItem() {
		if loopsPaceLimiter&paceLimiterFastIterationsMask == 0 {
			if err := checkSearchDeadlineAndPace(is.deadline); err != nil {
				return err
			}
		}
		loopsPaceLimiter++
		_, prefix, accountID, projectID, err := unmarshalCommonPrefix(ts.Item)
		if err != nil {
			return err
		}
		if prefix != prefixNeeded {
			// Reached the end of entries with the needed prefix.
			break
		}
		tenant := fmt.Sprintf("%d:%d", accountID, projectID)
		tenants[tenant] = struct{}{}
		// Seek for the next (accountID, projectID)
		projectID++
		if projectID == 0 {
			accountID++
			if accountID == 0 {
				// Reached the end of the (accountID, projectID) space
				break
			}
		}
		is.accountID = accountID
		is.projectID = projectID
		kb.B = is.marshalCommonPrefixForDate(kb.B[:0], date)
		ts.Seek(kb.B)
	}
	if err := ts.Error(); err != nil {
		return fmt.Errorf("error during search for prefix %q: %w", kb.B, err)
	}
	return nil
}
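The loop above deliberately avoids visiting every index row: after recording a tenant, it increments projectID (with carry into accountID) and seeks straight to the first row of the next tenant. A toy illustration over a sorted slice; the text-encoded key layout here is hypothetical (real entries encode the IDs in binary, and decimal text keeps this toy sorted only because all IDs are single-digit):

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	// Hypothetical, text-encoded index keys: "d|accountID:projectID|metric".
	keys := []string{
		"d|0:0|cpu", "d|0:0|mem", "d|1:0|cpu", "d|1:0|disk", "d|2:5|cpu",
	}
	// seek emulates ts.Seek: position at the first key >= k.
	seek := func(k string) int { return sort.SearchStrings(keys, k) }

	var tenants []string
	i := seek("d|")
	for i < len(keys) {
		var accountID, projectID uint32
		fmt.Sscanf(keys[i], "d|%d:%d|", &accountID, &projectID)
		tenants = append(tenants, fmt.Sprintf("%d:%d", accountID, projectID))
		// Skip every remaining row of this tenant: bump projectID and seek
		// straight to the next tenant's first possible key.
		projectID++
		if projectID == 0 { // projectID overflowed: carry into accountID
			accountID++
			if accountID == 0 {
				break // exhausted the whole (accountID, projectID) space
			}
		}
		i = seek(fmt.Sprintf("d|%d:%d|", accountID, projectID))
	}
	fmt.Println(tenants) // [0:0 1:0 2:5]
}
```
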

// SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr.
func (db *indexDB) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, accountID, projectID uint32, labelName string, tfss []*TagFilters, tr TimeRange,
	maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) {

@@ -544,14 +544,14 @@ func TestIndexDB(t *testing.T) {
	if err := testIndexDBBigMetricName(db); err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
-	mns, tsids, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
+	mns, tsids, tenants, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
	if err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if err := testIndexDBBigMetricName(db); err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
-	if err := testIndexDBCheckTSIDByName(db, mns, tsids, false); err != nil {
+	if err := testIndexDBCheckTSIDByName(db, mns, tsids, tenants, false); err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if err := testIndexDBBigMetricName(db); err != nil {

@@ -567,7 +567,7 @@ func TestIndexDB(t *testing.T) {
	if err := testIndexDBBigMetricName(db); err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
-	if err := testIndexDBCheckTSIDByName(db, mns, tsids, false); err != nil {
+	if err := testIndexDBCheckTSIDByName(db, mns, tsids, tenants, false); err != nil {
		t.Fatalf("unexpected error: %s", err)
	}
	if err := testIndexDBBigMetricName(db); err != nil {

@@ -599,7 +599,7 @@ func TestIndexDB(t *testing.T) {
			ch <- err
			return
		}
-		mns, tsid, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
+		mns, tsid, tenants, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
		if err != nil {
			ch <- err
			return

@@ -608,7 +608,7 @@ func TestIndexDB(t *testing.T) {
			ch <- err
			return
		}
-		if err := testIndexDBCheckTSIDByName(db, mns, tsid, true); err != nil {
+		if err := testIndexDBCheckTSIDByName(db, mns, tsid, tenants, true); err != nil {
			ch <- err
			return
		}

@@ -704,10 +704,11 @@ func testIndexDBBigMetricName(db *indexDB) error {
	return nil
}

-func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount, metricGroups int) ([]MetricName, []TSID, error) {
+func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount, metricGroups int) ([]MetricName, []TSID, []string, error) {
	// Create tsids.
	var mns []MetricName
	var tsids []TSID
+	tenants := make(map[string]struct{})

	is := db.getIndexSearch(0, 0, noDeadline)
	defer db.putIndexSearch(is)

@@ -718,6 +719,8 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
		var mn MetricName
		mn.AccountID = uint32((i + 2) % accountsCount)
		mn.ProjectID = uint32((i + 1) % projectsCount)
+		tenant := fmt.Sprintf("%d:%d", mn.AccountID, mn.ProjectID)
+		tenants[tenant] = struct{}{}

		// Init MetricGroup.
		mn.MetricGroup = []byte(fmt.Sprintf("metricGroup.%d\x00\x01\x02", i%metricGroups))

@@ -736,13 +739,13 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
		// Create tsid for the metricName.
		var tsid TSID
		if err := is.GetOrCreateTSIDByName(&tsid, metricNameBuf, metricNameRawBuf, 0); err != nil {
-			return nil, nil, fmt.Errorf("unexpected error when creating tsid for mn:\n%s: %w", &mn, err)
+			return nil, nil, nil, fmt.Errorf("unexpected error when creating tsid for mn:\n%s: %w", &mn, err)
		}
		if tsid.AccountID != mn.AccountID {
-			return nil, nil, fmt.Errorf("unexpected TSID.AccountID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.AccountID, mn.AccountID, &mn, &tsid)
+			return nil, nil, nil, fmt.Errorf("unexpected TSID.AccountID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.AccountID, mn.AccountID, &mn, &tsid)
		}
		if tsid.ProjectID != mn.ProjectID {
-			return nil, nil, fmt.Errorf("unexpected TSID.ProjectID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.ProjectID, mn.ProjectID, &mn, &tsid)
+			return nil, nil, nil, fmt.Errorf("unexpected TSID.ProjectID; got %d; want %d; mn:\n%s\ntsid:\n%+v", tsid.ProjectID, mn.ProjectID, &mn, &tsid)
		}

		mns = append(mns, mn)

@@ -754,17 +757,22 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
	for i := range tsids {
		tsid := &tsids[i]
		if err := is.createPerDayIndexes(date, tsid.MetricID, &mns[i]); err != nil {
-			return nil, nil, fmt.Errorf("error in createPerDayIndexes(%d, %d): %w", date, tsid.MetricID, err)
+			return nil, nil, nil, fmt.Errorf("error in createPerDayIndexes(%d, %d): %w", date, tsid.MetricID, err)
		}
	}

	// Flush index to disk, so it becomes visible for search
	db.tb.DebugFlush()

-	return mns, tsids, nil
+	var tenantsList []string
+	for tenant := range tenants {
+		tenantsList = append(tenantsList, tenant)
+	}
+	sort.Strings(tenantsList)
+	return mns, tsids, tenantsList, nil
}

-func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isConcurrent bool) error {
+func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, tenants []string, isConcurrent bool) error {
	hasValue := func(lvs []string, v []byte) bool {
		for _, lv := range lvs {
			if string(v) == lv {

@@ -857,7 +865,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
	for k, labelNames := range allLabelNames {
		lns, err := db.SearchLabelNamesWithFiltersOnTimeRange(nil, k.AccountID, k.ProjectID, nil, TimeRange{}, 1e5, 1e9, noDeadline)
		if err != nil {
-			return fmt.Errorf("error in SearchTagKeys: %w", err)
+			return fmt.Errorf("error in SearchLabelNamesWithFiltersOnTimeRange: %w", err)
		}
		if !hasValue(lns, []byte("__name__")) {
			return fmt.Errorf("cannot find __name__ in %q", lns)

@@ -869,6 +877,31 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
		}
	}

	// Test SearchTenants on global time range
	tenantsGot, err := db.SearchTenants(nil, TimeRange{}, noDeadline)
	if err != nil {
		return fmt.Errorf("error in SearchTenants: %w", err)
	}
	sort.Strings(tenantsGot)
	if !reflect.DeepEqual(tenants, tenantsGot) {
		return fmt.Errorf("unexpected tenants got when searching in global time range;\ngot\n%s\nwant\n%s", tenantsGot, tenants)
	}

	// Test SearchTenants on specific time range
	currentTime := timestampFromTime(time.Now())
	tr := TimeRange{
		MinTimestamp: currentTime - msecPerDay,
		MaxTimestamp: currentTime + msecPerDay,
	}
	tenantsGot, err = db.SearchTenants(nil, tr, noDeadline)
	if err != nil {
		return fmt.Errorf("error in SearchTenants: %w", err)
	}
	sort.Strings(tenantsGot)
	if !reflect.DeepEqual(tenants, tenantsGot) {
		return fmt.Errorf("unexpected tenants got when searching in the given time range;\ngot\n%s\nwant\n%s", tenantsGot, tenants)
	}

	// Check timeseriesCounters only for serial test.
	// Concurrent test may create duplicate timeseries, so GetSeriesCount
	// would return more timeseries than needed.

@@ -885,8 +918,7 @@ func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isC
	}

	// Try tag filters.
-	currentTime := timestampFromTime(time.Now())
-	tr := TimeRange{
+	tr = TimeRange{
		MinTimestamp: currentTime - msecPerDay,
		MaxTimestamp: currentTime + msecPerDay,
	}

@@ -1511,6 +1511,11 @@ func (s *Storage) GetSeriesCount(accountID, projectID uint32, deadline uint64) (
	return s.idb().GetSeriesCount(accountID, projectID, deadline)
}

// SearchTenants returns the list of registered tenants on the given tr.
func (s *Storage) SearchTenants(qt *querytracer.Tracer, tr TimeRange, deadline uint64) ([]string, error) {
	return s.idb().SearchTenants(qt, tr, deadline)
}

// GetTSDBStatus returns TSDB status data for /api/v1/status/tsdb
func (s *Storage) GetTSDBStatus(qt *querytracer.Tracer, accountID, projectID uint32, tfss []*TagFilters, date uint64, focusLabel string, topN, maxMetrics int, deadline uint64) (*TSDBStatus, error) {
	return s.idb().GetTSDBStatus(qt, accountID, projectID, tfss, date, focusLabel, topN, maxMetrics, deadline)

@@ -35,6 +35,9 @@ type API interface {

	// RegisterMetricNames registers the given mrs in the storage.
	RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, deadline uint64) error

	// Tenants returns the list of tenants in the storage on the given tr.
	Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error)
}

// BlockIterator must iterate through series blocks found by VMSelect.InitSearch.

@@ -59,6 +59,7 @@ type Server struct {
	tsdbStatusRequests        *metrics.Counter
	searchMetricNamesRequests *metrics.Counter
	searchRequests            *metrics.Counter
	tenantsRequests           *metrics.Counter

	metricBlocksRead *metrics.Counter
	metricRowsRead   *metrics.Counter

@@ -103,6 +104,7 @@ func NewServer(addr string, api API, limits Limits, disableResponseCompression b
		tsdbStatusRequests:        metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tsdbStatus",addr=%q}`, addr)),
		searchMetricNamesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="searchMetricNames",addr=%q}`, addr)),
		searchRequests:            metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="search",addr=%q}`, addr)),
		tenantsRequests:           metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tenants",addr=%q}`, addr)),

		metricBlocksRead: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_metric_blocks_read_total{addr=%q}`, addr)),
		metricRowsRead:   metrics.NewCounter(fmt.Sprintf(`vm_vmselect_metric_rows_read_total{addr=%q}`, addr)),

@@ -485,6 +487,8 @@ func (s *Server) processRPC(ctx *vmselectRequestCtx, rpcName string) error {
		return s.processDeleteSeries(ctx)
	case "registerMetricNames_v3":
		return s.processRegisterMetricNames(ctx)
	case "tenants_v1":
		return s.processTenants(ctx)
	default:
		return fmt.Errorf("unsupported rpcName: %q", rpcName)
	}

@@ -765,6 +769,39 @@ func (s *Server) processTSDBStatus(ctx *vmselectRequestCtx) error {
	return writeTSDBStatus(ctx, status)
}

func (s *Server) processTenants(ctx *vmselectRequestCtx) error {
	s.tenantsRequests.Inc()

	// Read request
	tr, err := ctx.readTimeRange()
	if err != nil {
		return err
	}

	// Execute the request
	tenants, err := s.api.Tenants(ctx.qt, tr, ctx.deadline)
	if err != nil {
		return ctx.writeErrorMessage(err)
	}

	// Send an empty error message to vmselect.
	if err := ctx.writeString(""); err != nil {
		return fmt.Errorf("cannot send empty error message: %w", err)
	}

	// Send tenants to vmselect
	for _, tenant := range tenants {
		if err := ctx.writeString(tenant); err != nil {
			return fmt.Errorf("cannot write tenant %q: %w", tenant, err)
		}
	}
	// Send 'end of response' marker
	if err := ctx.writeString(""); err != nil {
		return fmt.Errorf("cannot send 'end of response' marker: %w", err)
	}
	return nil
}

func writeTSDBStatus(ctx *vmselectRequestCtx, status *storage.TSDBStatus) error {
	if err := ctx.writeUint64(status.TotalSeries); err != nil {
		return fmt.Errorf("cannot write totalSeries to vmselect: %w", err)