package netstorage

import (
	"container/heap"
	"errors"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"reflect"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/VictoriaMetrics/metrics"
	"github.com/VictoriaMetrics/metricsql"
	"github.com/cespare/xxhash/v2"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
var (
	replicationFactor = flagutil.NewDictInt("replicationFactor", 1, "How many copies of every time series is available on the provided -storageNode nodes. "+
		"vmselect continues returning full responses when up to replicationFactor-1 vmstorage nodes are temporarily unavailable during querying. "+
		"See also -search.skipSlowReplicas")
	skipSlowReplicas = flag.Bool("search.skipSlowReplicas", false, "Whether to skip -replicationFactor - 1 slowest vmstorage nodes during querying. "+
		"Enabling this setting may improve query speed, but it could also lead to incomplete results if some queried data has less than -replicationFactor "+
		"copies at vmstorage nodes. Consider enabling this setting only if all the queried data contains -replicationFactor copies in the cluster")

	maxSamplesPerSeries = flag.Int("search.maxSamplesPerSeries", 30e6, "The maximum number of raw samples a single query can scan per each time series. See also -search.maxSamplesPerQuery")
	maxSamplesPerQuery  = flag.Int("search.maxSamplesPerQuery", 1e9, "The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries")

	vmstorageDialTimeout = flag.Duration("vmstorageDialTimeout", 3*time.Second, "Timeout for establishing RPC connections from vmselect to vmstorage. "+
		"See also -vmstorageUserTimeout")
	vmstorageUserTimeout = flag.Duration("vmstorageUserTimeout", 3*time.Second, "Network timeout for RPC connections from vmselect to vmstorage (Linux only). "+
		"Lower values reduce the maximum query durations when some vmstorage nodes become unavailable because of networking issues. "+
		"Read more about TCP_USER_TIMEOUT at https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ . "+
		"See also -vmstorageDialTimeout")

	maxWorkersPerQuery = flag.Int("search.maxWorkersPerQuery", defaultMaxWorkersPerQuery, "The maximum number of CPU cores a single query can use. "+
		"The default value should work well for most cases. "+
		"The flag can be set to lower values for improving performance of a big number of concurrently executed queries. "+
		"The flag can be set to bigger values for improving performance of heavy queries, which scan a big number of time series (>10K) and/or a big number of samples (>100M). "+
		"There is no sense in setting this flag to values bigger than the number of CPU cores available on the system")
)
// Result is a single timeseries result.
//
// ProcessSearchQuery returns Result slice.
type Result struct {
	// The name of the metric.
	MetricName storage.MetricName

	// Values are sorted by Timestamps.
	Values     []float64
	Timestamps []int64
}

func (r *Result) reset() {
	r.MetricName.Reset()
	r.Values = r.Values[:0]
	r.Timestamps = r.Timestamps[:0]
}

// Results holds results returned from ProcessSearchQuery.
type Results struct {
	tr       storage.TimeRange
	deadline searchutils.Deadline

	tbfs []*tmpBlocksFile

	packedTimeseries []packedTimeseries
}

// Len returns the number of results in rss.
func (rss *Results) Len() int {
	return len(rss.packedTimeseries)
}

// Cancel cancels rss work.
func (rss *Results) Cancel() {
	rss.closeTmpBlockFiles()
}

func (rss *Results) closeTmpBlockFiles() {
	closeTmpBlockFiles(rss.tbfs)
	rss.tbfs = nil
}

func closeTmpBlockFiles(tbfs []*tmpBlocksFile) {
	for _, tbf := range tbfs {
		putTmpBlocksFile(tbf)
	}
}
type timeseriesWork struct {
	mustStop *uint32
	rss      *Results
	pts      *packedTimeseries
	f        func(rs *Result, workerID uint) error
	err      error

	rowsProcessed int
}

func (tsw *timeseriesWork) do(r *Result, workerID uint) error {
	if atomic.LoadUint32(tsw.mustStop) != 0 {
		return nil
	}
	rss := tsw.rss
	if rss.deadline.Exceeded() {
		atomic.StoreUint32(tsw.mustStop, 1)
		return fmt.Errorf("timeout exceeded during query execution: %s", rss.deadline.String())
	}
	if err := tsw.pts.Unpack(r, rss.tbfs, rss.tr); err != nil {
		atomic.StoreUint32(tsw.mustStop, 1)
		return fmt.Errorf("error during time series unpacking: %w", err)
	}
	tsw.rowsProcessed = len(r.Timestamps)
	if len(r.Timestamps) > 0 {
		if err := tsw.f(r, workerID); err != nil {
			atomic.StoreUint32(tsw.mustStop, 1)
			return err
		}
	}
	return nil
}

func timeseriesWorker(qt *querytracer.Tracer, workChs []chan *timeseriesWork, workerID uint) {
	tmpResult := getTmpResult()

	// Perform own work at first.
	rowsProcessed := 0
	seriesProcessed := 0
	ch := workChs[workerID]
	for tsw := range ch {
		tsw.err = tsw.do(&tmpResult.rs, workerID)
		rowsProcessed += tsw.rowsProcessed
		seriesProcessed++
	}
	qt.Printf("own work processed: series=%d, samples=%d", seriesProcessed, rowsProcessed)

	// Then help others with the remaining work.
	rowsProcessed = 0
	seriesProcessed = 0
	for i := uint(1); i < uint(len(workChs)); i++ {
		idx := (i + workerID) % uint(len(workChs))
		ch := workChs[idx]
		for len(ch) > 0 {
			// Do not call runtime.Gosched() here in order to give the real owner of the work
			// a chance to complete it, since runtime.Gosched() consumes additional CPU
			// and slows down the code on systems with a big number of CPU cores.
			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966#issuecomment-1483208419

			// It is expected that every channel in workChs is already closed,
			// so the next line should return immediately.
			tsw, ok := <-ch
			if !ok {
				break
			}
			tsw.err = tsw.do(&tmpResult.rs, workerID)
			rowsProcessed += tsw.rowsProcessed
			seriesProcessed++
		}
	}
	qt.Printf("others work processed: series=%d, samples=%d", seriesProcessed, rowsProcessed)

	putTmpResult(tmpResult)
}

func getTmpResult() *result {
	v := resultPool.Get()
	if v == nil {
		v = &result{}
	}
	return v.(*result)
}

func putTmpResult(r *result) {
	currentTime := fasttime.UnixTimestamp()
	if cap(r.rs.Values) > 1024*1024 && 4*len(r.rs.Values) < cap(r.rs.Values) && currentTime-r.lastResetTime > 10 {
		// Reset r.rs in order to preserve memory usage after processing big time series with millions of rows.
		r.rs = Result{}
		r.lastResetTime = currentTime
	}
	resultPool.Put(r)
}

type result struct {
	rs            Result
	lastResetTime uint64
}

var resultPool sync.Pool
// MaxWorkers returns the maximum number of concurrent goroutines, which can be used by RunParallel()
func MaxWorkers() int {
	n := *maxWorkersPerQuery
	if n <= 0 {
		return defaultMaxWorkersPerQuery
	}
	if n > gomaxprocs {
		// There is no sense in running more than gomaxprocs CPU-bound concurrent workers,
		// since this may worsen the query performance.
		n = gomaxprocs
	}
	return n
}

var gomaxprocs = cgroup.AvailableCPUs()

var defaultMaxWorkersPerQuery = func() int {
	// maxWorkersLimit is the maximum number of CPU cores, which can be used in parallel
	// for processing an average query, without significant impact on inter-CPU communications.
	const maxWorkersLimit = 32

	n := gomaxprocs
	if n > maxWorkersLimit {
		n = maxWorkersLimit
	}
	return n
}()
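
// For example, with -search.maxWorkersPerQuery=64 on a host where cgroup.AvailableCPUs()
// reports 32 CPU cores, MaxWorkers() returns 32, since additional CPU-bound workers
// cannot speed up a single query.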
// RunParallel runs f in parallel for all the results from rss.
//
// f shouldn't hold references to rs after returning.
// workerID is the id of the worker goroutine that calls f. The workerID is in the range [0..MaxWorkers()-1].
// Data processing is immediately stopped if f returns non-nil error.
//
// rss becomes unusable after the call to RunParallel.
func (rss *Results) RunParallel(qt *querytracer.Tracer, f func(rs *Result, workerID uint) error) error {
	qt = qt.NewChild("parallel process of fetched data")
	defer rss.closeTmpBlockFiles()

	rowsProcessedTotal, err := rss.runParallel(qt, f)
	seriesProcessedTotal := len(rss.packedTimeseries)
	rss.packedTimeseries = rss.packedTimeseries[:0]

	rowsReadPerQuery.Update(float64(rowsProcessedTotal))
	seriesReadPerQuery.Update(float64(seriesProcessedTotal))

	qt.Donef("series=%d, samples=%d", seriesProcessedTotal, rowsProcessedTotal)

	return err
}
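
// exampleRunParallelTotalSamples is an illustrative sketch (not used by vmselect itself)
// of how a caller may consume rss via RunParallel. The callback must not retain rs after
// returning, so the sketch only aggregates per-worker sample counters indexed by workerID.
func exampleRunParallelTotalSamples(qt *querytracer.Tracer, rss *Results) (int, error) {
	perWorker := make([]int, MaxWorkers())
	err := rss.RunParallel(qt, func(rs *Result, workerID uint) error {
		// rs is valid only for the duration of this call.
		perWorker[workerID] += len(rs.Values)
		return nil
	})
	total := 0
	for _, n := range perWorker {
		total += n
	}
	return total, err
}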
func (rss *Results) runParallel(qt *querytracer.Tracer, f func(rs *Result, workerID uint) error) (int, error) {
	tswsLen := len(rss.packedTimeseries)
	if tswsLen == 0 {
		// Nothing to process
		return 0, nil
	}

	var mustStop uint32
	initTimeseriesWork := func(tsw *timeseriesWork, pts *packedTimeseries) {
		tsw.rss = rss
		tsw.pts = pts
		tsw.f = f
		tsw.mustStop = &mustStop
	}
	maxWorkers := MaxWorkers()
	if maxWorkers == 1 || tswsLen == 1 {
		// It is faster to process time series in the current goroutine.
		var tsw timeseriesWork
		tmpResult := getTmpResult()
		rowsProcessedTotal := 0
		var err error
		for i := range rss.packedTimeseries {
			initTimeseriesWork(&tsw, &rss.packedTimeseries[i])
			err = tsw.do(&tmpResult.rs, 0)
			rowsReadPerSeries.Update(float64(tsw.rowsProcessed))
			rowsProcessedTotal += tsw.rowsProcessed
			if err != nil {
				break
			}
		}
		putTmpResult(tmpResult)
		return rowsProcessedTotal, err
	}

	// Slow path - spin up multiple local workers for parallel data processing.
	// Do not use the global workers pool, since it increases inter-CPU memory ping-pong,
	// which reduces the scalability on systems with many CPU cores.

	// Prepare the work for workers.
	tsws := make([]timeseriesWork, len(rss.packedTimeseries))
	for i := range rss.packedTimeseries {
		initTimeseriesWork(&tsws[i], &rss.packedTimeseries[i])
	}

	// Prepare worker channels.
	workers := len(tsws)
	if workers > maxWorkers {
		workers = maxWorkers
	}
	itemsPerWorker := (len(tsws) + workers - 1) / workers
	workChs := make([]chan *timeseriesWork, workers)
	for i := range workChs {
		workChs[i] = make(chan *timeseriesWork, itemsPerWorker)
	}

	// Spread work among workers.
	for i := range tsws {
		idx := i % len(workChs)
		workChs[idx] <- &tsws[i]
	}
	// Mark worker channels as closed.
	for _, workCh := range workChs {
		close(workCh)
	}

	// Start workers and wait until they finish the work.
	var wg sync.WaitGroup
	for i := range workChs {
		wg.Add(1)
		qtChild := qt.NewChild("worker #%d", i)
		go func(workerID uint) {
			timeseriesWorker(qtChild, workChs, workerID)
			qtChild.Done()
			wg.Done()
		}(uint(i))
	}
	wg.Wait()

	// Collect results.
	var firstErr error
	rowsProcessedTotal := 0
	for i := range tsws {
		tsw := &tsws[i]
		if tsw.err != nil && firstErr == nil {
			// Return just the first error, since other errors are likely to duplicate the first error.
			firstErr = tsw.err
		}
		rowsReadPerSeries.Update(float64(tsw.rowsProcessed))
		rowsProcessedTotal += tsw.rowsProcessed
	}
	return rowsProcessedTotal, firstErr
}
var (
	rowsReadPerSeries  = metrics.NewHistogram(`vm_rows_read_per_series`)
	rowsReadPerQuery   = metrics.NewHistogram(`vm_rows_read_per_query`)
	seriesReadPerQuery = metrics.NewHistogram(`vm_series_read_per_query`)
)

type packedTimeseries struct {
	metricName string
	addrs      []tmpBlockAddr
}
type unpackWork struct {
	tbfs []*tmpBlocksFile
	addr tmpBlockAddr
	tr   storage.TimeRange
	sb   *sortBlock
	err  error
}

func (upw *unpackWork) reset() {
	upw.tbfs = nil
	upw.addr = tmpBlockAddr{}
	upw.tr = storage.TimeRange{}
	upw.sb = nil
	upw.err = nil
}

func (upw *unpackWork) unpack(tmpBlock *storage.Block) {
	sb := getSortBlock()
	if err := sb.unpackFrom(tmpBlock, upw.tbfs, upw.addr, upw.tr); err != nil {
		putSortBlock(sb)
		upw.err = fmt.Errorf("cannot unpack block: %w", err)
		return
	}
	upw.sb = sb
}

func getUnpackWork() *unpackWork {
	v := unpackWorkPool.Get()
	if v != nil {
		return v.(*unpackWork)
	}
	return &unpackWork{}
}

func putUnpackWork(upw *unpackWork) {
	upw.reset()
	unpackWorkPool.Put(upw)
}

var unpackWorkPool sync.Pool

func unpackWorker(workChs []chan *unpackWork, workerID uint) {
	tmpBlock := getTmpStorageBlock()

	// Deal with own work at first.
	ch := workChs[workerID]
	for upw := range ch {
		upw.unpack(tmpBlock)
	}

	// Then help others with their work.
	for i := uint(1); i < uint(len(workChs)); i++ {
		idx := (i + workerID) % uint(len(workChs))
		ch := workChs[idx]
		for len(ch) > 0 {
			// Do not call runtime.Gosched() here in order to give the real owner of the work
			// a chance to complete it, since runtime.Gosched() consumes additional CPU
			// and slows down the code on systems with a big number of CPU cores.
			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3966#issuecomment-1483208419

			// It is expected that every channel in workChs is already closed,
			// so the next line should return immediately.
			upw, ok := <-ch
			if !ok {
				break
			}
			upw.unpack(tmpBlock)
		}
	}

	putTmpStorageBlock(tmpBlock)
}

func getTmpStorageBlock() *storage.Block {
	v := tmpStorageBlockPool.Get()
	if v == nil {
		v = &storage.Block{}
	}
	return v.(*storage.Block)
}

func putTmpStorageBlock(sb *storage.Block) {
	tmpStorageBlockPool.Put(sb)
}

var tmpStorageBlockPool sync.Pool

// Unpack unpacks pts to dst.
func (pts *packedTimeseries) Unpack(dst *Result, tbfs []*tmpBlocksFile, tr storage.TimeRange) error {
	dst.reset()
	if err := dst.MetricName.Unmarshal(bytesutil.ToUnsafeBytes(pts.metricName)); err != nil {
		return fmt.Errorf("cannot unmarshal metricName %q: %w", pts.metricName, err)
	}
	sbh := getSortBlocksHeap()
	var err error
	sbh.sbs, err = pts.unpackTo(sbh.sbs[:0], tbfs, tr)
	pts.addrs = pts.addrs[:0]
	if err != nil {
		putSortBlocksHeap(sbh)
		return err
	}
	dedupInterval := storage.GetDedupInterval()
	mergeSortBlocks(dst, sbh, dedupInterval)
	putSortBlocksHeap(sbh)
	return nil
}
func (pts *packedTimeseries) unpackTo(dst []*sortBlock, tbfs []*tmpBlocksFile, tr storage.TimeRange) ([]*sortBlock, error) {
	upwsLen := len(pts.addrs)
	if upwsLen == 0 {
		// Nothing to do
		return nil, nil
	}
	initUnpackWork := func(upw *unpackWork, addr tmpBlockAddr) {
		upw.tbfs = tbfs
		upw.addr = addr
		upw.tr = tr
	}
	if gomaxprocs == 1 || upwsLen <= 1000 {
		// It is faster to unpack all the data in the current goroutine.
		upw := getUnpackWork()
		samples := 0
		tmpBlock := getTmpStorageBlock()
		var err error
		for _, addr := range pts.addrs {
			initUnpackWork(upw, addr)
			upw.unpack(tmpBlock)
			if upw.err != nil {
				return dst, upw.err
			}
			samples += len(upw.sb.Timestamps)
			if *maxSamplesPerSeries > 0 && samples > *maxSamplesPerSeries {
				putSortBlock(upw.sb)
				err = &limitExceededErr{
					err: fmt.Errorf("cannot process more than %d samples per series; either increase -search.maxSamplesPerSeries "+
						"or reduce time range for the query", *maxSamplesPerSeries),
				}
				break
			}
			dst = append(dst, upw.sb)
			upw.reset()
		}
		putTmpStorageBlock(tmpBlock)
		putUnpackWork(upw)
		return dst, err
	}

	// Slow path - spin up multiple local workers for parallel data unpacking.
	// Do not use the global workers pool, since it increases inter-CPU memory ping-pong,
	// which reduces the scalability on systems with many CPU cores.

	// Prepare the work for workers.
	upws := make([]*unpackWork, upwsLen)
	for i, addr := range pts.addrs {
		upw := getUnpackWork()
		initUnpackWork(upw, addr)
		upws[i] = upw
	}

	// Prepare worker channels.
	workers := len(upws)
	if workers > gomaxprocs {
		workers = gomaxprocs
	}
	if workers < 1 {
		workers = 1
	}
	itemsPerWorker := (len(upws) + workers - 1) / workers
	workChs := make([]chan *unpackWork, workers)
	for i := range workChs {
		workChs[i] = make(chan *unpackWork, itemsPerWorker)
	}

	// Spread work among worker channels.
	for i, upw := range upws {
		idx := i % len(workChs)
		workChs[idx] <- upw
	}
	// Mark worker channels as closed.
	for _, workCh := range workChs {
		close(workCh)
	}

	// Start workers and wait until they finish the work.
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(workerID uint) {
			unpackWorker(workChs, workerID)
			wg.Done()
		}(uint(i))
	}
	wg.Wait()

	// Collect results.
	samples := 0
	var firstErr error
	for _, upw := range upws {
		if upw.err != nil && firstErr == nil {
			// Return the first error only, since other errors are likely the same.
			firstErr = upw.err
		}
		if firstErr == nil {
			sb := upw.sb
			samples += len(sb.Timestamps)
			if *maxSamplesPerSeries > 0 && samples > *maxSamplesPerSeries {
				putSortBlock(sb)
				firstErr = fmt.Errorf("cannot process more than %d samples per series; either increase -search.maxSamplesPerSeries "+
					"or reduce time range for the query", *maxSamplesPerSeries)
			} else {
				dst = append(dst, sb)
			}
		} else {
			putSortBlock(upw.sb)
		}
		putUnpackWork(upw)
	}
	return dst, firstErr
}
func getSortBlock() *sortBlock {
	v := sbPool.Get()
	if v == nil {
		return &sortBlock{}
	}
	return v.(*sortBlock)
}

func putSortBlock(sb *sortBlock) {
	sb.reset()
	sbPool.Put(sb)
}

var sbPool sync.Pool

var metricRowsSkipped = metrics.NewCounter(`vm_metric_rows_skipped_total{name="vmselect"}`)
func mergeSortBlocks(dst *Result, sbh *sortBlocksHeap, dedupInterval int64) {
	// Skip empty sort blocks, since they cannot be passed to heap.Init.
	sbs := sbh.sbs[:0]
	for _, sb := range sbh.sbs {
		if len(sb.Timestamps) == 0 {
			putSortBlock(sb)
			continue
		}
		sbs = append(sbs, sb)
	}
	sbh.sbs = sbs
	if sbh.Len() == 0 {
		return
	}
	heap.Init(sbh)
	for {
		sbs := sbh.sbs
		top := sbs[0]
		if len(sbs) == 1 {
			dst.Timestamps = append(dst.Timestamps, top.Timestamps[top.NextIdx:]...)
			dst.Values = append(dst.Values, top.Values[top.NextIdx:]...)
			putSortBlock(top)
			break
		}
		sbNext := sbh.getNextBlock()
		tsNext := sbNext.Timestamps[sbNext.NextIdx]
		topNextIdx := top.NextIdx
		if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 {
			// Skip n replicated samples at top if deduplication is enabled.
			top.NextIdx = topNextIdx + n
		} else {
			// Copy samples from top to dst with timestamps not exceeding tsNext.
			top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext)
			dst.Timestamps = append(dst.Timestamps, top.Timestamps[topNextIdx:top.NextIdx]...)
			dst.Values = append(dst.Values, top.Values[topNextIdx:top.NextIdx]...)
		}
		if top.NextIdx < len(top.Timestamps) {
			heap.Fix(sbh, 0)
		} else {
			heap.Pop(sbh)
			putSortBlock(top)
		}
	}
	timestamps, values := storage.DeduplicateSamples(dst.Timestamps, dst.Values, dedupInterval)
	dedups := len(dst.Timestamps) - len(timestamps)
	dedupsDuringSelect.Add(dedups)
	dst.Timestamps = timestamps
	dst.Values = values
}

var dedupsDuringSelect = metrics.NewCounter(`vm_deduplicated_samples_total{type="select"}`)
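
// exampleMergeTwoSortBlocks is an illustrative sketch (not used in production) of the
// mergeSortBlocks semantics: two overlapping sortBlocks merged with dedupInterval=0 produce
// a single Result with globally sorted timestamps [10 20 30 40] and values [1 2 3 4].
func exampleMergeTwoSortBlocks() *Result {
	sbh := getSortBlocksHeap()
	sbh.sbs = append(sbh.sbs,
		&sortBlock{Timestamps: []int64{10, 30}, Values: []float64{1, 3}},
		&sortBlock{Timestamps: []int64{20, 40}, Values: []float64{2, 4}},
	)
	var dst Result
	mergeSortBlocks(&dst, sbh, 0)
	putSortBlocksHeap(sbh)
	return &dst
}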
func equalSamplesPrefix(a, b *sortBlock) int {
	n := equalTimestampsPrefix(a.Timestamps[a.NextIdx:], b.Timestamps[b.NextIdx:])
	if n == 0 {
		return 0
	}
	return equalValuesPrefix(a.Values[a.NextIdx:a.NextIdx+n], b.Values[b.NextIdx:b.NextIdx+n])
}

func equalTimestampsPrefix(a, b []int64) int {
	for i, v := range a {
		if i >= len(b) || v != b[i] {
			return i
		}
	}
	return len(a)
}

func equalValuesPrefix(a, b []float64) int {
	for i, v := range a {
		if i >= len(b) || v != b[i] {
			return i
		}
	}
	return len(a)
}

func binarySearchTimestamps(timestamps []int64, ts int64) int {
	// The code has been adapted from sort.Search.
	n := len(timestamps)
	if n > 0 && timestamps[n-1] <= ts {
		// Fast path for timestamps scanned in ascending order.
		return n
	}
	i, j := 0, n
	for i < j {
		h := int(uint(i+j) >> 1)
		if h >= 0 && h < len(timestamps) && timestamps[h] <= ts {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
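
// Illustration of binarySearchTimestamps semantics: it returns the number of leading
// timestamps that do not exceed ts, i.e. the index of the first timestamp bigger than ts:
//
//	binarySearchTimestamps([]int64{10, 20, 30}, 5)  == 0
//	binarySearchTimestamps([]int64{10, 20, 30}, 20) == 2
//	binarySearchTimestamps([]int64{10, 20, 30}, 99) == 3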
type sortBlock struct {
	Timestamps []int64
	Values     []float64
	NextIdx    int
}

func (sb *sortBlock) reset() {
	sb.Timestamps = sb.Timestamps[:0]
	sb.Values = sb.Values[:0]
	sb.NextIdx = 0
}

func (sb *sortBlock) unpackFrom(tmpBlock *storage.Block, tbfs []*tmpBlocksFile, addr tmpBlockAddr, tr storage.TimeRange) error {
	tmpBlock.Reset()
	tbfs[addr.tbfIdx].MustReadBlockAt(tmpBlock, addr)
	if err := tmpBlock.UnmarshalData(); err != nil {
		return fmt.Errorf("cannot unmarshal block: %w", err)
	}
	sb.Timestamps, sb.Values = tmpBlock.AppendRowsWithTimeRangeFilter(sb.Timestamps[:0], sb.Values[:0], tr)
	skippedRows := tmpBlock.RowsCount() - len(sb.Timestamps)
	metricRowsSkipped.Add(skippedRows)
	return nil
}
type sortBlocksHeap struct {
	sbs []*sortBlock
}

func (sbh *sortBlocksHeap) getNextBlock() *sortBlock {
	sbs := sbh.sbs
	if len(sbs) < 2 {
		return nil
	}
	if len(sbs) < 3 {
		return sbs[1]
	}
	a := sbs[1]
	b := sbs[2]
	if a.Timestamps[a.NextIdx] <= b.Timestamps[b.NextIdx] {
		return a
	}
	return b
}

func (sbh *sortBlocksHeap) Len() int {
	return len(sbh.sbs)
}

func (sbh *sortBlocksHeap) Less(i, j int) bool {
	sbs := sbh.sbs
	a := sbs[i]
	b := sbs[j]
	return a.Timestamps[a.NextIdx] < b.Timestamps[b.NextIdx]
}

func (sbh *sortBlocksHeap) Swap(i, j int) {
	sbs := sbh.sbs
	sbs[i], sbs[j] = sbs[j], sbs[i]
}

func (sbh *sortBlocksHeap) Push(x interface{}) {
	sbh.sbs = append(sbh.sbs, x.(*sortBlock))
}

func (sbh *sortBlocksHeap) Pop() interface{} {
	sbs := sbh.sbs
	v := sbs[len(sbs)-1]
	sbs[len(sbs)-1] = nil
	sbh.sbs = sbs[:len(sbs)-1]
	return v
}

func getSortBlocksHeap() *sortBlocksHeap {
	v := sbhPool.Get()
	if v == nil {
		return &sortBlocksHeap{}
	}
	return v.(*sortBlocksHeap)
}

func putSortBlocksHeap(sbh *sortBlocksHeap) {
	sbs := sbh.sbs
	for i := range sbs {
		sbs[i] = nil
	}
	sbh.sbs = sbs[:0]
	sbhPool.Put(sbh)
}

var sbhPool sync.Pool
// RegisterMetricNames registers metric names from mrs in the storage.
func RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, deadline searchutils.Deadline) error {
	qt = qt.NewChild("register metric names")
	defer qt.Done()
	sns := getStorageNodes()
	// Split mrs among available vmstorage nodes.
	mrsPerNode := make([][]storage.MetricRow, len(sns))
	for _, mr := range mrs {
		idx := 0
		if len(sns) > 1 {
			// There is no need to use the same hash as for time series distribution in vminsert,
			// since RegisterMetricNames is used only in Graphite Tags API.
			h := xxhash.Sum64(mr.MetricNameRaw)
			idx = int(h % uint64(len(sns)))
		}
		mrsPerNode[idx] = append(mrsPerNode[idx], mr)
	}

	// Push mrs to storage nodes in parallel.
	snr := startStorageNodesRequest(qt, sns, true, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.registerMetricNamesRequests.Inc()
		err := sn.registerMetricNames(qt, mrsPerNode[workerID], deadline)
		if err != nil {
			sn.registerMetricNamesErrors.Inc()
		}
		return &err
	})

	// Collect results
	err := snr.collectAllResults(func(result interface{}) error {
		errP := result.(*error)
		return *errP
	})
	if err != nil {
		return fmt.Errorf("cannot register series on all the vmstorage nodes: %w", err)
	}
	return nil
}
// DeleteSeries deletes time series matching the given sq.
func DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline searchutils.Deadline) (int, error) {
	qt = qt.NewChild("delete series: %s", sq)
	defer qt.Done()
	requestData := sq.Marshal(nil)

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		deletedCount int
		err          error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, true, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.deleteSeriesRequests.Inc()
		deletedCount, err := sn.deleteSeries(qt, requestData, deadline)
		if err != nil {
			sn.deleteSeriesErrors.Inc()
		}
		return &nodeResult{
			deletedCount: deletedCount,
			err:          err,
		}
	})

	// Collect results
	deletedTotal := 0
	err := snr.collectAllResults(func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		deletedTotal += nr.deletedCount
		return nil
	})
	if err != nil {
		return deletedTotal, fmt.Errorf("cannot delete time series on all the vmstorage nodes: %w", err)
	}
	return deletedTotal, nil
}
// LabelNames returns label names matching the given sq until the given deadline.
func LabelNames(qt *querytracer.Tracer, denyPartialResponse bool, sq *storage.SearchQuery, maxLabelNames int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get labels: %s", sq)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	requestData := sq.Marshal(nil)

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		labelNames []string
		err        error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, denyPartialResponse, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.labelNamesRequests.Inc()
		labelNames, err := sn.getLabelNames(qt, requestData, maxLabelNames, deadline)
		if err != nil {
			sn.labelNamesErrors.Inc()
			err = fmt.Errorf("cannot get labels from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			labelNames: labelNames,
			err:        err,
		}
	})

	// Collect results
	var labelNames []string
	isPartial, err := snr.collectResults(partialLabelNamesResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		labelNames = append(labelNames, nr.labelNames...)
		return nil
	})
	qt.Printf("get %d non-duplicated labels", len(labelNames))
	if err != nil {
		return nil, isPartial, fmt.Errorf("cannot fetch labels from vmstorage nodes: %w", err)
	}

	// Deduplicate labels
	labelNames = deduplicateStrings(labelNames)
	qt.Printf("get %d unique labels after de-duplication", len(labelNames))
	if maxLabelNames > 0 && maxLabelNames < len(labelNames) {
		labelNames = labelNames[:maxLabelNames]
	}

	// Sort labelNames like Prometheus does
	sort.Strings(labelNames)
	qt.Printf("sort %d labels", len(labelNames))
	return labelNames, isPartial, nil
}
// GraphiteTags returns Graphite tags until the given deadline.
func GraphiteTags(qt *querytracer.Tracer, accountID, projectID uint32, denyPartialResponse bool, filter string, limit int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get graphite tags: filter=%s, limit=%d", filter, limit)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	sq := storage.NewSearchQuery(accountID, projectID, 0, 0, nil, 0)
	labels, isPartial, err := LabelNames(qt, denyPartialResponse, sq, 0, deadline)
	if err != nil {
		return nil, false, err
	}
	// Substitute "__name__" with "name" for Graphite compatibility
	for i := range labels {
		if labels[i] != "__name__" {
			continue
		}
		// Prevent duplicate `name` tag.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/942
		if hasString(labels, "name") {
			labels = append(labels[:i], labels[i+1:]...)
		} else {
			labels[i] = "name"
			sort.Strings(labels)
		}
		break
	}
	if len(filter) > 0 {
		labels, err = applyGraphiteRegexpFilter(filter, labels)
		if err != nil {
			return nil, false, err
		}
	}
	if limit > 0 && limit < len(labels) {
		labels = labels[:limit]
	}
	return labels, isPartial, nil
}

func hasString(a []string, s string) bool {
	for _, x := range a {
		if x == s {
			return true
		}
	}
	return false
}
// LabelValues returns label values matching the given labelName and sq until the given deadline.
func LabelValues(qt *querytracer.Tracer, denyPartialResponse bool, labelName string, sq *storage.SearchQuery, maxLabelValues int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get values for label %s: %s", labelName, sq)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	requestData := sq.Marshal(nil)

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		labelValues []string
		err         error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, denyPartialResponse, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.labelValuesRequests.Inc()
		labelValues, err := sn.getLabelValues(qt, labelName, requestData, maxLabelValues, deadline)
		if err != nil {
			sn.labelValuesErrors.Inc()
			err = fmt.Errorf("cannot get label values from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			labelValues: labelValues,
			err:         err,
		}
	})

	// Collect results
	var labelValues []string
	isPartial, err := snr.collectResults(partialLabelValuesResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		labelValues = append(labelValues, nr.labelValues...)
		return nil
	})
	qt.Printf("get %d non-duplicated label values", len(labelValues))
	if err != nil {
		return nil, isPartial, fmt.Errorf("cannot fetch label values from vmstorage nodes: %w", err)
	}

	// Deduplicate label values
	labelValues = deduplicateStrings(labelValues)
	qt.Printf("get %d unique label values after de-duplication", len(labelValues))

	// Sort labelValues like Prometheus does
	if maxLabelValues > 0 && maxLabelValues < len(labelValues) {
		labelValues = labelValues[:maxLabelValues]
	}
	sort.Strings(labelValues)
	qt.Printf("sort %d label values", len(labelValues))
	return labelValues, isPartial, nil
}
// Tenants returns tenants until the given deadline.
func Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) {
	qt = qt.NewChild("get tenants on timeRange=%s", &tr)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		tenants []string
		err     error
	}
	sns := getStorageNodes()
	// Deny partial responses when obtaining the list of tenants, since a partial list of tenants makes little sense.
	snr := startStorageNodesRequest(qt, sns, true, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.tenantsRequests.Inc()
		tenants, err := sn.getTenants(qt, tr, deadline)
		if err != nil {
			sn.tenantsErrors.Inc()
			err = fmt.Errorf("cannot get tenants from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			tenants: tenants,
			err:     err,
		}
	})

	// Collect results
	var tenants []string
	_, err := snr.collectResults(partialLabelValuesResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		tenants = append(tenants, nr.tenants...)
		return nil
	})
	qt.Printf("get %d non-duplicated tenants", len(tenants))
	if err != nil {
		return nil, fmt.Errorf("cannot fetch tenants from vmstorage nodes: %w", err)
	}

	// Deduplicate tenants
	tenants = deduplicateStrings(tenants)
	qt.Printf("get %d unique tenants after de-duplication", len(tenants))
	sort.Strings(tenants)
	qt.Printf("sort %d tenants", len(tenants))
	return tenants, nil
}
// GraphiteTagValues returns tag values for the given tagName until the given deadline.
func GraphiteTagValues(qt *querytracer.Tracer, accountID, projectID uint32, denyPartialResponse bool, tagName, filter string, limit int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get graphite tag values for tagName=%s, filter=%s, limit=%d", tagName, filter, limit)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	if tagName == "name" {
		tagName = ""
	}
	sq := storage.NewSearchQuery(accountID, projectID, 0, 0, nil, 0)
	tagValues, isPartial, err := LabelValues(qt, denyPartialResponse, tagName, sq, 0, deadline)
	if err != nil {
		return nil, false, err
	}
	if len(filter) > 0 {
		tagValues, err = applyGraphiteRegexpFilter(filter, tagValues)
		if err != nil {
			return nil, false, err
		}
	}
	if limit > 0 && limit < len(tagValues) {
		tagValues = tagValues[:limit]
	}
	return tagValues, isPartial, nil
}
// TagValueSuffixes returns tag value suffixes for the given tagKey and the given tagValuePrefix.
//
// It can be used for implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find
func TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, denyPartialResponse bool, tr storage.TimeRange, tagKey, tagValuePrefix string,
	delimiter byte, maxSuffixes int, deadline searchutils.Deadline) ([]string, bool, error) {
	qt = qt.NewChild("get tag value suffixes for tagKey=%s, tagValuePrefix=%s, maxSuffixes=%d, timeRange=%s", tagKey, tagValuePrefix, maxSuffixes, &tr)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		suffixes []string
		err      error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, denyPartialResponse, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.tagValueSuffixesRequests.Inc()
		suffixes, err := sn.getTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
		if err != nil {
			sn.tagValueSuffixesErrors.Inc()
			err = fmt.Errorf("cannot get tag value suffixes for timeRange=%s, tagKey=%q, tagValuePrefix=%q, delimiter=%c from vmstorage %s: %w",
				tr.String(), tagKey, tagValuePrefix, delimiter, sn.connPool.Addr(), err)
		}
		return &nodeResult{
			suffixes: suffixes,
			err:      err,
		}
	})

	// Collect results
	m := make(map[string]struct{})
	isPartial, err := snr.collectResults(partialTagValueSuffixesResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		for _, suffix := range nr.suffixes {
			m[suffix] = struct{}{}
		}
		return nil
	})
	if err != nil {
		return nil, isPartial, fmt.Errorf("cannot fetch tag value suffixes from vmstorage nodes: %w", err)
	}
	suffixes := make([]string, 0, len(m))
	for suffix := range m {
		suffixes = append(suffixes, suffix)
	}
	return suffixes, isPartial, nil
}
func deduplicateStrings(a []string) []string {
	m := make(map[string]bool, len(a))
	for _, s := range a {
		m[s] = true
	}
	a = a[:0]
	for s := range m {
		a = append(a, s)
	}
	return a
}
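
// For example, deduplicateStrings([]string{"job", "instance", "job"}) returns a slice containing
// "job" and "instance" exactly once; the order is undefined, since it comes from map iteration.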
// TSDBStatus returns tsdb status according to https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
//
// It accepts arbitrary filters on time series in sq.
func TSDBStatus(qt *querytracer.Tracer, denyPartialResponse bool, sq *storage.SearchQuery, focusLabel string, topN int, deadline searchutils.Deadline) (*storage.TSDBStatus, bool, error) {
	qt = qt.NewChild("get tsdb stats: %s, focusLabel=%q, topN=%d", sq, focusLabel, topN)
	defer qt.Done()
	if deadline.Exceeded() {
		return nil, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	requestData := sq.Marshal(nil)

	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		status *storage.TSDBStatus
		err    error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, denyPartialResponse, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.tsdbStatusRequests.Inc()
		status, err := sn.getTSDBStatus(qt, requestData, focusLabel, topN, deadline)
		if err != nil {
			sn.tsdbStatusErrors.Inc()
			err = fmt.Errorf("cannot obtain tsdb status from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			status: status,
			err:    err,
		}
	})

	// Collect results.
	var statuses []*storage.TSDBStatus
	isPartial, err := snr.collectResults(partialTSDBStatusResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		statuses = append(statuses, nr.status)
		return nil
	})
	if err != nil {
		return nil, isPartial, fmt.Errorf("cannot fetch tsdb status from vmstorage nodes: %w", err)
	}
	status := mergeTSDBStatuses(statuses, topN)
	return status, isPartial, nil
}

func mergeTSDBStatuses(statuses []*storage.TSDBStatus, topN int) *storage.TSDBStatus {
	totalSeries := uint64(0)
	totalLabelValuePairs := uint64(0)
	seriesCountByMetricName := make(map[string]uint64)
	seriesCountByLabelName := make(map[string]uint64)
	seriesCountByFocusLabelValue := make(map[string]uint64)
	seriesCountByLabelValuePair := make(map[string]uint64)
	labelValueCountByLabelName := make(map[string]uint64)
	for _, st := range statuses {
		totalSeries += st.TotalSeries
		totalLabelValuePairs += st.TotalLabelValuePairs
		for _, e := range st.SeriesCountByMetricName {
			seriesCountByMetricName[e.Name] += e.Count
		}
		for _, e := range st.SeriesCountByLabelName {
			seriesCountByLabelName[e.Name] += e.Count
		}
		for _, e := range st.SeriesCountByFocusLabelValue {
			seriesCountByFocusLabelValue[e.Name] += e.Count
		}
		for _, e := range st.SeriesCountByLabelValuePair {
			seriesCountByLabelValuePair[e.Name] += e.Count
		}
		for _, e := range st.LabelValueCountByLabelName {
			// The same label values may exist in multiple vmstorage nodes.
			// So select the maximum label values count in order to get the value close to reality.
			if e.Count > labelValueCountByLabelName[e.Name] {
				labelValueCountByLabelName[e.Name] = e.Count
			}
		}
	}
	return &storage.TSDBStatus{
		TotalSeries:                  totalSeries,
		TotalLabelValuePairs:         totalLabelValuePairs,
		SeriesCountByMetricName:      toTopHeapEntries(seriesCountByMetricName, topN),
		SeriesCountByLabelName:       toTopHeapEntries(seriesCountByLabelName, topN),
		SeriesCountByFocusLabelValue: toTopHeapEntries(seriesCountByFocusLabelValue, topN),
		SeriesCountByLabelValuePair:  toTopHeapEntries(seriesCountByLabelValuePair, topN),
		LabelValueCountByLabelName:   toTopHeapEntries(labelValueCountByLabelName, topN),
	}
}

func toTopHeapEntries(m map[string]uint64, topN int) []storage.TopHeapEntry {
	a := make([]storage.TopHeapEntry, 0, len(m))
	for name, count := range m {
		a = append(a, storage.TopHeapEntry{
			Name:  name,
			Count: count,
		})
	}
	sort.Slice(a, func(i, j int) bool {
		if a[i].Count != a[j].Count {
			return a[i].Count > a[j].Count
		}
		return a[i].Name < a[j].Name
	})
	if len(a) > topN {
		a = a[:topN]
	}
	return a
}
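
// For example, toTopHeapEntries(map[string]uint64{"a": 5, "b": 10, "c": 5}, 2) returns
// entries {"b", 10} and {"a", 5}: entries are sorted by Count in descending order,
// ties are broken by Name in ascending order, and the result is truncated to topN entries.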
// SeriesCount returns the number of unique series.
func SeriesCount(qt *querytracer.Tracer, accountID, projectID uint32, denyPartialResponse bool, deadline searchutils.Deadline) (uint64, bool, error) {
	qt = qt.NewChild("get series count")
	defer qt.Done()
	if deadline.Exceeded() {
		return 0, false, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String())
	}
	// Send the query to all the storage nodes in parallel.
	type nodeResult struct {
		n   uint64
		err error
	}
	sns := getStorageNodes()
	snr := startStorageNodesRequest(qt, sns, denyPartialResponse, func(qt *querytracer.Tracer, workerID uint, sn *storageNode) interface{} {
		sn.seriesCountRequests.Inc()
		n, err := sn.getSeriesCount(qt, accountID, projectID, deadline)
		if err != nil {
			sn.seriesCountErrors.Inc()
			err = fmt.Errorf("cannot get series count from vmstorage %s: %w", sn.connPool.Addr(), err)
		}
		return &nodeResult{
			n:   n,
			err: err,
		}
	})

	// Collect results
	var n uint64
	isPartial, err := snr.collectResults(partialSeriesCountResults, func(result interface{}) error {
		nr := result.(*nodeResult)
		if nr.err != nil {
			return nr.err
		}
		n += nr.n
		return nil
	})
	if err != nil {
		return 0, isPartial, fmt.Errorf("cannot fetch series count from vmstorage nodes: %w", err)
	}
	return n, isPartial, nil
}
2019-05-22 21:16:55 +00:00
2019-09-28 09:20:50 +00:00
type tmpBlocksFileWrapper struct {
2024-01-22 22:50:32 +00:00
shards [ ] tmpBlocksFileWrapperShardWithPadding
}
type tmpBlocksFileWrapperShard struct {
// tbf is a file where temporary blocks are stored from the read time series.
tbf * tmpBlocksFile
2024-01-22 17:13:39 +00:00
2024-01-22 22:50:32 +00:00
// metricNamesBuf is a buf for holding all the loaded unique metric names for m and orderedMetricNames.
2024-01-22 19:16:38 +00:00
// It should reduce pressure on Go GC by reducing the number of string allocations
// when constructing metricName string from byte slice.
2024-01-22 22:50:32 +00:00
metricNamesBuf [ ] byte
2024-01-22 19:16:38 +00:00
2024-01-22 22:50:32 +00:00
// addrssPool is a pool for holding all the blockAddrs objects across all the loaded time series.
2024-01-22 19:16:38 +00:00
// It should reduce pressure on Go GC by reducing the number of blockAddrs object allocations.
2024-01-22 22:50:32 +00:00
addrssPool [ ] blockAddrs
2024-01-22 20:15:08 +00:00
2024-01-22 22:50:32 +00:00
// addrsPool is a pool for holding the most of blockAddrs.addrs slices.
2024-01-22 21:56:25 +00:00
// It should reduce pressure on Go GC by reducing the number of blockAddrs.addrs allocations.
2024-01-22 22:50:32 +00:00
addrsPool [ ] tmpBlockAddr
2024-01-22 21:56:25 +00:00
2024-01-22 22:50:32 +00:00
// m maps metricName to the addrssPool index.
m map [ string ] int
2024-01-22 20:15:08 +00:00
2024-01-22 22:50:32 +00:00
// orderedMetricNames contains metric names in the order of their load.
2024-01-22 20:15:08 +00:00
// This order is important for sequential read of data from tmpBlocksFile.
2024-01-22 22:50:32 +00:00
orderedMetricNames [ ] string
2024-01-22 23:06:45 +00:00
// prevMetricName contains the metric name previously seen at RegisterAndWriteBlock.
prevMetricName [ ] byte
// prevAddrsIdx contains the addrssPool index previously seen at RegisterAndWriteBlock.
prevAddrsIdx int
2024-01-22 22:50:32 +00:00
}
type tmpBlocksFileWrapperShardWithPadding struct {
tmpBlocksFileWrapperShard
// The padding prevents false sharing on widespread platforms with
// 128 mod (cache line size) = 0 .
_ [ 128 - unsafe . Sizeof ( tmpBlocksFileWrapperShard { } ) % 128 ] byte
2019-09-28 09:20:50 +00:00
}
2022-11-18 11:40:01 +00:00
type blockAddrs struct {
2024-01-22 21:56:25 +00:00
addrs [ ] tmpBlockAddr
}
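// haveSameBlockAddrTails returns true when the slices a and b end at the same memory address.
// In this case appending to b (the shared addrsPool) keeps a contiguous with the pool,
// so a can be re-sliced to include the newly appended element without copying.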
func haveSameBlockAddrTails ( a , b [ ] tmpBlockAddr ) bool {
sha := ( * reflect . SliceHeader ) ( unsafe . Pointer ( & a ) )
shb := ( * reflect . SliceHeader ) ( unsafe . Pointer ( & b ) )
return sha . Data + uintptr ( sha . Len ) * unsafe . Sizeof ( tmpBlockAddr { } ) == shb . Data + uintptr ( shb . Len ) * unsafe . Sizeof ( tmpBlockAddr { } )
2023-01-10 06:03:21 +00:00
}
2024-01-22 22:50:32 +00:00
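// newBlockAddrs returns the index of a fresh blockAddrs entry in tbfwLocal.addrssPool,
// reusing spare slice capacity when possible in order to avoid extra allocations.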
func ( tbfwLocal * tmpBlocksFileWrapperShard ) newBlockAddrs ( ) int {
addrssPool := tbfwLocal . addrssPool
2024-01-22 20:15:08 +00:00
if cap ( addrssPool ) > len ( addrssPool ) {
addrssPool = addrssPool [ : len ( addrssPool ) + 1 ]
2024-01-22 19:16:38 +00:00
} else {
2024-01-22 20:15:08 +00:00
addrssPool = append ( addrssPool , blockAddrs { } )
2024-01-22 19:16:38 +00:00
}
2024-01-22 22:50:32 +00:00
tbfwLocal . addrssPool = addrssPool
2024-01-22 20:15:08 +00:00
idx := len ( addrssPool ) - 1
return idx
2022-11-18 11:40:01 +00:00
}
2022-10-25 11:41:56 +00:00
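// newTmpBlocksFileWrapper creates a wrapper with one shard per storage node in sns,
// so every worker writes to its own tmpBlocksFile and maps without locking.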
func newTmpBlocksFileWrapper ( sns [ ] * storageNode ) * tmpBlocksFileWrapper {
n := len ( sns )
2024-01-22 22:50:32 +00:00
shards := make ( [ ] tmpBlocksFileWrapperShardWithPadding , n )
for i := range shards {
shard := & shards [ i ]
shard . tbf = getTmpBlocksFile ( )
shard . m = make ( map [ string ] int )
2022-08-11 20:22:53 +00:00
}
return & tmpBlocksFileWrapper {
2024-01-22 22:50:32 +00:00
shards : shards ,
2020-09-24 17:16:19 +00:00
}
}
2022-10-01 19:05:43 +00:00
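// RegisterAndWriteBlock writes the marshaled mb.Block to the per-worker tmpBlocksFile
// and registers the returned block address under mb.MetricName in the worker-local shard,
// so that blocks belonging to the same metric can later be read back together.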
func ( tbfw * tmpBlocksFileWrapper ) RegisterAndWriteBlock ( mb * storage . MetricBlock , workerID uint ) error {
2024-01-22 22:50:32 +00:00
tbfwLocal := & tbfw . shards [ workerID ]
2019-09-28 17:38:24 +00:00
bb := tmpBufPool . Get ( )
2020-04-27 05:13:41 +00:00
bb . B = storage . MarshalBlock ( bb . B [ : 0 ] , & mb . Block )
2024-01-22 22:50:32 +00:00
addr , err := tbfwLocal . tbf . WriteBlockData ( bb . B , workerID )
2019-09-28 17:38:24 +00:00
tmpBufPool . Put ( bb )
2022-08-11 20:22:53 +00:00
if err != nil {
return err
2019-09-28 17:38:24 +00:00
}
2024-01-22 23:06:45 +00:00
2024-01-22 22:50:32 +00:00
m := tbfwLocal . m
2024-01-22 23:06:45 +00:00
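// Blocks for the same metric usually arrive in a row, so cache the previously seen
// metric name and its addrssPool index in order to skip the map lookup on the fast path.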
metricName := mb . MetricName
addrsIdx := tbfwLocal . prevAddrsIdx
if tbfwLocal . prevMetricName == nil || string ( metricName ) != string ( tbfwLocal . prevMetricName ) {
idx , ok := m [ string ( metricName ) ]
if ! ok {
idx = tbfwLocal . newBlockAddrs ( )
}
addrsIdx = idx
tbfwLocal . prevMetricName = append ( tbfwLocal . prevMetricName [ : 0 ] , metricName ... )
tbfwLocal . prevAddrsIdx = addrsIdx
2022-11-18 11:40:01 +00:00
}
2024-01-22 22:50:32 +00:00
addrs := & tbfwLocal . addrssPool [ addrsIdx ]
2024-01-22 23:06:45 +00:00
2024-01-22 22:50:32 +00:00
addrsPool := tbfwLocal . addrsPool
2024-01-22 21:56:25 +00:00
if addrs . addrs == nil || haveSameBlockAddrTails ( addrs . addrs , addrsPool ) {
// It is safe to append addr directly to addrsPool, since addrs.addrs (if non-nil) ends exactly at the current addrsPool tail.
addrsPool = append ( addrsPool , addr )
2024-01-22 22:50:32 +00:00
tbfwLocal . addrsPool = addrsPool
2024-01-22 21:56:25 +00:00
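// Re-slice addrs.addrs so it points at the tail of addrsPool including the just appended addr.
// The capacity is limited to len(addrsPool), so a later append to addrs.addrs cannot
// silently overwrite entries appended to addrsPool afterwards.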
addrs . addrs = addrsPool [ len ( addrsPool ) - len ( addrs . addrs ) - 1 : len ( addrsPool ) : len ( addrsPool ) ]
} else {
// It is unsafe to append addr to addrsPool, since other entries were appended to it after addrs.addrs.
// So just append addr to addrs.addrs directly.
addrs . addrs = append ( addrs . addrs , addr )
}
2024-01-22 23:06:45 +00:00
2022-11-18 11:40:01 +00:00
if len ( addrs . addrs ) == 1 {
2024-01-22 22:50:32 +00:00
metricNamesBuf := tbfwLocal . metricNamesBuf
2024-01-22 17:13:39 +00:00
metricNamesBufLen := len ( metricNamesBuf )
metricNamesBuf = append ( metricNamesBuf , metricName ... )
metricNameStr := bytesutil . ToUnsafeString ( metricNamesBuf [ metricNamesBufLen : ] )
2024-01-22 22:50:32 +00:00
orderedMetricNames := tbfwLocal . orderedMetricNames
2023-03-12 08:42:17 +00:00
orderedMetricNames = append ( orderedMetricNames , metricNameStr )
2024-01-22 20:15:08 +00:00
m [ metricNameStr ] = addrsIdx
2024-01-22 17:13:39 +00:00
2024-01-22 22:50:32 +00:00
tbfwLocal . orderedMetricNames = orderedMetricNames
tbfwLocal . metricNamesBuf = metricNamesBuf
2022-08-11 20:22:53 +00:00
}
2024-01-22 23:06:45 +00:00
2022-08-11 20:22:53 +00:00
return nil
2019-09-28 09:20:50 +00:00
}
2024-01-22 20:15:08 +00:00
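// Finalize finalizes the per-shard tmpBlocksFiles and merges the per-shard indexes into the first shard.
// It returns the ordered metric names, the merged addrssPool, the metricName->addrssPool index map
// and the total number of bytes written to the temporary files.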
func ( tbfw * tmpBlocksFileWrapper ) Finalize ( ) ( [ ] string , [ ] blockAddrs , map [ string ] int , uint64 , error ) {
2024-01-22 22:50:32 +00:00
shards := tbfw . shards
2022-08-17 11:07:49 +00:00
var bytesTotal uint64
2024-01-22 22:50:32 +00:00
for i := range shards {
tbf := shards [ i ] . tbf
2022-08-17 11:07:49 +00:00
if err := tbf . Finalize ( ) ; err != nil {
2024-01-22 22:50:32 +00:00
tbfw . closeTmpBlockFiles ( )
return nil , nil , nil , 0 , fmt . Errorf ( "cannot finalize temporary blocks file with %d series: %w" , len ( shards [ i ] . m ) , err )
2022-08-17 11:07:49 +00:00
}
bytesTotal += tbf . Len ( )
}
2024-01-22 22:50:32 +00:00
// merge data collected from all the shards
tbfwFirst := & shards [ 0 ]
orderedMetricNames := tbfwFirst . orderedMetricNames
addrsByMetricName := tbfwFirst . m
for i := 1 ; i < len ( shards ) ; i ++ {
tbfwLocal := & shards [ i ]
m := tbfwLocal . m
addrssPool := tbfwLocal . addrssPool
for _ , metricName := range tbfwLocal . orderedMetricNames {
2024-01-22 20:15:08 +00:00
dstAddrsIdx , ok := addrsByMetricName [ metricName ]
2022-08-17 11:07:49 +00:00
if ! ok {
orderedMetricNames = append ( orderedMetricNames , metricName )
2024-01-22 22:50:32 +00:00
dstAddrsIdx = tbfwFirst . newBlockAddrs ( )
2024-01-22 20:15:08 +00:00
addrsByMetricName [ metricName ] = dstAddrsIdx
2022-08-17 11:07:49 +00:00
}
2024-01-22 22:50:32 +00:00
dstAddrs := & tbfwFirst . addrssPool [ dstAddrsIdx ]
dstAddrs . addrs = append ( dstAddrs . addrs , addrssPool [ m [ metricName ] ] . addrs ... )
2022-08-17 11:07:49 +00:00
}
}
2024-01-22 22:50:32 +00:00
return orderedMetricNames , tbfwFirst . addrssPool , addrsByMetricName , bytesTotal , nil
}
func ( tbfw * tmpBlocksFileWrapper ) closeTmpBlockFiles ( ) {
tbfs := tbfw . getTmpBlockFiles ( )
closeTmpBlockFiles ( tbfs )
}
func ( tbfw * tmpBlocksFileWrapper ) getTmpBlockFiles ( ) [ ] * tmpBlocksFile {
shards := tbfw . shards
tbfs := make ( [ ] * tmpBlocksFile , len ( shards ) )
for i := range shards {
tbfs [ i ] = shards [ i ] . tbf
}
return tbfs
2022-08-17 11:07:49 +00:00
}
2020-09-26 01:29:45 +00:00
var metricNamePool = & sync . Pool {
New : func ( ) interface { } {
return & storage . MetricName { }
} ,
}
// ExportBlocks searches for time series matching sq and calls f for each found block.
//
// f is called in parallel from multiple goroutines.
// It is the responsibility of f to call b.UnmarshalData before reading timestamps and values from the block.
// It is the responsibility of f to filter blocks according to the given tr.
2022-07-05 21:11:59 +00:00
func ExportBlocks ( qt * querytracer . Tracer , sq * storage . SearchQuery , deadline searchutils . Deadline ,
2022-10-01 19:05:43 +00:00
f func ( mn * storage . MetricName , b * storage . Block , tr storage . TimeRange , workerID uint ) error ) error {
2022-06-08 18:05:17 +00:00
qt = qt . NewChild ( "export blocks: %s" , sq )
defer qt . Done ( )
2020-09-26 01:29:45 +00:00
if deadline . Exceeded ( ) {
2020-11-14 10:36:21 +00:00
return fmt . Errorf ( "timeout exceeded before starting data export: %s" , deadline . String ( ) )
2020-09-26 01:29:45 +00:00
}
tr := storage . TimeRange {
MinTimestamp : sq . MinTimestamp ,
MaxTimestamp : sq . MaxTimestamp ,
}
2022-10-25 11:41:56 +00:00
sns := getStorageNodes ( )
blocksRead := newPerNodeCounter ( sns )
samples := newPerNodeCounter ( sns )
2022-10-01 19:05:43 +00:00
processBlock := func ( mb * storage . MetricBlock , workerID uint ) error {
2020-09-26 01:29:45 +00:00
mn := metricNamePool . Get ( ) . ( * storage . MetricName )
if err := mn . Unmarshal ( mb . MetricName ) ; err != nil {
return fmt . Errorf ( "cannot unmarshal metricName: %w" , err )
}
2022-10-01 19:05:43 +00:00
if err := f ( mn , & mb . Block , tr , workerID ) ; err != nil {
2020-09-26 01:29:45 +00:00
return err
}
mn . Reset ( )
metricNamePool . Put ( mn )
2022-10-01 19:05:43 +00:00
blocksRead . Add ( workerID , 1 )
samples . Add ( workerID , uint64 ( mb . Block . RowsCount ( ) ) )
2020-09-26 01:29:45 +00:00
return nil
}
2022-10-25 11:41:56 +00:00
_ , err := processBlocks ( qt , sns , true , sq , processBlock , deadline )
2022-08-11 20:22:53 +00:00
qt . Printf ( "export blocks=%d, samples=%d, err=%v" , blocksRead . GetTotal ( ) , samples . GetTotal ( ) , err )
2020-09-26 01:29:45 +00:00
if err != nil {
2020-11-14 10:36:21 +00:00
return fmt . Errorf ( "error occurred during export: %w" , err )
2020-09-26 01:29:45 +00:00
}
2020-11-14 10:36:21 +00:00
return nil
2020-09-26 01:29:45 +00:00
}
2020-11-16 08:55:55 +00:00
// SearchMetricNames returns all the metric names matching sq until the given deadline.
2022-06-28 14:36:27 +00:00
//
// The returned metric names must be unmarshaled via storage.MetricName.UnmarshalString().
2022-07-05 21:11:59 +00:00
func SearchMetricNames ( qt * querytracer . Tracer , denyPartialResponse bool , sq * storage . SearchQuery , deadline searchutils . Deadline ) ( [ ] string , bool , error ) {
2022-06-08 18:05:17 +00:00
qt = qt . NewChild ( "fetch metric names: %s" , sq )
defer qt . Done ( )
2020-11-16 08:55:55 +00:00
if deadline . Exceeded ( ) {
return nil , false , fmt . Errorf ( "timeout exceeded before starting to search metric names: %s" , deadline . String ( ) )
}
requestData := sq . Marshal ( nil )
// Send the query to all the storage nodes in parallel.
type nodeResult struct {
2022-06-28 14:36:27 +00:00
metricNames [ ] string
2020-11-16 08:55:55 +00:00
err error
}
2022-10-25 11:41:56 +00:00
sns := getStorageNodes ( )
snr := startStorageNodesRequest ( qt , sns , denyPartialResponse , func ( qt * querytracer . Tracer , workerID uint , sn * storageNode ) interface { } {
2020-11-23 08:51:40 +00:00
sn . searchMetricNamesRequests . Inc ( )
2022-05-31 23:31:40 +00:00
metricNames , err := sn . processSearchMetricNames ( qt , requestData , deadline )
2020-11-23 08:51:40 +00:00
if err != nil {
2020-11-23 13:00:04 +00:00
sn . searchMetricNamesErrors . Inc ( )
2020-11-23 08:51:40 +00:00
err = fmt . Errorf ( "cannot search metric names on vmstorage %s: %w" , sn . connPool . Addr ( ) , err )
}
return & nodeResult {
metricNames : metricNames ,
err : err ,
}
} )
2020-11-16 08:55:55 +00:00
// Collect results.
2022-06-28 14:36:27 +00:00
metricNamesMap := make ( map [ string ] struct { } )
2020-11-23 08:51:40 +00:00
isPartial , err := snr . collectResults ( partialSearchMetricNamesResults , func ( result interface { } ) error {
2020-11-22 22:15:51 +00:00
nr := result . ( * nodeResult )
2020-11-16 08:55:55 +00:00
if nr . err != nil {
2020-11-22 22:15:51 +00:00
return nr . err
2020-11-16 08:55:55 +00:00
}
for _ , metricName := range nr . metricNames {
2022-06-28 14:36:27 +00:00
metricNamesMap [ metricName ] = struct { } { }
2020-11-16 08:55:55 +00:00
}
2020-11-22 22:15:51 +00:00
return nil
} )
if err != nil {
return nil , isPartial , fmt . Errorf ( "cannot fetch metric names from vmstorage nodes: %w" , err )
2020-11-16 08:55:55 +00:00
}
2022-07-05 22:33:35 +00:00
metricNames := make ( [ ] string , 0 , len ( metricNamesMap ) )
2022-06-28 14:36:27 +00:00
for metricName := range metricNamesMap {
metricNames = append ( metricNames , metricName )
2020-11-16 08:55:55 +00:00
}
2022-06-28 14:36:27 +00:00
sort . Strings ( metricNames )
qt . Printf ( "sort %d metric names" , len ( metricNames ) )
return metricNames , isPartial , nil
2020-11-16 08:55:55 +00:00
}
2023-06-23 11:17:34 +00:00
// limitExceededErr is an error generated by vmselect
// when complexity limits are exceeded while processing responses
// received from storage nodes.
type limitExceededErr struct {
err error
}
// Error implements the error interface.
func ( e limitExceededErr ) Error ( ) string { return e . err . Error ( ) }
2020-09-26 01:29:45 +00:00
// ProcessSearchQuery performs sq until the given deadline.
2020-09-15 17:39:34 +00:00
//
// Results.RunParallel or Results.Cancel must be called on the returned Results.
2022-07-05 21:11:59 +00:00
func ProcessSearchQuery ( qt * querytracer . Tracer , denyPartialResponse bool , sq * storage . SearchQuery , deadline searchutils . Deadline ) ( * Results , bool , error ) {
2022-06-28 09:55:20 +00:00
qt = qt . NewChild ( "fetch matching series: %s" , sq )
2022-06-08 18:05:17 +00:00
defer qt . Done ( )
2020-07-21 15:34:59 +00:00
if deadline . Exceeded ( ) {
return nil , false , fmt . Errorf ( "timeout exceeded before starting the query processing: %s" , deadline . String ( ) )
}
2022-05-31 23:31:40 +00:00
// Setup search.
2019-05-22 21:16:55 +00:00
tr := storage . TimeRange {
MinTimestamp : sq . MinTimestamp ,
MaxTimestamp : sq . MaxTimestamp ,
}
2022-10-25 11:41:56 +00:00
sns := getStorageNodes ( )
tbfw := newTmpBlocksFileWrapper ( sns )
blocksRead := newPerNodeCounter ( sns )
samples := newPerNodeCounter ( sns )
maxSamplesPerWorker := uint64 ( * maxSamplesPerQuery ) / uint64 ( len ( sns ) )
2022-10-01 19:05:43 +00:00
processBlock := func ( mb * storage . MetricBlock , workerID uint ) error {
blocksRead . Add ( workerID , 1 )
n := samples . Add ( workerID , uint64 ( mb . Block . RowsCount ( ) ) )
2022-08-11 20:22:53 +00:00
if * maxSamplesPerQuery > 0 && n > maxSamplesPerWorker && samples . GetTotal ( ) > uint64 ( * maxSamplesPerQuery ) {
2023-07-06 04:13:32 +00:00
return & limitExceededErr {
err : fmt . Errorf ( "cannot select more than -search.maxSamplesPerQuery=%d samples; possible solutions: " +
"to increase the -search.maxSamplesPerQuery; to reduce time range for the query; " +
"to use more specific label filters in order to select lower number of series" , * maxSamplesPerQuery ) ,
}
2022-08-11 20:22:53 +00:00
}
2022-10-01 19:05:43 +00:00
if err := tbfw . RegisterAndWriteBlock ( mb , workerID ) ; err != nil {
2020-09-26 01:29:45 +00:00
return fmt . Errorf ( "cannot write MetricBlock to temporary blocks file: %w" , err )
}
return nil
}
2022-10-25 11:41:56 +00:00
isPartial , err := processBlocks ( qt , sns , denyPartialResponse , sq , processBlock , deadline )
2020-09-26 01:29:45 +00:00
if err != nil {
2024-01-22 22:50:32 +00:00
tbfw . closeTmpBlockFiles ( )
2020-11-10 16:48:50 +00:00
return nil , false , fmt . Errorf ( "error occurred during search: %w" , err )
2020-09-26 01:29:45 +00:00
}
2024-01-22 20:15:08 +00:00
orderedMetricNames , addrssPool , m , bytesTotal , err := tbfw . Finalize ( )
2022-08-11 20:22:53 +00:00
if err != nil {
return nil , false , fmt . Errorf ( "cannot finalize temporary blocks files: %w" , err )
2020-09-26 01:29:45 +00:00
}
2024-01-22 20:15:08 +00:00
qt . Printf ( "fetch unique series=%d, blocks=%d, samples=%d, bytes=%d" , len ( m ) , blocksRead . GetTotal ( ) , samples . GetTotal ( ) , bytesTotal )
2020-09-26 01:29:45 +00:00
var rss Results
rss . tr = tr
rss . deadline = deadline
2024-01-22 22:50:32 +00:00
rss . tbfs = tbfw . getTmpBlockFiles ( )
2022-08-11 20:22:53 +00:00
pts := make ( [ ] packedTimeseries , len ( orderedMetricNames ) )
for i , metricName := range orderedMetricNames {
2020-09-26 01:29:45 +00:00
pts [ i ] = packedTimeseries {
metricName : metricName ,
2024-01-22 20:15:08 +00:00
addrs : addrssPool [ m [ metricName ] ] . addrs ,
2020-09-26 01:29:45 +00:00
}
}
rss . packedTimeseries = pts
2020-11-14 10:36:21 +00:00
return & rss , isPartial , nil
2020-09-26 01:29:45 +00:00
}
2022-07-06 10:19:45 +00:00
// ProcessBlocks calls processBlock per each block matching the given sq.
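//
// A minimal usage sketch (illustrative only; sq and deadline are assumed to be prepared by the caller;
// the callback must be safe for concurrent calls from multiple goroutines):
//
//	var blocksFound atomic.Uint64
//	isPartial, err := ProcessBlocks(nil, true, sq, func(mb *storage.MetricBlock, workerID uint) error {
//		blocksFound.Add(1)
//		return nil
//	}, deadline)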
func ProcessBlocks ( qt * querytracer . Tracer , denyPartialResponse bool , sq * storage . SearchQuery ,
2022-10-25 11:41:56 +00:00
processBlock func ( mb * storage . MetricBlock , workerID uint ) error , deadline searchutils . Deadline ) ( bool , error ) {
sns := getStorageNodes ( )
return processBlocks ( qt , sns , denyPartialResponse , sq , processBlock , deadline )
}
func processBlocks ( qt * querytracer . Tracer , sns [ ] * storageNode , denyPartialResponse bool , sq * storage . SearchQuery ,
2022-10-01 19:05:43 +00:00
processBlock func ( mb * storage . MetricBlock , workerID uint ) error , deadline searchutils . Deadline ) ( bool , error ) {
2020-09-26 01:29:45 +00:00
requestData := sq . Marshal ( nil )
2022-10-25 11:41:56 +00:00
// Make sure that processBlock is no longer called after processBlocks() returns.
2022-08-11 18:37:21 +00:00
// Use per-worker WaitGroup instead of a shared WaitGroup in order to avoid inter-CPU contention,
2023-11-14 18:57:29 +00:00
// which may significantly slow down the rate of processBlock calls on multi-CPU systems.
2022-09-02 18:34:00 +00:00
type wgStruct struct {
// mu prevents calling processBlock after stop is set to true
mu sync . Mutex
// wg is used for waiting until currently executed processBlock calls are finished.
2022-08-11 18:37:21 +00:00
wg sync . WaitGroup
2022-09-02 18:34:00 +00:00
// stop must be set to true when no more processBlock calls should be made.
stop bool
}
type wgWithPadding struct {
wgStruct
2022-08-21 21:32:28 +00:00
// The padding prevents false sharing on widespread platforms with
2022-08-11 18:37:21 +00:00
// 128 mod (cache line size) = 0 .
2022-09-02 18:34:00 +00:00
_ [ 128 - unsafe . Sizeof ( wgStruct { } ) % 128 ] byte
2022-08-11 18:37:21 +00:00
}
2022-10-25 11:41:56 +00:00
wgs := make ( [ ] wgWithPadding , len ( sns ) )
2022-10-01 19:05:43 +00:00
f := func ( mb * storage . MetricBlock , workerID uint ) error {
muwg := & wgs [ workerID ]
2022-09-02 18:34:00 +00:00
muwg . mu . Lock ( )
if muwg . stop {
muwg . mu . Unlock ( )
2022-08-08 09:54:55 +00:00
return nil
}
2022-09-02 18:34:00 +00:00
muwg . wg . Add ( 1 )
muwg . mu . Unlock ( )
2022-10-01 19:05:43 +00:00
err := processBlock ( mb , workerID )
2022-09-02 18:34:00 +00:00
muwg . wg . Done ( )
return err
2022-08-08 09:54:55 +00:00
}
2020-09-26 01:29:45 +00:00
// Send the query to all the storage nodes in parallel.
2022-10-25 11:41:56 +00:00
snr := startStorageNodesRequest ( qt , sns , denyPartialResponse , func ( qt * querytracer . Tracer , workerID uint , sn * storageNode ) interface { } {
2020-11-23 08:51:40 +00:00
sn . searchRequests . Inc ( )
2022-10-01 19:05:43 +00:00
err := sn . processSearchQuery ( qt , requestData , f , workerID , deadline )
2020-11-23 08:51:40 +00:00
if err != nil {
2020-11-23 13:00:04 +00:00
sn . searchErrors . Inc ( )
2020-11-23 08:51:40 +00:00
err = fmt . Errorf ( "cannot perform search on vmstorage %s: %w" , sn . connPool . Addr ( ) , err )
}
return & err
} )
2019-05-22 21:16:55 +00:00
2019-05-22 21:23:23 +00:00
// Collect results.
2020-11-23 08:51:40 +00:00
isPartial , err := snr . collectResults ( partialSearchResults , func ( result interface { } ) error {
2020-11-22 22:15:51 +00:00
errP := result . ( * error )
return * errP
} )
2022-10-25 11:41:56 +00:00
// Make sure that processBlock is no longer called after processBlocks() returns.
2022-09-02 18:34:00 +00:00
for i := range wgs {
muwg := & wgs [ i ]
muwg . mu . Lock ( )
muwg . stop = true
muwg . mu . Unlock ( )
}
2022-08-11 18:37:21 +00:00
for i := range wgs {
wgs [ i ] . wg . Wait ( )
}
2020-11-22 22:15:51 +00:00
if err != nil {
return isPartial , fmt . Errorf ( "cannot fetch query results from vmstorage nodes: %w" , err )
}
return isPartial , nil
}
2020-11-23 08:51:40 +00:00
type storageNodesRequest struct {
denyPartialResponse bool
2022-11-18 11:01:42 +00:00
resultsCh chan rpcResult
qts map [ * querytracer . Tracer ] struct { }
2022-10-25 11:41:56 +00:00
sns [ ] * storageNode
2020-11-23 08:51:40 +00:00
}
2022-11-18 11:01:42 +00:00
type rpcResult struct {
2023-12-12 22:06:30 +00:00
data interface { }
qt * querytracer . Tracer
group * storageNodesGroup
2022-11-18 11:01:42 +00:00
}
2022-10-25 11:41:56 +00:00
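// startStorageNodesRequest starts a goroutine per storage node in sns. Every goroutine calls f
// with its own query tracer and sends the result to resultsCh, which is later drained
// by collectResults or collectAllResults.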
func startStorageNodesRequest ( qt * querytracer . Tracer , sns [ ] * storageNode , denyPartialResponse bool ,
f func ( qt * querytracer . Tracer , workerID uint , sn * storageNode ) interface { } ) * storageNodesRequest {
2022-11-18 11:01:42 +00:00
resultsCh := make ( chan rpcResult , len ( sns ) )
qts := make ( map [ * querytracer . Tracer ] struct { } , len ( sns ) )
2022-10-25 11:41:56 +00:00
for idx , sn := range sns {
2022-06-08 18:05:17 +00:00
qtChild := qt . NewChild ( "rpc at vmstorage %s" , sn . connPool . Addr ( ) )
2022-11-18 11:01:42 +00:00
qts [ qtChild ] = struct { } { }
2022-10-01 19:05:43 +00:00
go func ( workerID uint , sn * storageNode ) {
2022-11-18 11:01:42 +00:00
data := f ( qtChild , workerID , sn )
resultsCh <- rpcResult {
2023-12-12 22:06:30 +00:00
data : data ,
qt : qtChild ,
group : sn . group ,
2022-11-18 11:01:42 +00:00
}
2022-10-01 19:05:43 +00:00
} ( uint ( idx ) , sn )
2020-11-23 08:51:40 +00:00
}
return & storageNodesRequest {
denyPartialResponse : denyPartialResponse ,
resultsCh : resultsCh ,
2022-11-18 11:01:42 +00:00
qts : qts ,
2022-10-25 11:41:56 +00:00
sns : sns ,
2020-11-23 08:51:40 +00:00
}
}
2022-11-18 11:01:42 +00:00
func ( snr * storageNodesRequest ) finishQueryTracers ( msg string ) {
for qt := range snr . qts {
snr . finishQueryTracer ( qt , msg )
}
}
func ( snr * storageNodesRequest ) finishQueryTracer ( qt * querytracer . Tracer , msg string ) {
if msg == "" {
qt . Done ( )
} else {
qt . Donef ( "%s" , msg )
}
delete ( snr . qts , qt )
}
2020-11-23 10:33:17 +00:00
func ( snr * storageNodesRequest ) collectAllResults ( f func ( result interface { } ) error ) error {
2022-10-25 11:41:56 +00:00
sns := snr . sns
for i := 0 ; i < len ( sns ) ; i ++ {
2020-11-23 08:51:40 +00:00
result := <- snr . resultsCh
2022-11-18 11:01:42 +00:00
if err := f ( result . data ) ; err != nil {
snr . finishQueryTracer ( result . qt , fmt . Sprintf ( "error: %s" , err ) )
2022-02-21 19:15:02 +00:00
// Immediately return the error to the caller without waiting for responses from other vmstorage nodes -
// they will be processed in the background.
2022-11-18 11:01:42 +00:00
snr . finishQueryTracers ( "cancel request because of error in other vmstorage nodes" )
2022-02-21 19:15:02 +00:00
return err
2020-11-23 08:51:40 +00:00
}
2022-11-18 11:01:42 +00:00
snr . finishQueryTracer ( result . qt , "" )
2020-11-23 08:51:40 +00:00
}
return nil
}
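// collectResults waits for the results started by startStorageNodesRequest and applies f to each of them.
// It returns isPartial=true when some vmstorage nodes were unavailable, but every replication group
// still had at least one node returning a successful response and partial responses aren't denied.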
func ( snr * storageNodesRequest ) collectResults ( partialResultsCounter * metrics . Counter , f func ( result interface { } ) error ) ( bool , error ) {
2022-10-25 11:41:56 +00:00
sns := snr . sns
2023-12-20 17:53:46 +00:00
if len ( sns ) == 0 {
return false , nil
}
groupsCount := sns [ 0 ] . group . groupsCount
resultsCollectedPerGroup := make ( map [ * storageNodesGroup ] int , groupsCount )
errsPartialPerGroup := make ( map [ * storageNodesGroup ] [ ] error )
for range sns {
2020-11-23 10:33:17 +00:00
// There is no need for a timer here, since all the goroutines executing the f function
// passed to startStorageNodesRequest must finish before the deadline.
2020-11-23 08:51:40 +00:00
result := <- snr . resultsCh
2023-12-12 22:06:30 +00:00
group := result . group
2022-11-18 11:01:42 +00:00
if err := f ( result . data ) ; err != nil {
snr . finishQueryTracer ( result . qt , fmt . Sprintf ( "error: %s" , err ) )
2022-02-21 19:15:02 +00:00
var er * errRemote
if errors . As ( err , & er ) {
// Immediately return the error reported by vmstorage to the caller,
// since such errors usually mean misconfiguration at vmstorage.
// The caller must know about the misconfiguration, so that it can be fixed ASAP.
2022-11-18 11:01:42 +00:00
snr . finishQueryTracers ( "cancel request because of error in other vmstorage nodes" )
2022-02-21 19:15:02 +00:00
return false , err
}
2023-06-23 11:17:34 +00:00
var limitErr * limitExceededErr
if errors . As ( err , & limitErr ) {
// Immediately return the error, since complexity limits are already exceeded,
// and we don't need to process the rest of results.
snr . finishQueryTracers ( "cancel request because query complexity limit was exceeded" )
return false , err
}
2023-12-12 22:06:30 +00:00
errsPartialPerGroup [ group ] = append ( errsPartialPerGroup [ group ] , err )
if snr . denyPartialResponse && len ( errsPartialPerGroup [ group ] ) >= group . replicationFactor {
2022-06-23 17:17:24 +00:00
// Return the error to the caller if partial responses are denied
2023-12-12 22:06:30 +00:00
// and the number of partial responses for the given group reaches its replicationFactor,
2022-06-23 17:17:24 +00:00
// since this means that the response is partial.
2023-12-12 22:06:30 +00:00
snr . finishQueryTracers ( fmt . Sprintf ( "cancel request because partial responses are denied and replicationFactor=%d vmstorage nodes at group %q failed to return response" ,
group . replicationFactor , group . name ) )
2023-09-07 14:07:03 +00:00
// Return a 503 status code for the partial response, so the caller can retry it if needed.
err = & httpserver . ErrorWithStatusCode {
2023-09-10 13:18:15 +00:00
Err : err ,
2023-09-07 14:07:03 +00:00
StatusCode : http . StatusServiceUnavailable ,
}
2022-06-23 17:17:24 +00:00
return false , err
}
2019-05-22 21:23:23 +00:00
continue
2019-05-22 21:16:55 +00:00
}
2022-11-18 11:01:42 +00:00
snr . finishQueryTracer ( result . qt , "" )
2023-12-12 22:06:30 +00:00
resultsCollectedPerGroup [ group ] ++
2023-12-20 17:53:46 +00:00
if * skipSlowReplicas && len ( resultsCollectedPerGroup ) == groupsCount {
2023-12-12 22:06:30 +00:00
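// Every group must have collected more than nodesCount-replicationFactor responses.
// Only then is each series guaranteed to be present in at least one of the collected responses.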
canSkipSlowReplicas := true
for g , n := range resultsCollectedPerGroup {
if n <= g . nodesCount - g . replicationFactor {
canSkipSlowReplicas = false
break
}
}
if canSkipSlowReplicas {
// There is no need to wait for the remaining results,
// because the collected results contain all the data according to the given per-group replicationFactor.
// This should speed up responses when a part of vmstorage nodes are slow and/or temporarily unavailable.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
snr . finishQueryTracers ( "cancel request because -search.skipSlowReplicas is set and every group returned the needed number of responses according to replicationFactor" )
return false , nil
}
}
}
// Verify whether the full result can be returned
isFullResponse := true
for g , errsPartial := range errsPartialPerGroup {
if len ( errsPartial ) >= g . replicationFactor {
isFullResponse = false
break
}
}
if isFullResponse {
2022-06-27 09:21:23 +00:00
// Assume that the result is full if the number of failing vmstorage nodes
2023-12-12 22:06:30 +00:00
// is smaller than the replicationFactor for each group.
2022-02-21 19:15:02 +00:00
return false , nil
2019-05-22 21:16:55 +00:00
}
2023-12-12 22:06:30 +00:00
// Verify whether at least a single node in each group successfully returned a result,
// so that a partial result can be returned.
for g , errsPartial := range errsPartialPerGroup {
if len ( errsPartial ) == g . nodesCount {
// All the vmstorage nodes at the given group g returned error.
// Return only the first error, since there is no sense in returning all the errors.
// Return a 503 status code for the partial response, so the caller can retry it if needed.
err := & httpserver . ErrorWithStatusCode {
Err : errsPartial [ 0 ] ,
StatusCode : http . StatusServiceUnavailable ,
}
return false , err
}
if len ( errsPartial ) > 0 {
partialErrorsLogger . Warnf ( "%d out of %d vmstorage nodes at group %q were unavailable during the query; a sample error: %s" , len ( errsPartial ) , len ( sns ) , g . name , errsPartial [ 0 ] )
2023-09-07 14:07:03 +00:00
}
2022-02-21 19:15:02 +00:00
}
2023-12-12 22:06:30 +00:00
2022-02-21 19:15:02 +00:00
// Return partial results.
2023-12-12 22:06:30 +00:00
// This allows continuing to return responses in the case
2022-06-27 09:21:23 +00:00
// when a part of the vmstorage nodes is temporarily unavailable.
partialResultsCounter . Inc ( )
2022-02-21 19:15:02 +00:00
// Do not return the error, since it may spam logs on busy vmselect
// serving high amounts of requests.
return true , nil
2019-05-22 21:23:23 +00:00
}
2022-06-27 09:21:23 +00:00
var partialErrorsLogger = logger . WithThrottler ( "partialErrors" , 10 * time . Second )
2023-12-12 22:06:30 +00:00
type storageNodesGroup struct {
// group name
name string
// replicationFactor for the given group
replicationFactor int
// the number of nodes in the group
nodesCount int
2023-12-20 17:53:46 +00:00
// groupsCount is the number of groups in the list the given group belongs to
groupsCount int
2023-12-12 22:06:30 +00:00
}
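// initStorageNodeGroups groups the given -storageNode addrs by their optional group prefix
// and records the per-group replicationFactor together with the number of nodes in every group.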
func initStorageNodeGroups ( addrs [ ] string ) map [ string ] * storageNodesGroup {
m := make ( map [ string ] * storageNodesGroup )
for _ , addr := range addrs {
groupName , _ := netutil . ParseGroupAddr ( addr )
g , ok := m [ groupName ]
if ! ok {
g = & storageNodesGroup {
name : groupName ,
replicationFactor : replicationFactor . Get ( groupName ) ,
}
m [ groupName ] = g
}
g . nodesCount ++
}
2023-12-20 17:53:46 +00:00
groupsCount := len ( m )
for _ , g := range m {
g . groupsCount = groupsCount
}
2023-12-12 22:06:30 +00:00
return m
}
2019-05-22 21:23:23 +00:00
type storageNode struct {
2023-12-12 22:06:30 +00:00
// The group this storageNode belongs to.
group * storageNodesGroup
// Connection pool for the given storageNode.
2019-05-22 21:23:23 +00:00
connPool * netutil . ConnPool
2021-05-24 16:11:35 +00:00
// The number of concurrent queries to storageNode.
concurrentQueries * metrics . Counter
2019-05-22 21:23:23 +00:00
2020-11-23 10:33:17 +00:00
// The number of RegisterMetricNames requests to storageNode.
registerMetricNamesRequests * metrics . Counter
// The number of RegisterMetricNames request errors to storageNode.
2020-11-23 13:00:04 +00:00
registerMetricNamesErrors * metrics . Counter
2020-11-23 10:33:17 +00:00
2019-05-22 21:23:23 +00:00
// The number of DeleteSeries requests to storageNode.
deleteSeriesRequests * metrics . Counter
// The number of DeleteSeries request errors to storageNode.
2020-11-23 13:00:04 +00:00
deleteSeriesErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
2022-06-12 01:32:13 +00:00
// The number of requests to labelNames.
labelNamesRequests * metrics . Counter
2019-05-22 21:23:23 +00:00
2022-06-12 01:32:13 +00:00
// The number of errors during requests to labelNames.
labelNamesErrors * metrics . Counter
2020-11-04 22:15:43 +00:00
2019-05-22 21:23:23 +00:00
// The number of requests to labelValues.
labelValuesRequests * metrics . Counter
2020-11-04 22:15:43 +00:00
// The number of errors during requests to labelValuesOnTimeRange.
2020-11-23 13:00:04 +00:00
labelValuesErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
2020-09-10 21:29:26 +00:00
// The number of requests to tagValueSuffixes.
tagValueSuffixesRequests * metrics . Counter
// The number of errors during requests to tagValueSuffixes.
2020-11-23 13:00:04 +00:00
tagValueSuffixesErrors * metrics . Counter
2020-09-10 21:29:26 +00:00
2020-04-22 16:57:36 +00:00
// The number of requests to tsdb status.
tsdbStatusRequests * metrics . Counter
// The number of errors during requests to tsdb status.
2020-11-23 13:00:04 +00:00
tsdbStatusErrors * metrics . Counter
2020-04-22 16:57:36 +00:00
2019-05-22 21:23:23 +00:00
// The number of requests to seriesCount.
seriesCountRequests * metrics . Counter
// The number of errors during requests to seriesCount.
2020-11-23 13:00:04 +00:00
seriesCountErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
2022-06-27 11:00:24 +00:00
// The number of searchMetricNames requests to storageNode.
2020-11-16 08:55:55 +00:00
searchMetricNamesRequests * metrics . Counter
2022-06-27 11:00:24 +00:00
// The number of searchMetricNames errors to storageNode.
searchMetricNamesErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
// The number of search requests to storageNode.
searchRequests * metrics . Counter
// The number of search request errors to storageNode.
2020-11-23 13:00:04 +00:00
searchErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
// The number of metric blocks read.
metricBlocksRead * metrics . Counter
// The number of read metric rows.
metricRowsRead * metrics . Counter
2022-11-25 18:32:45 +00:00
// The number of list tenants requests to storageNode.
tenantsRequests * metrics . Counter
// The number of list tenants errors to storageNode.
tenantsErrors * metrics . Counter
2019-05-22 21:23:23 +00:00
}
2022-05-31 23:31:40 +00:00
func ( sn * storageNode ) registerMetricNames ( qt * querytracer . Tracer , mrs [ ] storage . MetricRow , deadline searchutils . Deadline ) error {
2020-11-23 10:33:17 +00:00
if len ( mrs ) == 0 {
return nil
}
f := func ( bc * handshake . BufferedConn ) error {
return sn . registerMetricNamesOnConn ( bc , mrs )
}
2022-05-31 23:31:40 +00:00
return sn . execOnConnWithPossibleRetry ( qt , "registerMetricNames_v3" , f , deadline )
2020-11-23 10:33:17 +00:00
}
2022-07-05 20:56:31 +00:00
func ( sn * storageNode ) deleteSeries ( qt * querytracer . Tracer , requestData [ ] byte , deadline searchutils . Deadline ) ( int , error ) {
2019-05-22 21:23:23 +00:00
var deletedCount int
f := func ( bc * handshake . BufferedConn ) error {
2022-07-05 20:56:31 +00:00
n , err := sn . deleteSeriesOnConn ( bc , requestData )
2019-05-22 21:23:23 +00:00
if err != nil {
return err
}
2021-03-30 11:54:34 +00:00
deletedCount = n
2019-05-22 21:23:23 +00:00
return nil
}
2022-07-05 20:56:31 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "deleteSeries_v5" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return 0 , err
2019-05-22 21:23:23 +00:00
}
return deletedCount , nil
}
2022-06-12 01:32:13 +00:00
func ( sn * storageNode ) getLabelNames ( qt * querytracer . Tracer , requestData [ ] byte , maxLabelNames int , deadline searchutils . Deadline ) ( [ ] string , error ) {
2020-11-04 22:15:43 +00:00
var labels [ ] string
f := func ( bc * handshake . BufferedConn ) error {
2022-06-12 01:32:13 +00:00
ls , err := sn . getLabelNamesOnConn ( bc , requestData , maxLabelNames )
2020-11-04 22:15:43 +00:00
if err != nil {
return err
}
labels = ls
return nil
}
2022-06-12 01:32:13 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "labelNames_v5" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return nil , err
2020-11-04 22:15:43 +00:00
}
return labels , nil
}
2022-06-12 01:32:13 +00:00
func ( sn * storageNode ) getLabelValues ( qt * querytracer . Tracer , labelName string , requestData [ ] byte , maxLabelValues int , deadline searchutils . Deadline ) ( [ ] string , error ) {
2020-11-04 22:15:43 +00:00
var labelValues [ ] string
f := func ( bc * handshake . BufferedConn ) error {
2022-06-12 01:32:13 +00:00
lvs , err := sn . getLabelValuesOnConn ( bc , labelName , requestData , maxLabelValues )
2020-11-04 22:15:43 +00:00
if err != nil {
return err
}
labelValues = lvs
return nil
}
2022-06-12 01:32:13 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "labelValues_v5" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return nil , err
2019-05-22 21:23:23 +00:00
}
return labelValues , nil
}
2022-11-25 18:32:45 +00:00
func ( sn * storageNode ) getTenants ( qt * querytracer . Tracer , tr storage . TimeRange , deadline searchutils . Deadline ) ( [ ] string , error ) {
var tenants [ ] string
f := func ( bc * handshake . BufferedConn ) error {
result , err := sn . getTenantsOnConn ( bc , tr )
if err != nil {
return err
}
tenants = result
return nil
}
if err := sn . execOnConnWithPossibleRetry ( qt , "tenants_v1" , f , deadline ) ; err != nil {
return nil , err
}
return tenants , nil
}
2022-05-31 23:31:40 +00:00
func ( sn * storageNode ) getTagValueSuffixes ( qt * querytracer . Tracer , accountID , projectID uint32 , tr storage . TimeRange , tagKey , tagValuePrefix string ,
2022-07-05 21:31:41 +00:00
delimiter byte , maxSuffixes int , deadline searchutils . Deadline ) ( [ ] string , error ) {
2020-09-10 21:29:26 +00:00
var suffixes [ ] string
f := func ( bc * handshake . BufferedConn ) error {
2022-07-05 21:31:41 +00:00
ss , err := sn . getTagValueSuffixesOnConn ( bc , accountID , projectID , tr , tagKey , tagValuePrefix , delimiter , maxSuffixes )
2020-09-10 21:29:26 +00:00
if err != nil {
return err
}
suffixes = ss
return nil
}
2022-07-05 21:31:41 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "tagValueSuffixes_v4" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return nil , err
2020-09-10 21:29:26 +00:00
}
return suffixes , nil
}
2022-06-14 14:46:16 +00:00
func ( sn * storageNode ) getTSDBStatus ( qt * querytracer . Tracer , requestData [ ] byte , focusLabel string , topN int , deadline searchutils . Deadline ) ( * storage . TSDBStatus , error ) {
2021-05-12 12:18:45 +00:00
var status * storage . TSDBStatus
f := func ( bc * handshake . BufferedConn ) error {
2022-06-14 14:46:16 +00:00
st , err := sn . getTSDBStatusOnConn ( bc , requestData , focusLabel , topN )
2021-05-12 12:18:45 +00:00
if err != nil {
return err
}
status = st
return nil
}
2022-06-14 14:46:16 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "tsdbStatus_v5" , f , deadline ) ; err != nil {
2021-05-12 12:18:45 +00:00
return nil , err
}
return status , nil
}
2022-05-31 23:31:40 +00:00
func ( sn * storageNode ) getSeriesCount ( qt * querytracer . Tracer , accountID , projectID uint32 , deadline searchutils . Deadline ) ( uint64 , error ) {
2019-05-22 21:23:23 +00:00
var n uint64
f := func ( bc * handshake . BufferedConn ) error {
nn , err := sn . getSeriesCountOnConn ( bc , accountID , projectID )
if err != nil {
return err
}
n = nn
return nil
}
2022-05-31 23:31:40 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "seriesCount_v4" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return 0 , err
2019-05-22 21:23:23 +00:00
}
return n , nil
}
2022-06-28 14:36:27 +00:00
func ( sn * storageNode ) processSearchMetricNames ( qt * querytracer . Tracer , requestData [ ] byte , deadline searchutils . Deadline ) ( [ ] string , error ) {
var metricNames [ ] string
2020-11-16 08:55:55 +00:00
f := func ( bc * handshake . BufferedConn ) error {
mns , err := sn . processSearchMetricNamesOnConn ( bc , requestData )
if err != nil {
return err
}
metricNames = mns
return nil
}
2022-05-31 23:31:40 +00:00
if err := sn . execOnConnWithPossibleRetry ( qt , "searchMetricNames_v3" , f , deadline ) ; err != nil {
2021-03-30 11:54:34 +00:00
return nil , err
2020-11-16 08:55:55 +00:00
}
return metricNames , nil
}
2022-10-01 19:05:43 +00:00
func ( sn * storageNode ) processSearchQuery ( qt * querytracer . Tracer , requestData [ ] byte , processBlock func ( mb * storage . MetricBlock , workerID uint ) error ,
workerID uint , deadline searchutils . Deadline ) error {
2019-05-22 21:23:23 +00:00
f := func ( bc * handshake . BufferedConn ) error {
2023-09-01 07:34:16 +00:00
return sn . processSearchQueryOnConn ( bc , requestData , processBlock , workerID )
2019-05-22 21:23:23 +00:00
}
2022-06-28 09:55:20 +00:00
return sn . execOnConnWithPossibleRetry ( qt , "search_v7" , f , deadline )
2021-03-30 11:54:34 +00:00
}
2022-05-31 23:31:40 +00:00
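// execOnConnWithPossibleRetry executes the rpc function f on a connection to sn and retries it once
// on errors which may be temporary. Remote errors, network timeouts and exceeded deadlines aren't retried.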
func ( sn * storageNode ) execOnConnWithPossibleRetry ( qt * querytracer . Tracer , funcName string , f func ( bc * handshake . BufferedConn ) error , deadline searchutils . Deadline ) error {
2022-06-08 18:05:17 +00:00
qtChild := qt . NewChild ( "rpc call %s()" , funcName )
2022-05-31 23:31:40 +00:00
err := sn . execOnConn ( qtChild , funcName , f , deadline )
2023-02-01 16:56:36 +00:00
defer qtChild . Done ( )
2021-03-30 11:54:34 +00:00
if err == nil {
return nil
2019-05-22 21:23:23 +00:00
}
2021-03-30 11:54:34 +00:00
var er * errRemote
var ne net . Error
2023-11-14 18:57:29 +00:00
if errors . As ( err , & er ) || errors . As ( err , & ne ) && ne . Timeout ( ) || deadline . Exceeded ( ) {
// There is no sense in repeating the query on the following errors:
//
// - induced by vmstorage (errRemote)
// - network timeout errors
// - request deadline exceeded errors
2021-03-30 11:54:34 +00:00
return err
}
// Repeat the query in the hope the error was temporary.
2023-02-01 16:56:36 +00:00
qtRetry := qtChild . NewChild ( "retry rpc call %s() after error" , funcName )
err = sn . execOnConn ( qtRetry , funcName , f , deadline )
qtRetry . Done ( )
2022-05-31 23:31:40 +00:00
return err
2019-05-22 21:23:23 +00:00
}
2022-05-31 23:31:40 +00:00
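// execOnConn obtains a connection to sn from the pool, sends the rpc funcName, the trace flag
// and the remaining timeout, executes f on the connection and then reads the query trace from the response.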
func ( sn * storageNode ) execOnConn ( qt * querytracer . Tracer , funcName string , f func ( bc * handshake . BufferedConn ) error , deadline searchutils . Deadline ) error {
2021-05-24 16:11:35 +00:00
sn . concurrentQueries . Inc ( )
defer sn . concurrentQueries . Dec ( )
2019-05-22 21:23:23 +00:00
2020-09-16 18:03:51 +00:00
d := time . Unix ( int64 ( deadline . Deadline ( ) ) , 0 )
nowSecs := fasttime . UnixTimestamp ( )
currentTime := time . Unix ( int64 ( nowSecs ) , 0 )
timeout := d . Sub ( currentTime )
if timeout <= 0 {
2021-03-30 11:54:34 +00:00
return fmt . Errorf ( "request timeout reached: %s" , deadline . String ( ) )
2020-09-16 18:03:51 +00:00
}
2019-05-22 21:23:23 +00:00
bc , err := sn . connPool . Get ( )
if err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot obtain connection from a pool: %w" , err )
2019-05-22 21:23:23 +00:00
}
2020-09-16 18:03:51 +00:00
// Extend the connection deadline by 2 seconds, so the remote storage can return a `timeout` error
// without the need to break the connection.
connDeadline := d . Add ( 2 * time . Second )
if err := bc . SetDeadline ( connDeadline ) ; err != nil {
2019-05-22 21:23:23 +00:00
_ = bc . Close ( )
logger . Panicf ( "FATAL: cannot set connection deadline: %s" , err )
}
2022-05-31 23:31:40 +00:00
if err := writeBytes ( bc , [ ] byte ( funcName ) ) ; err != nil {
2019-05-22 21:23:23 +00:00
// Close the connection instead of returning it to the pool,
// since it may be broken.
_ = bc . Close ( )
2022-05-31 23:31:40 +00:00
return fmt . Errorf ( "cannot send funcName=%q to the server: %w" , funcName , err )
2019-05-22 21:23:23 +00:00
}
2022-05-31 23:31:40 +00:00
// Send query trace flag
traceEnabled := qt . Enabled ( )
if err := writeBool ( bc , traceEnabled ) ; err != nil {
// Close the connection instead of returning it to the pool,
// since it may be broken.
_ = bc . Close ( )
return fmt . Errorf ( "cannot send traceEnabled=%v for funcName=%q to the server: %w" , traceEnabled , funcName , err )
}
2020-07-23 17:42:57 +00:00
// Send the remaining timeout instead of the deadline to the remote server, since its clock may differ.
2020-09-16 18:03:51 +00:00
timeoutSecs := uint32 ( timeout . Seconds ( ) + 1 )
if err := writeUint32 ( bc , timeoutSecs ) ; err != nil {
2020-07-23 17:42:57 +00:00
// Close the connection instead of returning it to the pool,
// since it may be broken.
_ = bc . Close ( )
2022-05-31 23:31:40 +00:00
return fmt . Errorf ( "cannot send timeout=%d for funcName=%q to the server: %w" , timeout , funcName , err )
2020-07-23 17:42:57 +00:00
}
2022-05-31 23:31:40 +00:00
// Execute the rpc function.
2019-05-22 21:23:23 +00:00
if err := f ( bc ) ; err != nil {
remoteAddr := bc . RemoteAddr ( )
2020-06-30 21:58:26 +00:00
var er * errRemote
if errors . As ( err , & er ) {
2019-05-22 21:23:23 +00:00
// Remote error. The connection may be re-used. Return it to the pool.
2022-06-01 11:35:00 +00:00
_ = readTrace ( qt , bc )
2019-05-22 21:23:23 +00:00
sn . connPool . Put ( bc )
} else {
// Local error.
// Close the connection instead of returning it to the pool,
// since it may be broken.
_ = bc . Close ( )
}
2022-08-07 21:20:37 +00:00
if deadline . Exceeded ( ) || errors . Is ( err , os . ErrDeadlineExceeded ) {
2022-05-31 23:31:40 +00:00
return fmt . Errorf ( "cannot execute funcName=%q on vmstorage %q with timeout %s: %w" , funcName , remoteAddr , deadline . String ( ) , err )
2022-02-21 19:15:02 +00:00
}
2022-05-31 23:31:40 +00:00
return fmt . Errorf ( "cannot execute funcName=%q on vmstorage %q: %w" , funcName , remoteAddr , err )
}
// Read trace from the response
2022-06-01 11:35:00 +00:00
if err := readTrace ( qt , bc ) ; err != nil {
2022-05-31 23:31:40 +00:00
// Close the connection instead of returning it to the pool,
// since it may be broken.
_ = bc . Close ( )
2022-06-01 11:35:00 +00:00
return err
}
// Return the connection back to the pool, assuming it is healthy.
sn . connPool . Put ( bc )
return nil
}
func readTrace ( qt * querytracer . Tracer , bc * handshake . BufferedConn ) error {
bb := traceJSONBufPool . Get ( )
var err error
bb . B , err = readBytes ( bb . B [ : 0 ] , bc , maxTraceJSONSize )
if err != nil {
return fmt . Errorf ( "cannot read trace from the server: %w" , err )
2022-05-31 23:31:40 +00:00
}
if err := qt . AddJSON ( bb . B ) ; err != nil {
2022-06-01 11:35:00 +00:00
return fmt . Errorf ( "cannot parse trace read from the server: %w" , err )
2019-05-22 21:23:23 +00:00
}
2022-05-31 23:31:40 +00:00
traceJSONBufPool . Put ( bb )
2019-05-22 21:23:23 +00:00
return nil
}
2022-05-31 23:31:40 +00:00
var traceJSONBufPool bytesutil . ByteBufferPool
const maxTraceJSONSize = 1024 * 1024
2019-05-22 21:23:23 +00:00
type errRemote struct {
msg string
}
func ( er * errRemote ) Error ( ) string {
return er . msg
}
2020-06-30 21:58:26 +00:00
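// newErrRemote wraps the error message received from vmstorage into errRemote.
// Errors caused by -denyQueriesOutsideRetention are additionally returned
// with http.StatusServiceUnavailable, so the caller can handle them separately.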
func newErrRemote ( buf [ ] byte ) error {
err := & errRemote {
msg : string ( buf ) ,
}
if ! strings . Contains ( err . msg , "denyQueriesOutsideRetention" ) {
return err
}
return & httpserver . ErrorWithStatusCode {
Err : err ,
StatusCode : http . StatusServiceUnavailable ,
}
}
2020-11-23 10:33:17 +00:00
func ( sn * storageNode ) registerMetricNamesOnConn ( bc * handshake . BufferedConn , mrs [ ] storage . MetricRow ) error {
// Send the request to sn.
if err := writeUint64 ( bc , uint64 ( len ( mrs ) ) ) ; err != nil {
return fmt . Errorf ( "cannot send metricsCount to conn: %w" , err )
}
for i , mr := range mrs {
if err := writeBytes ( bc , mr . MetricNameRaw ) ; err != nil {
return fmt . Errorf ( "cannot send MetricNameRaw #%d to conn: %w" , i + 1 , err )
}
if err := writeUint64 ( bc , uint64 ( mr . Timestamp ) ) ; err != nil {
return fmt . Errorf ( "cannot send Timestamp #%d to conn: %w" , i + 1 , err )
}
}
if err := bc . Flush ( ) ; err != nil {
return fmt . Errorf ( "cannot flush registerMetricNames request to conn: %w" , err )
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
return fmt . Errorf ( "cannot read error message: %w" , err )
}
if len ( buf ) > 0 {
return newErrRemote ( buf )
}
return nil
}
2022-07-05 20:56:31 +00:00
func ( sn * storageNode ) deleteSeriesOnConn ( bc * handshake . BufferedConn , requestData [ ] byte ) ( int , error ) {
2019-05-22 21:23:23 +00:00
// Send the request to sn
if err := writeBytes ( bc , requestData ) ; err != nil {
2022-07-05 20:56:31 +00:00
return 0 , fmt . Errorf ( "cannot send deleteSeries request to conn: %w" , err )
2019-05-22 21:23:23 +00:00
}
if err := bc . Flush ( ) ; err != nil {
2022-07-05 20:56:31 +00:00
return 0 , fmt . Errorf ( "cannot flush deleteSeries request to conn: %w" , err )
2019-05-22 21:23:23 +00:00
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
2020-06-30 19:58:18 +00:00
return 0 , fmt . Errorf ( "cannot read error message: %w" , err )
2019-05-22 21:23:23 +00:00
}
if len ( buf ) > 0 {
2020-06-30 21:58:26 +00:00
return 0 , newErrRemote ( buf )
2019-05-22 21:23:23 +00:00
}
// Read deletedCount
deletedCount , err := readUint64 ( bc )
if err != nil {
2020-06-30 19:58:18 +00:00
return 0 , fmt . Errorf ( "cannot read deletedCount value: %w" , err )
2019-05-22 21:23:23 +00:00
}
return int ( deletedCount ) , nil
}
2022-06-12 01:32:13 +00:00
const maxLabelNameSize = 16 * 1024 * 1024
2020-11-04 22:15:43 +00:00
2022-06-12 01:32:13 +00:00
func ( sn * storageNode ) getLabelNamesOnConn ( bc * handshake . BufferedConn , requestData [ ] byte , maxLabelNames int ) ( [ ] string , error ) {
2019-05-22 21:23:23 +00:00
// Send the request to sn.
2022-06-12 01:32:13 +00:00
if err := writeBytes ( bc , requestData ) ; err != nil {
return nil , fmt . Errorf ( "cannot write requestData: %w" , err )
2019-05-22 21:23:23 +00:00
}
2022-06-12 01:32:13 +00:00
if err := writeLimit ( bc , maxLabelNames ) ; err != nil {
return nil , fmt . Errorf ( "cannot write maxLabelNames=%d: %w" , maxLabelNames , err )
2022-06-10 06:50:30 +00:00
}
2019-05-22 21:23:23 +00:00
if err := bc . Flush ( ) ; err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot flush request to conn: %w" , err )
2019-05-22 21:23:23 +00:00
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot read error message: %w" , err )
2019-05-22 21:23:23 +00:00
}
if len ( buf ) > 0 {
2020-06-30 21:58:26 +00:00
return nil , newErrRemote ( buf )
2019-05-22 21:23:23 +00:00
}
// Read response
var labels [ ] string
for {
2022-06-12 01:32:13 +00:00
buf , err = readBytes ( buf [ : 0 ] , bc , maxLabelNameSize )
2019-05-22 21:23:23 +00:00
if err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot read labels: %w" , err )
2019-05-22 21:23:23 +00:00
}
if len ( buf ) == 0 {
// Reached the end of the response
return labels , nil
}
labels = append ( labels , string ( buf ) )
}
}
const maxLabelValueSize = 16 * 1024 * 1024
2022-11-25 18:32:45 +00:00
const maxTenantValueSize = 16 * 1024 * 1024 // TODO: calc 'uint32:uint32'
2019-05-22 21:23:23 +00:00
2022-06-12 01:32:13 +00:00
func ( sn * storageNode ) getLabelValuesOnConn ( bc * handshake . BufferedConn , labelName string , requestData [ ] byte , maxLabelValues int ) ( [ ] string , error ) {
2020-11-04 22:15:43 +00:00
// Send the request to sn.
if err := writeBytes ( bc , [ ] byte ( labelName ) ) ; err != nil {
return nil , fmt . Errorf ( "cannot send labelName=%q to conn: %w" , labelName , err )
}
2022-06-12 01:32:13 +00:00
if err := writeBytes ( bc , requestData ) ; err != nil {
return nil , fmt . Errorf ( "cannot write requestData: %w" , err )
2019-05-22 21:23:23 +00:00
}
2022-06-12 01:32:13 +00:00
if err := writeLimit ( bc , maxLabelValues ) ; err != nil {
return nil , fmt . Errorf ( "cannot write maxLabelValues=%d: %w" , maxLabelValues , err )
2022-06-10 06:50:30 +00:00
}
2019-05-22 21:23:23 +00:00
if err := bc . Flush ( ) ; err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot flush labelName to conn: %w" , err )
2019-05-22 21:23:23 +00:00
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot read error message: %w" , err )
2019-05-22 21:23:23 +00:00
}
if len ( buf ) > 0 {
2020-06-30 21:58:26 +00:00
return nil , newErrRemote ( buf )
2019-05-22 21:23:23 +00:00
}
// Read response
2019-06-10 15:55:20 +00:00
labelValues , _ , err := readLabelValues ( buf , bc )
if err != nil {
return nil , err
}
return labelValues , nil
}
func readLabelValues ( buf [ ] byte , bc * handshake . BufferedConn ) ( [ ] string , [ ] byte , error ) {
2019-05-22 21:23:23 +00:00
var labelValues [ ] string
for {
2019-06-10 15:55:20 +00:00
var err error
2019-05-22 21:23:23 +00:00
buf , err = readBytes ( buf [ : 0 ] , bc , maxLabelValueSize )
if err != nil {
2020-06-30 19:58:18 +00:00
return nil , buf , fmt . Errorf ( "cannot read labelValue: %w" , err )
2019-05-22 21:23:23 +00:00
}
if len ( buf ) == 0 {
// Reached the end of the response
2019-06-10 15:55:20 +00:00
return labelValues , buf , nil
2019-05-22 21:23:23 +00:00
}
labelValues = append ( labelValues , string ( buf ) )
}
}
2022-11-25 18:32:45 +00:00
func ( sn * storageNode ) getTenantsOnConn ( bc * handshake . BufferedConn , tr storage . TimeRange ) ( [ ] string , error ) {
if err := writeTimeRange ( bc , tr ) ; err != nil {
return nil , err
}
if err := bc . Flush ( ) ; err != nil {
return nil , fmt . Errorf ( "cannot flush request to conn: %w" , err )
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
return nil , fmt . Errorf ( "cannot read error message: %w" , err )
}
if len ( buf ) > 0 {
return nil , newErrRemote ( buf )
}
// Read response
var tenants [ ] string
for {
var err error
buf , err = readBytes ( buf [ : 0 ] , bc , maxTenantValueSize )
if err != nil {
return nil , fmt . Errorf ( "cannot read tenant #%d: %w" , len ( tenants ) , err )
}
if len ( buf ) == 0 {
// Reached the end of the response
return tenants , nil
}
tenants = append ( tenants , string ( buf ) )
}
}
2020-09-10 21:29:26 +00:00
func ( sn * storageNode ) getTagValueSuffixesOnConn ( bc * handshake . BufferedConn , accountID , projectID uint32 ,
2022-07-05 21:31:41 +00:00
tr storage . TimeRange , tagKey , tagValuePrefix string , delimiter byte , maxSuffixes int ) ( [ ] string , error ) {
2019-06-10 15:55:20 +00:00
// Send the request to sn.
2020-09-10 21:29:26 +00:00
if err := sendAccountIDProjectID ( bc , accountID , projectID ) ; err != nil {
return nil , err
2019-06-10 15:55:20 +00:00
}
2020-11-04 22:15:43 +00:00
if err := writeTimeRange ( bc , tr ) ; err != nil {
return nil , err
2020-09-10 21:29:26 +00:00
}
if err := writeBytes ( bc , [ ] byte ( tagKey ) ) ; err != nil {
return nil , fmt . Errorf ( "cannot send tagKey=%q to conn: %w" , tagKey , err )
}
if err := writeBytes ( bc , [ ] byte ( tagValuePrefix ) ) ; err != nil {
return nil , fmt . Errorf ( "cannot send tagValuePrefix=%q to conn: %w" , tagValuePrefix , err )
}
if err := writeByte ( bc , delimiter ) ; err != nil {
return nil , fmt . Errorf ( "cannot send delimiter=%c to conn: %w" , delimiter , err )
2019-06-10 15:55:20 +00:00
}
2022-07-05 21:31:41 +00:00
if err := writeLimit ( bc , maxSuffixes ) ; err != nil {
return nil , fmt . Errorf ( "cannot send maxSuffixes=%d to conn: %w" , maxSuffixes , err )
}
2019-06-10 15:55:20 +00:00
if err := bc . Flush ( ) ; err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot flush request to conn: %w" , err )
2019-06-10 15:55:20 +00:00
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "cannot read error message: %w" , err )
2019-06-10 15:55:20 +00:00
}
if len ( buf ) > 0 {
2020-06-30 21:58:26 +00:00
return nil , newErrRemote ( buf )
2019-06-10 15:55:20 +00:00
}
2020-09-10 21:29:26 +00:00
// Read response.
// The response may contain an empty suffix, so it is prefixed with the number of suffixes that follow.
suffixesCount , err := readUint64 ( bc )
if err != nil {
return nil , fmt . Errorf ( "cannot read the number of tag value suffixes: %w" , err )
}
suffixes := make ( [ ] string , 0 , suffixesCount )
for i := 0 ; i < int ( suffixesCount ) ; i ++ {
buf , err = readBytes ( buf [ : 0 ] , bc , maxLabelValueSize )
if err != nil {
return nil , fmt . Errorf ( "cannot read tag value suffix #%d: %w" , i + 1 , err )
}
suffixes = append ( suffixes , string ( buf ) )
}
return suffixes , nil
}
2022-06-14 14:46:16 +00:00
func ( sn * storageNode ) getTSDBStatusOnConn ( bc * handshake . BufferedConn , requestData [ ] byte , focusLabel string , topN int ) ( * storage . TSDBStatus , error ) {
2021-05-12 12:18:45 +00:00
// Send the request to sn.
if err := writeBytes ( bc , requestData ) ; err != nil {
return nil , fmt . Errorf ( "cannot write requestData: %w" , err )
}
2022-06-14 14:46:16 +00:00
if err := writeBytes ( bc , [ ] byte ( focusLabel ) ) ; err != nil {
return nil , fmt . Errorf ( "cannot write focusLabel=%q: %w" , focusLabel , err )
}
2021-05-12 12:18:45 +00:00
// topN shouldn't exceed 32 bits, so send it as uint32.
if err := writeUint32 ( bc , uint32 ( topN ) ) ; err != nil {
return nil , fmt . Errorf ( "cannot send topN=%d to conn: %w" , topN , err )
}
if err := bc . Flush ( ) ; err != nil {
2022-06-14 14:46:16 +00:00
return nil , fmt . Errorf ( "cannot flush tsdbStatus args to conn: %w" , err )
2021-05-12 12:18:45 +00:00
}
// Read response error.
buf , err := readBytes ( nil , bc , maxErrorMessageSize )
if err != nil {
return nil , fmt . Errorf ( "cannot read error message: %w" , err )
}
if len ( buf ) > 0 {
return nil , newErrRemote ( buf )
}
// Read response
2022-06-08 16:25:59 +00:00
return readTSDBStatus ( bc )
}

func readTSDBStatus(bc *handshake.BufferedConn) (*storage.TSDBStatus, error) {
	totalSeries, err := readUint64(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read totalSeries: %w", err)
	}
	totalLabelValuePairs, err := readUint64(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read totalLabelValuePairs: %w", err)
	}
	seriesCountByMetricName, err := readTopHeapEntries(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read seriesCountByMetricName: %w", err)
	}
	seriesCountByLabelName, err := readTopHeapEntries(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read seriesCountByLabelName: %w", err)
	}
	seriesCountByFocusLabelValue, err := readTopHeapEntries(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read seriesCountByFocusLabelValue: %w", err)
	}
	seriesCountByLabelValuePair, err := readTopHeapEntries(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read seriesCountByLabelValuePair: %w", err)
	}
	labelValueCountByLabelName, err := readTopHeapEntries(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read labelValueCountByLabelName: %w", err)
	}
	status := &storage.TSDBStatus{
		TotalSeries:                  totalSeries,
		TotalLabelValuePairs:         totalLabelValuePairs,
		SeriesCountByMetricName:      seriesCountByMetricName,
		SeriesCountByLabelName:       seriesCountByLabelName,
		SeriesCountByFocusLabelValue: seriesCountByFocusLabelValue,
		SeriesCountByLabelValuePair:  seriesCountByLabelValuePair,
		LabelValueCountByLabelName:   labelValueCountByLabelName,
	}
	return status, nil
}

func readTopHeapEntries(bc *handshake.BufferedConn) ([]storage.TopHeapEntry, error) {
	n, err := readUint64(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read the number of topHeapEntries: %w", err)
	}
	var a []storage.TopHeapEntry
	var buf []byte
	for i := uint64(0); i < n; i++ {
		buf, err = readBytes(buf[:0], bc, maxLabelNameSize)
		if err != nil {
			return nil, fmt.Errorf("cannot read label name: %w", err)
		}
		count, err := readUint64(bc)
		if err != nil {
			return nil, fmt.Errorf("cannot read label count: %w", err)
		}
		a = append(a, storage.TopHeapEntry{
			Name:  string(buf),
			Count: count,
		})
	}
	return a, nil
}

func (sn *storageNode) getSeriesCountOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) (uint64, error) {
	// Send the request to sn.
	if err := sendAccountIDProjectID(bc, accountID, projectID); err != nil {
		return 0, err
	}
	if err := bc.Flush(); err != nil {
		return 0, fmt.Errorf("cannot flush seriesCount args to conn: %w", err)
	}

	// Read response error.
	buf, err := readBytes(nil, bc, maxErrorMessageSize)
	if err != nil {
		return 0, fmt.Errorf("cannot read error message: %w", err)
	}
	if len(buf) > 0 {
		return 0, newErrRemote(buf)
	}

	// Read response.
	n, err := readUint64(bc)
	if err != nil {
		return 0, fmt.Errorf("cannot read series count: %w", err)
	}
	return n, nil
}

// maxMetricBlockSize is the maximum size of serialized MetricBlock.
const maxMetricBlockSize = 1024 * 1024

// maxErrorMessageSize is the maximum size of error message received
// from vmstorage.
const maxErrorMessageSize = 64 * 1024

func (sn *storageNode) processSearchMetricNamesOnConn(bc *handshake.BufferedConn, requestData []byte) ([]string, error) {
	// Send the request to sn.
	if err := writeBytes(bc, requestData); err != nil {
		return nil, fmt.Errorf("cannot write requestData: %w", err)
	}
	if err := bc.Flush(); err != nil {
		return nil, fmt.Errorf("cannot flush requestData to conn: %w", err)
	}

	// Read response error.
	buf, err := readBytes(nil, bc, maxErrorMessageSize)
	if err != nil {
		return nil, fmt.Errorf("cannot read error message: %w", err)
	}
	if len(buf) > 0 {
		return nil, newErrRemote(buf)
	}

	// Read metricNames from response.
	metricNamesCount, err := readUint64(bc)
	if err != nil {
		return nil, fmt.Errorf("cannot read metricNamesCount: %w", err)
	}
	metricNames := make([]string, metricNamesCount)
	for i := int64(0); i < int64(metricNamesCount); i++ {
		buf, err = readBytes(buf[:0], bc, maxMetricNameSize)
		if err != nil {
			return nil, fmt.Errorf("cannot read metricName #%d: %w", i+1, err)
		}
		metricNames[i] = string(buf)
	}
	return metricNames, nil
}

const maxMetricNameSize = 64 * 1024

func (sn *storageNode) processSearchQueryOnConn(bc *handshake.BufferedConn, requestData []byte,
	processBlock func(mb *storage.MetricBlock, workerID uint) error, workerID uint) error {
	// Send the request to sn.
	if err := writeBytes(bc, requestData); err != nil {
		return fmt.Errorf("cannot write requestData: %w", err)
	}
	if err := bc.Flush(); err != nil {
		return fmt.Errorf("cannot flush requestData to conn: %w", err)
	}

	// Read response error.
	buf, err := readBytes(nil, bc, maxErrorMessageSize)
	if err != nil {
		return fmt.Errorf("cannot read error message: %w", err)
	}
	if len(buf) > 0 {
		return newErrRemote(buf)
	}

	// Read response. It may consist of multiple MetricBlocks.
	blocksRead := 0
	var mb storage.MetricBlock
	for {
		buf, err = readBytes(buf[:0], bc, maxMetricBlockSize)
		if err != nil {
			return fmt.Errorf("cannot read MetricBlock #%d: %w", blocksRead, err)
		}
		if len(buf) == 0 {
			// Reached the end of the response.
			return nil
		}
		tail, err := mb.Unmarshal(buf)
		if err != nil {
			return fmt.Errorf("cannot unmarshal MetricBlock #%d from %d bytes: %w", blocksRead, len(buf), err)
		}
		if len(tail) != 0 {
			return fmt.Errorf("non-empty tail after unmarshaling MetricBlock #%d: (len=%d) %q", blocksRead, len(tail), tail)
		}
		blocksRead++
		sn.metricBlocksRead.Inc()
		sn.metricRowsRead.Add(mb.Block.RowsCount())
		if err := processBlock(&mb, workerID); err != nil {
			return fmt.Errorf("cannot process MetricBlock #%d: %w", blocksRead, err)
		}
	}
}
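
// A minimal sketch of a processBlock callback compatible with the signature
// above. The names rowsPerWorker and maxWorkers are hypothetical and do not
// exist in this package; since the callback may be invoked concurrently with
// different workerID values, it only touches its own slot:
//
//	rowsPerWorker := make([]uint64, maxWorkers)
//	processBlock := func(mb *storage.MetricBlock, workerID uint) error {
//		rowsPerWorker[workerID] += uint64(mb.Block.RowsCount())
//		return nil
//	}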

func writeTimeRange(bc *handshake.BufferedConn, tr storage.TimeRange) error {
	if err := writeUint64(bc, uint64(tr.MinTimestamp)); err != nil {
		return fmt.Errorf("cannot send minTimestamp=%d to conn: %w", tr.MinTimestamp, err)
	}
	if err := writeUint64(bc, uint64(tr.MaxTimestamp)); err != nil {
		return fmt.Errorf("cannot send maxTimestamp=%d to conn: %w", tr.MaxTimestamp, err)
	}
	return nil
}

func writeLimit(bc *handshake.BufferedConn, limit int) error {
	if limit < 0 {
		limit = 0
	}
	if limit > 1<<31-1 {
		limit = 1<<31 - 1
	}
	limitU32 := uint32(limit)
	if err := writeUint32(bc, limitU32); err != nil {
		return fmt.Errorf("cannot write limit=%d to conn: %w", limitU32, err)
	}
	return nil
}
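
// Illustration of the clamping above: a negative limit is sent as 0, and any
// limit above 1<<31-1 is sent as 2147483647, so the value always fits into the
// uint32 written to the connection.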

func writeBytes(bc *handshake.BufferedConn, buf []byte) error {
	sizeBuf := encoding.MarshalUint64(nil, uint64(len(buf)))
	if _, err := bc.Write(sizeBuf); err != nil {
		return err
	}
	_, err := bc.Write(buf)
	return err
}

func writeUint32(bc *handshake.BufferedConn, n uint32) error {
	buf := encoding.MarshalUint32(nil, n)
	_, err := bc.Write(buf)
	return err
}

func writeUint64(bc *handshake.BufferedConn, n uint64) error {
	buf := encoding.MarshalUint64(nil, n)
	_, err := bc.Write(buf)
	return err
}

func writeBool(bc *handshake.BufferedConn, b bool) error {
	var buf [1]byte
	if b {
		buf[0] = 1
	}
	_, err := bc.Write(buf[:])
	return err
}

func writeByte(bc *handshake.BufferedConn, b byte) error {
	var buf [1]byte
	buf[0] = b
	_, err := bc.Write(buf[:])
	return err
}

func sendAccountIDProjectID(bc *handshake.BufferedConn, accountID, projectID uint32) error {
	if err := writeUint32(bc, accountID); err != nil {
		return fmt.Errorf("cannot send accountID=%d to conn: %w", accountID, err)
	}
	if err := writeUint32(bc, projectID); err != nil {
		return fmt.Errorf("cannot send projectID=%d to conn: %w", projectID, err)
	}
	return nil
}

func readBytes(buf []byte, bc *handshake.BufferedConn, maxDataSize int) ([]byte, error) {
	buf = bytesutil.ResizeNoCopyMayOverallocate(buf, 8)
	if n, err := io.ReadFull(bc, buf); err != nil {
		return buf, fmt.Errorf("cannot read %d bytes with data size: %w; read only %d bytes", len(buf), err, n)
	}
	dataSize := encoding.UnmarshalUint64(buf)
	if dataSize > uint64(maxDataSize) {
		return buf, fmt.Errorf("too big data size: %d; it mustn't exceed %d bytes", dataSize, maxDataSize)
	}
	buf = bytesutil.ResizeNoCopyMayOverallocate(buf, int(dataSize))
	if dataSize == 0 {
		return buf, nil
	}
	if n, err := io.ReadFull(bc, buf); err != nil {
		return buf, fmt.Errorf("cannot read data with size %d: %w; read only %d bytes", dataSize, err, n)
	}
	return buf, nil
}

func readUint64(bc *handshake.BufferedConn) (uint64, error) {
	var buf [8]byte
	if _, err := io.ReadFull(bc, buf[:]); err != nil {
		return 0, fmt.Errorf("cannot read uint64: %w", err)
	}
	n := encoding.UnmarshalUint64(buf[:])
	return n, nil
}
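
// writeBytes and readBytes implement a simple length-prefixed framing: an 8-byte
// size header marshaled via lib/encoding, followed by the payload. A hedged
// round-trip sketch (bc is an established *handshake.BufferedConn, error
// handling elided):
//
//	_ = writeBytes(bc, []byte("up"))
//	_ = bc.Flush()
//	// On the receiving side:
//	buf, err := readBytes(nil, bc, maxMetricNameSize)
//	// buf now holds []byte("up"); err is non-nil on short reads or frames above maxDataSize.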

type storageNodesBucket struct {
	ms  *metrics.Set
	sns []*storageNode
}

var storageNodes atomic.Pointer[storageNodesBucket]

func getStorageNodesBucket() *storageNodesBucket {
	return storageNodes.Load()
}

func setStorageNodesBucket(snb *storageNodesBucket) {
	storageNodes.Store(snb)
}

func getStorageNodes() []*storageNode {
	snb := getStorageNodesBucket()
	return snb.sns
}
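
// A hypothetical reader of the bucket: the pointer is swapped as a whole by
// setStorageNodesBucket rather than mutated in place, so a caller always sees
// one consistent []*storageNode slice:
//
//	sns := getStorageNodes()
//	for _, sn := range sns {
//		// use sn ...
//	}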

// Init initializes storage nodes' connections to the given addrs.
//
// MustStop must be called when the initialized connections are no longer needed.
func Init(addrs []string) {
	snb := initStorageNodes(addrs)
	setStorageNodesBucket(snb)
}

// MustStop gracefully stops netstorage.
func MustStop() {
	snb := getStorageNodesBucket()
	mustStopStorageNodes(snb)
}

func initStorageNodes(addrs []string) *storageNodesBucket {
	if len(addrs) == 0 {
		logger.Panicf("BUG: addrs must be non-empty")
	}
	groupsMap := initStorageNodeGroups(addrs)

	var snsLock sync.Mutex
	sns := make([]*storageNode, 0, len(addrs))
	var wg sync.WaitGroup
	ms := metrics.NewSet()
	// Initialize connections to storage nodes in parallel in order to speed up
	// the initialization for a big number of storage nodes.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4364
	for _, addr := range addrs {
		var groupName string
		groupName, addr = netutil.ParseGroupAddr(addr)
		group := groupsMap[groupName]
		wg.Add(1)
		go func(addr string) {
			defer wg.Done()
			sn := newStorageNode(ms, group, addr)
			snsLock.Lock()
			sns = append(sns, sn)
			snsLock.Unlock()
		}(addr)
	}
	wg.Wait()
	metrics.RegisterSet(ms)
	return &storageNodesBucket{
		sns: sns,
		ms:  ms,
	}
}

func newStorageNode(ms *metrics.Set, group *storageNodesGroup, addr string) *storageNode {
	if _, _, err := net.SplitHostPort(addr); err != nil {
		// Automatically add missing port.
		addr += ":8401"
	}
	// There is no need for request compression, since vmselect requests are usually very small.
	connPool := netutil.NewConnPool(ms, "vmselect", addr, handshake.VMSelectClient, 0, *vmstorageDialTimeout, *vmstorageUserTimeout)

	sn := &storageNode{
		group:    group,
		connPool: connPool,

		concurrentQueries: ms.NewCounter(fmt.Sprintf(`vm_concurrent_queries{name="vmselect", addr=%q}`, addr)),

		registerMetricNamesRequests: ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="registerMetricNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		registerMetricNamesErrors:   ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="registerMetricNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		deleteSeriesRequests:        ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="deleteSeries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		deleteSeriesErrors:          ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="deleteSeries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		labelNamesRequests:          ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="labelNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		labelNamesErrors:            ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labelNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		labelValuesRequests:         ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		labelValuesErrors:           ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tagValueSuffixesRequests:    ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="tagValueSuffixes", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tagValueSuffixesErrors:      ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="tagValueSuffixes", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tsdbStatusRequests:          ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="tsdbStatus", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tsdbStatusErrors:            ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="tsdbStatus", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		seriesCountRequests:         ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		seriesCountErrors:           ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		searchMetricNamesRequests:   ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="searchMetricNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		searchMetricNamesErrors:     ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="searchMetricNames", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		searchRequests:              ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		searchErrors:                ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tenantsRequests:             ms.NewCounter(fmt.Sprintf(`vm_requests_total{action="tenants", type="rpcClient", name="vmselect", addr=%q}`, addr)),
		tenantsErrors:               ms.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="tenants", type="rpcClient", name="vmselect", addr=%q}`, addr)),

		metricBlocksRead: ms.NewCounter(fmt.Sprintf(`vm_metric_blocks_read_total{name="vmselect", addr=%q}`, addr)),
		metricRowsRead:   ms.NewCounter(fmt.Sprintf(`vm_metric_rows_read_total{name="vmselect", addr=%q}`, addr)),
	}
	return sn
}

func mustStopStorageNodes(snb *storageNodesBucket) {
	for _, sn := range snb.sns {
		sn.connPool.MustStop()
	}
	metrics.UnregisterSet(snb.ms)
	snb.ms.UnregisterAllMetrics()
}

var (
	partialLabelNamesResults        = metrics.NewCounter(`vm_partial_results_total{action="labelNames", name="vmselect"}`)
	partialLabelValuesResults       = metrics.NewCounter(`vm_partial_results_total{action="labelValues", name="vmselect"}`)
	partialTagValueSuffixesResults  = metrics.NewCounter(`vm_partial_results_total{action="tagValueSuffixes", name="vmselect"}`)
	partialTSDBStatusResults        = metrics.NewCounter(`vm_partial_results_total{action="tsdbStatus", name="vmselect"}`)
	partialSeriesCountResults       = metrics.NewCounter(`vm_partial_results_total{action="seriesCount", name="vmselect"}`)
	partialSearchMetricNamesResults = metrics.NewCounter(`vm_partial_results_total{action="searchMetricNames", name="vmselect"}`)
	partialSearchResults            = metrics.NewCounter(`vm_partial_results_total{action="search", name="vmselect"}`)
)

func applyGraphiteRegexpFilter(filter string, ss []string) ([]string, error) {
	// Anchor filter regexp to the beginning of the string as Graphite does.
	// See https://github.com/graphite-project/graphite-web/blob/3ad279df5cb90b211953e39161df416e54a84948/webapp/graphite/tags/localdatabase.py#L157
	filter = "^(?:" + filter + ")"
	re, err := metricsql.CompileRegexp(filter)
	if err != nil {
		return nil, fmt.Errorf("cannot parse regexp filter=%q: %w", filter, err)
	}
	dst := ss[:0]
	for _, s := range ss {
		if re.MatchString(s) {
			dst = append(dst, s)
		}
	}
	return dst, nil
}
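
// Worked example (illustrative, not used elsewhere): with filter "foo\\.b.*"
// the anchored regexp becomes "^(?:foo\\.b.*)", so "foo.bar" and "foo.baz" are
// kept while "xfoo.bar" is dropped, since the match must start at the beginning
// of the string. Note that dst reuses the backing array of ss, so the input
// slice is filtered in place.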

type uint64WithPadding struct {
	n uint64

	// The padding prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(uint64(0))%128]byte
}
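
// Padding arithmetic for reference: unsafe.Sizeof(uint64(0)) is 8, so the
// padding array holds 128-8%128 = 120 bytes and the whole struct occupies 128
// bytes, a multiple of the common 64- and 128-byte cache line sizes, which
// keeps adjacent counters on separate cache lines.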

type perNodeCounter struct {
	ns []uint64WithPadding
}

func newPerNodeCounter(sns []*storageNode) *perNodeCounter {
	return &perNodeCounter{
		ns: make([]uint64WithPadding, len(sns)),
	}
}

func (pnc *perNodeCounter) Add(nodeIdx uint, n uint64) uint64 {
	return atomic.AddUint64(&pnc.ns[nodeIdx].n, n)
}

func (pnc *perNodeCounter) GetTotal() uint64 {
	var total uint64
	for _, n := range pnc.ns {
		total += n.n
	}
	return total
}
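
// A minimal usage sketch (assumed, not taken from the surrounding code): one
// padded slot per storage node lets concurrent workers bump their own counter
// without cache-line contention, and the total is aggregated at the end:
//
//	pnc := newPerNodeCounter(getStorageNodes())
//	// inside the goroutine handling node idx:
//	pnc.Add(uint(idx), 1)
//	// after all goroutines finish:
//	total := pnc.GetTotal()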