2019-05-22 21:23:23 +00:00
package netstorage
import (
2021-10-08 09:52:56 +00:00
"errors"
2019-05-24 09:51:07 +00:00
"flag"
2019-05-22 21:23:23 +00:00
"fmt"
2020-04-27 06:32:08 +00:00
"io"
2021-09-15 15:04:28 +00:00
"net"
2019-05-22 21:23:23 +00:00
"sync"
2020-05-27 12:07:16 +00:00
"sync/atomic"
2019-05-22 21:23:23 +00:00
"time"
2020-05-27 12:07:16 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2019-06-08 19:29:25 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consts"
2019-05-22 21:23:23 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
2020-06-01 11:33:29 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2019-05-22 21:23:23 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2019-06-08 19:29:25 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
2019-05-22 21:23:23 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
2019-06-08 19:29:25 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
2020-09-28 18:35:40 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
2024-01-22 16:12:37 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
2019-05-22 21:23:23 +00:00
"github.com/VictoriaMetrics/metrics"
2022-06-21 17:27:05 +00:00
"github.com/cespare/xxhash/v2"
2019-05-22 21:23:23 +00:00
)
2020-05-28 16:57:05 +00:00
// Command-line flags controlling how vminsert sends data to vmstorage nodes.
var (
	// disableRPCCompression switches the RPC compression level to 0 in dial().
	disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression for the data sent from vminsert to vmstorage. This reduces CPU usage at the cost of higher network bandwidth usage")

	// replicationFactor is the number of copies to send for every block of rows.
	// It is clamped to the [1, number of storage nodes] range in storageNode.run().
	replicationFactor = flag.Int("replicationFactor", 1, "Replication factor for the ingested data, i.e. how many copies to make among distinct -storageNode instances. "+
		"Note that vmselect must run with -dedup.minScrapeInterval=1ms for data de-duplication when replicationFactor is greater than 1. "+
		"Higher values for -dedup.minScrapeInterval at vmselect is OK")

	// disableRerouting disables re-routing from slow (but available) storage nodes.
	disableRerouting = flag.Bool("disableRerouting", true, "Whether to disable re-routing when some of vmstorage nodes accept incoming data at slower speed compared to other storage nodes. Disabled re-routing limits the ingestion rate by the slowest vmstorage node. On the other side, disabled re-routing minimizes the number of active time series in the cluster during rolling restarts and during spikes in series churn rate. See also -disableReroutingOnUnavailable and -dropSamplesOnOverload")

	// dropSamplesOnOverload makes push() drop rows instead of re-routing or blocking.
	dropSamplesOnOverload = flag.Bool("dropSamplesOnOverload", false, "Whether to drop incoming samples if the destination vmstorage node is overloaded and/or unavailable. This prioritizes cluster availability over consistency, e.g. the cluster continues accepting all the ingested samples, but some of them may be dropped if vmstorage nodes are temporarily unavailable and/or overloaded. The drop of samples happens before the replication, so it's not recommended to use this flag with -replicationFactor enabled.")

	// vmstorageDialTimeout and vmstorageUserTimeout are passed to the TCP dialer of every storage node.
	vmstorageDialTimeout = flag.Duration("vmstorageDialTimeout", 3*time.Second, "Timeout for establishing RPC connections from vminsert to vmstorage. "+
		"See also -vmstorageUserTimeout")
	vmstorageUserTimeout = flag.Duration("vmstorageUserTimeout", 3*time.Second, "Network timeout for RPC connections from vminsert to vmstorage (Linux only). "+
		"Lower values speed up re-routing recovery when some of vmstorage nodes become unavailable because of networking issues. "+
		"Read more about TCP_USER_TIMEOUT at https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ . "+
		"See also -vmstorageDialTimeout")

	// disableReroutingOnUnavailable disables re-routing from unavailable storage nodes.
	disableReroutingOnUnavailable = flag.Bool("disableReroutingOnUnavailable", false, "Whether to disable re-routing when some of vmstorage nodes are unavailable. "+
		"Disabled re-routing stops ingestion when some storage nodes are unavailable. "+
		"On the other side, disabled re-routing minimizes the number of active time series in the cluster "+
		"during rolling restarts and during spikes in series churn rate. "+
		"See also -disableRerouting")
)
2019-05-24 09:51:07 +00:00
2021-10-08 10:52:56 +00:00
// errStorageReadOnly is returned by sendToConn when vmstorage answers
// with the read-only `ack` code instead of accepting the data.
var errStorageReadOnly = errors.New("storage node is read only")
2021-10-08 09:52:56 +00:00
2022-02-06 18:20:02 +00:00
func ( sn * storageNode ) isReady ( ) bool {
2024-02-24 00:44:19 +00:00
return ! sn . broken . Load ( ) && ! sn . isReadOnly . Load ( )
2020-05-27 12:07:16 +00:00
}
// push pushes buf to sn internal bufs.
2019-05-22 21:23:23 +00:00
//
2020-05-27 12:07:16 +00:00
// This function doesn't block on fast path.
2022-10-25 11:41:56 +00:00
// It may block only if storage nodes cannot handle the incoming ingestion rate.
2020-05-27 12:07:16 +00:00
// This blocking provides backpressure to the caller.
//
// The function falls back to sending data to other vmstorage nodes
// if sn is currently unavailable or overloaded.
2019-06-08 19:29:25 +00:00
//
2022-02-06 18:20:02 +00:00
// rows must match the number of rows in the buf.
2022-11-09 09:36:38 +00:00
func ( sn * storageNode ) push ( snb * storageNodesBucket , buf [ ] byte , rows int ) error {
2020-05-24 22:39:24 +00:00
if len ( buf ) > maxBufSizePerStorageNode {
logger . Panicf ( "BUG: len(buf)=%d cannot exceed %d" , len ( buf ) , maxBufSizePerStorageNode )
2019-06-08 19:29:25 +00:00
}
sn . rowsPushed . Add ( rows )
2022-02-06 18:20:02 +00:00
if sn . trySendBuf ( buf , rows ) {
// Fast path - the buffer is successfully sent to sn.
return nil
}
2024-02-24 00:44:19 +00:00
if * dropSamplesOnOverload && ! sn . isReadOnly . Load ( ) {
2022-02-06 18:21:40 +00:00
sn . rowsDroppedOnOverload . Add ( rows )
2022-06-27 09:31:16 +00:00
dropSamplesOnOverloadLogger . Warnf ( "some rows dropped, because -dropSamplesOnOverload is set and vmstorage %s cannot accept new rows now. " +
"See vm_rpc_rows_dropped_on_overload_total metric at /metrics page" , sn . dialer . Addr ( ) )
2022-02-06 18:21:40 +00:00
return nil
}
2022-02-06 18:20:02 +00:00
// Slow path - sn cannot accept buf now, so re-route it to other vmstorage nodes.
2022-11-09 09:36:38 +00:00
if err := sn . rerouteBufToOtherStorageNodes ( snb , buf , rows ) ; err != nil {
2022-02-06 18:20:02 +00:00
return fmt . Errorf ( "error when re-routing rows from %s: %w" , sn . dialer . Addr ( ) , err )
}
return nil
}
2019-06-08 19:29:25 +00:00
2022-06-27 09:31:16 +00:00
// dropSamplesOnOverloadLogger is the throttled logger used by push()
// when rows are dropped because of -dropSamplesOnOverload.
var dropSamplesOnOverloadLogger = logger.WithThrottler("droppedSamplesOnOverload", 5*time.Second)
2022-11-09 09:36:38 +00:00
func ( sn * storageNode ) rerouteBufToOtherStorageNodes ( snb * storageNodesBucket , buf [ ] byte , rows int ) error {
sns := snb . sns
2021-06-04 01:33:49 +00:00
sn . brLock . Lock ( )
again :
select {
2022-10-25 11:41:56 +00:00
case <- sn . stopCh :
2021-06-04 01:33:49 +00:00
sn . brLock . Unlock ( )
return fmt . Errorf ( "cannot send %d rows because of graceful shutdown" , rows )
default :
}
2024-02-27 12:15:15 +00:00
2022-02-06 18:20:02 +00:00
if ! sn . isReady ( ) {
2022-10-25 11:41:56 +00:00
if len ( sns ) == 1 {
2021-06-04 01:33:49 +00:00
// There are no other storage nodes to re-route to. So wait until the current node becomes healthy.
sn . brCond . Wait ( )
goto again
}
2024-02-05 12:46:57 +00:00
if * disableReroutingOnUnavailable {
2024-02-27 12:15:15 +00:00
// We should not send timeseries from currently unavailable storage to alive storage nodes.
2024-02-05 12:46:57 +00:00
sn . brCond . Wait ( )
goto again
}
2021-06-04 01:33:49 +00:00
sn . brLock . Unlock ( )
2024-02-27 12:15:15 +00:00
2022-02-06 18:20:02 +00:00
// The vmstorage node isn't ready for data processing. Re-route buf to healthy vmstorage nodes even if disableRerouting is set.
2022-11-09 09:36:38 +00:00
rowsProcessed , err := rerouteRowsToReadyStorageNodes ( snb , sn , buf )
2022-02-06 18:20:02 +00:00
rows -= rowsProcessed
if err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "%d rows dropped because the current vsmtorage is unavailable and %w" , rows , err )
2019-06-08 19:29:25 +00:00
}
2019-05-22 21:23:23 +00:00
return nil
}
2024-02-27 12:15:15 +00:00
2020-05-27 12:07:16 +00:00
if len ( sn . br . buf ) + len ( buf ) <= maxBufSizePerStorageNode {
2019-06-08 19:29:25 +00:00
// Fast path: the buf contents fits sn.buf.
2020-05-27 12:07:16 +00:00
sn . br . buf = append ( sn . br . buf , buf ... )
sn . br . rows += rows
sn . brLock . Unlock ( )
2019-06-08 19:29:25 +00:00
return nil
2019-05-22 21:23:23 +00:00
}
2022-02-06 18:20:02 +00:00
// Slow path: the buf contents doesn't fit sn.buf, so try re-routing it to other vmstorage nodes.
2022-10-25 11:41:56 +00:00
if * disableRerouting || len ( sns ) == 1 {
2021-06-04 01:33:49 +00:00
sn . brCond . Wait ( )
goto again
}
2020-05-27 12:07:16 +00:00
sn . brLock . Unlock ( )
2022-11-09 09:36:38 +00:00
rowsProcessed , err := rerouteRowsToFreeStorageNodes ( snb , sn , buf )
2022-02-06 18:20:02 +00:00
rows -= rowsProcessed
if err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "%d rows dropped because the current vmstorage buf is full and %w" , rows , err )
2019-06-08 19:29:25 +00:00
}
return nil
}
2022-11-09 09:36:38 +00:00
func ( sn * storageNode ) run ( snb * storageNodesBucket , snIdx int ) {
2020-05-28 16:57:05 +00:00
replicas := * replicationFactor
if replicas <= 0 {
replicas = 1
}
2022-11-09 09:36:38 +00:00
sns := snb . sns
2022-10-25 11:41:56 +00:00
if replicas > len ( sns ) {
replicas = len ( sns )
2020-05-28 16:57:05 +00:00
}
2021-10-08 10:52:56 +00:00
sn . readOnlyCheckerWG . Add ( 1 )
go func ( ) {
defer sn . readOnlyCheckerWG . Done ( )
2022-10-25 11:41:56 +00:00
sn . readOnlyChecker ( )
2021-10-08 10:52:56 +00:00
} ( )
defer sn . readOnlyCheckerWG . Wait ( )
2024-01-22 16:12:37 +00:00
d := timeutil . AddJitterToDuration ( time . Millisecond * 200 )
ticker := time . NewTicker ( d )
2020-05-27 12:07:16 +00:00
defer ticker . Stop ( )
var br bufRows
2020-06-01 11:33:29 +00:00
brLastResetTime := fasttime . UnixTimestamp ( )
2020-05-27 12:07:16 +00:00
mustStop := false
for ! mustStop {
sn . brLock . Lock ( )
2024-02-27 12:15:15 +00:00
waitForNewData := len ( sn . br . buf ) == 0
2020-05-27 12:07:16 +00:00
sn . brLock . Unlock ( )
2024-02-27 12:15:15 +00:00
if waitForNewData {
select {
case <- sn . stopCh :
mustStop = true
// Make sure the br.buf is flushed last time before returning
// in order to send the remaining bits of data.
case <- ticker . C :
}
2020-05-27 12:07:16 +00:00
}
2024-02-27 12:15:15 +00:00
2020-09-28 18:35:40 +00:00
sn . brLock . Lock ( )
sn . br , br = br , sn . br
2021-06-04 01:33:49 +00:00
sn . brCond . Broadcast ( )
2020-09-28 18:35:40 +00:00
sn . brLock . Unlock ( )
2024-02-27 12:15:15 +00:00
2020-06-01 11:33:29 +00:00
currentTime := fasttime . UnixTimestamp ( )
if len ( br . buf ) < cap ( br . buf ) / 4 && currentTime - brLastResetTime > 10 {
// Free up capacity space occupied by br.buf in order to reduce memory usage after spikes.
br . buf = append ( br . buf [ : 0 : 0 ] , br . buf ... )
brLastResetTime = currentTime
}
2020-09-28 18:35:40 +00:00
sn . checkHealth ( )
2020-05-28 16:57:05 +00:00
if len ( br . buf ) == 0 {
2020-09-28 18:35:40 +00:00
// Nothing to send.
2020-05-27 12:07:16 +00:00
continue
}
2022-10-25 11:41:56 +00:00
// Send br to replicas storage nodes starting from snIdx.
2022-11-09 09:36:38 +00:00
for ! sendBufToReplicasNonblocking ( snb , & br , snIdx , replicas ) {
2024-01-22 16:12:37 +00:00
d := timeutil . AddJitterToDuration ( time . Millisecond * 200 )
t := timerpool . Get ( d )
2020-09-28 18:35:40 +00:00
select {
2022-10-25 11:41:56 +00:00
case <- sn . stopCh :
2020-09-28 18:35:40 +00:00
timerpool . Put ( t )
2024-02-27 12:15:15 +00:00
logger . Errorf ( "dropping %d rows on graceful shutdown, since all the vmstorage nodes are unavailable" , br . rows )
2020-09-28 18:35:40 +00:00
return
case <- t . C :
timerpool . Put ( t )
sn . checkHealth ( )
}
2020-05-27 12:07:16 +00:00
}
2020-05-28 16:57:05 +00:00
br . reset ( )
}
}
2022-11-09 09:36:38 +00:00
func sendBufToReplicasNonblocking ( snb * storageNodesBucket , br * bufRows , snIdx , replicas int ) bool {
2022-02-07 13:38:54 +00:00
usedStorageNodes := make ( map [ * storageNode ] struct { } , replicas )
2022-11-09 09:36:38 +00:00
sns := snb . sns
2020-05-28 16:57:05 +00:00
for i := 0 ; i < replicas ; i ++ {
idx := snIdx + i
attempts := 0
for {
attempts ++
2022-10-25 11:41:56 +00:00
if attempts > len ( sns ) {
2020-05-28 16:57:05 +00:00
if i == 0 {
// The data wasn't replicated at all.
2022-06-27 09:31:16 +00:00
cannotReplicateLogger . Warnf ( "cannot push %d bytes with %d rows to storage nodes, since all the nodes are temporarily unavailable; " +
"re-trying to send the data soon" , len ( br . buf ) , br . rows )
2020-05-28 16:57:05 +00:00
return false
}
// The data is partially replicated, so just emit a warning and return true.
// We could retry sending the data again, but this may result in uncontrolled duplicate data.
// So it is better returning true.
rowsIncompletelyReplicatedTotal . Add ( br . rows )
2022-06-27 09:31:16 +00:00
incompleteReplicationLogger . Warnf ( "cannot make a copy #%d out of %d copies according to -replicationFactor=%d for %d bytes with %d rows, " +
"since a part of storage nodes is temporarily unavailable" , i + 1 , replicas , * replicationFactor , len ( br . buf ) , br . rows )
2020-05-28 16:57:05 +00:00
return true
}
2022-10-25 11:41:56 +00:00
if idx >= len ( sns ) {
idx %= len ( sns )
2020-05-28 16:57:05 +00:00
}
2022-10-25 11:41:56 +00:00
sn := sns [ idx ]
2020-05-28 16:57:05 +00:00
idx ++
2022-02-07 13:38:54 +00:00
if _ , ok := usedStorageNodes [ sn ] ; ok {
2020-05-28 16:57:05 +00:00
// The br has been already replicated to sn. Skip it.
continue
}
2020-09-28 18:35:40 +00:00
if ! sn . sendBufRowsNonblocking ( br ) {
2020-05-28 16:57:05 +00:00
// Cannot send data to sn. Go to the next sn.
continue
}
// Successfully sent data to sn.
2022-02-07 13:38:54 +00:00
usedStorageNodes [ sn ] = struct { } { }
2020-05-28 16:57:05 +00:00
break
2020-05-27 12:07:16 +00:00
}
2019-05-22 21:23:23 +00:00
}
2020-05-28 16:57:05 +00:00
return true
}
2022-06-27 09:31:16 +00:00
// Throttled loggers used by sendBufToReplicasNonblocking.
var (
	// cannotReplicateLogger reports that no storage node accepted the data.
	cannotReplicateLogger = logger.WithThrottler("cannotReplicateDataBecauseNoStorageNodes", 5*time.Second)

	// incompleteReplicationLogger reports that fewer copies than requested have been made.
	incompleteReplicationLogger = logger.WithThrottler("incompleteReplication", 5*time.Second)
)
2020-06-18 17:41:33 +00:00
func ( sn * storageNode ) checkHealth ( ) {
sn . bcLock . Lock ( )
defer sn . bcLock . Unlock ( )
if sn . bc != nil {
2020-09-28 18:35:40 +00:00
// The sn looks healthy.
return
2020-06-18 17:41:33 +00:00
}
bc , err := sn . dial ( )
if err != nil {
2024-02-24 00:44:19 +00:00
sn . broken . Store ( true )
2021-06-04 01:33:49 +00:00
sn . brCond . Broadcast ( )
2020-09-28 21:20:01 +00:00
if sn . lastDialErr == nil {
// Log the error only once.
sn . lastDialErr = err
logger . Warnf ( "cannot dial storageNode %q: %s" , sn . dialer . Addr ( ) , err )
}
2020-06-18 17:51:28 +00:00
return
2020-06-18 17:41:33 +00:00
}
2020-09-28 18:35:40 +00:00
logger . Infof ( "successfully dialed -storageNode=%q" , sn . dialer . Addr ( ) )
2020-09-28 21:20:01 +00:00
sn . lastDialErr = nil
2020-06-18 17:41:33 +00:00
sn . bc = bc
2024-02-24 00:44:19 +00:00
sn . broken . Store ( false )
2021-06-04 01:33:49 +00:00
sn . brCond . Broadcast ( )
2020-06-18 17:41:33 +00:00
}
2020-09-28 18:35:40 +00:00
func ( sn * storageNode ) sendBufRowsNonblocking ( br * bufRows ) bool {
2022-02-06 18:20:02 +00:00
if ! sn . isReady ( ) {
2020-09-28 18:35:40 +00:00
return false
}
2024-02-27 12:15:15 +00:00
2020-05-28 16:57:05 +00:00
sn . bcLock . Lock ( )
defer sn . bcLock . Unlock ( )
if sn . bc == nil {
2020-09-28 18:35:40 +00:00
// Do not call sn.dial() here in order to prevent long blocking on sn.bcLock.Lock(),
// which can negatively impact data sending in sendBufToReplicasNonblocking().
2020-11-13 22:43:32 +00:00
// sn.dial() should be called by sn.checkHealth() on unsuccessful call to sendBufToReplicasNonblocking().
2020-09-28 18:35:40 +00:00
return false
2020-05-28 16:57:05 +00:00
}
2021-08-11 08:40:52 +00:00
startTime := time . Now ( )
2020-05-28 16:57:05 +00:00
err := sendToConn ( sn . bc , br . buf )
2021-08-11 08:40:52 +00:00
duration := time . Since ( startTime )
sn . sendDurationSeconds . Add ( duration . Seconds ( ) )
2020-05-28 16:57:05 +00:00
if err == nil {
2020-09-28 18:35:40 +00:00
// Successfully sent buf to bc.
2020-05-28 16:57:05 +00:00
sn . rowsSent . Add ( br . rows )
return true
}
2021-10-08 10:52:56 +00:00
if errors . Is ( err , errStorageReadOnly ) {
// The vmstorage is transitioned to readonly mode.
2024-02-24 00:44:19 +00:00
sn . isReadOnly . Store ( true )
2021-10-08 09:52:56 +00:00
sn . brCond . Broadcast ( )
2021-10-08 10:52:56 +00:00
// Signal the caller that the data wasn't accepted by the vmstorage,
// so it will be re-routed to the remaining vmstorage nodes.
2021-10-08 09:52:56 +00:00
return false
}
2020-05-28 16:57:05 +00:00
// Couldn't flush buf to sn. Mark sn as broken.
2022-06-27 09:31:16 +00:00
cannotSendBufsLogger . Warnf ( "cannot send %d bytes with %d rows to -storageNode=%q: %s; closing the connection to storageNode and " +
"re-routing this data to healthy storage nodes" , len ( br . buf ) , br . rows , sn . dialer . Addr ( ) , err )
2020-05-28 16:57:05 +00:00
if err = sn . bc . Close ( ) ; err != nil {
2022-08-04 15:33:21 +00:00
cannotCloseStorageNodeConnLogger . Warnf ( "cannot close connection to storageNode %q: %s" , sn . dialer . Addr ( ) , err )
2020-05-28 16:57:05 +00:00
}
sn . bc = nil
2024-02-24 00:44:19 +00:00
sn . broken . Store ( true )
2021-06-04 01:33:49 +00:00
sn . brCond . Broadcast ( )
2020-09-28 18:35:40 +00:00
sn . connectionErrors . Inc ( )
2020-05-28 16:57:05 +00:00
return false
2019-05-22 21:23:23 +00:00
}
2022-08-04 15:33:21 +00:00
var cannotCloseStorageNodeConnLogger = logger . WithThrottler ( "cannotCloseStorageNodeConn" , 5 * time . Second )
2022-06-27 09:31:16 +00:00
var cannotSendBufsLogger = logger . WithThrottler ( "cannotSendBufRows" , 5 * time . Second )
2020-05-27 12:07:16 +00:00
func sendToConn ( bc * handshake . BufferedConn , buf [ ] byte ) error {
2023-08-30 14:24:24 +00:00
// if len(buf) == 0, it must be sent to the vmstorage too in order to check for vmstorage health
// See checkReadOnlyMode() and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4870
2020-01-21 16:20:14 +00:00
timeoutSeconds := len ( buf ) / 3e5
2019-09-13 19:25:15 +00:00
if timeoutSeconds < 60 {
timeoutSeconds = 60
2019-09-11 10:37:03 +00:00
}
timeout := time . Duration ( timeoutSeconds ) * time . Second
deadline := time . Now ( ) . Add ( timeout )
2020-04-27 06:32:08 +00:00
if err := bc . SetWriteDeadline ( deadline ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot set write deadline to %s: %w" , deadline , err )
2019-05-22 21:23:23 +00:00
}
2019-09-11 10:37:03 +00:00
// sizeBuf guarantees that the rows batch will be either fully
// read or fully discarded on the vmstorage side.
// sizeBuf is used for read optimization in vmstorage.
2020-05-27 12:07:16 +00:00
sizeBuf := sizeBufPool . Get ( )
defer sizeBufPool . Put ( sizeBuf )
sizeBuf . B = encoding . MarshalUint64 ( sizeBuf . B [ : 0 ] , uint64 ( len ( buf ) ) )
if _ , err := bc . Write ( sizeBuf . B ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot write data size %d: %w" , len ( buf ) , err )
2019-05-22 21:23:23 +00:00
}
2020-04-27 06:32:08 +00:00
if _ , err := bc . Write ( buf ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot write data with size %d: %w" , len ( buf ) , err )
2019-05-22 21:23:23 +00:00
}
2020-04-27 06:32:08 +00:00
if err := bc . Flush ( ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot flush data with size %d: %w" , len ( buf ) , err )
2019-06-08 19:29:25 +00:00
}
2020-04-27 06:32:08 +00:00
// Wait for `ack` from vmstorage.
// This guarantees that the message has been fully received by vmstorage.
2020-04-28 08:18:41 +00:00
deadline = time . Now ( ) . Add ( timeout )
2020-04-27 06:32:08 +00:00
if err := bc . SetReadDeadline ( deadline ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot set read deadline for reading `ack` to vmstorage: %w" , err )
2020-04-27 06:32:08 +00:00
}
2020-05-27 12:07:16 +00:00
if _ , err := io . ReadFull ( bc , sizeBuf . B [ : 1 ] ) ; err != nil {
2020-06-30 19:58:18 +00:00
return fmt . Errorf ( "cannot read `ack` from vmstorage: %w" , err )
2020-04-27 06:32:08 +00:00
}
2021-10-08 09:52:56 +00:00
ackResp := sizeBuf . B [ 0 ]
switch ackResp {
case 1 :
2021-10-08 10:52:56 +00:00
// ok response, data successfully accepted by vmstorage
2021-10-08 09:52:56 +00:00
case 2 :
2021-10-08 10:52:56 +00:00
// vmstorage is in readonly mode
return errStorageReadOnly
2021-10-08 09:52:56 +00:00
default :
return fmt . Errorf ( "unexpected `ack` received from vmstorage; got %d; want 1 or 2" , sizeBuf . B [ 0 ] )
2020-04-27 06:32:08 +00:00
}
2021-10-08 09:52:56 +00:00
2019-05-22 21:23:23 +00:00
return nil
}
2020-05-27 12:07:16 +00:00
// sizeBufPool is a pool of scratch buffers used by sendToConn
// for the size prefix and for the `ack` byte.
var sizeBufPool bytesutil.ByteBufferPool
func ( sn * storageNode ) dial ( ) ( * handshake . BufferedConn , error ) {
2019-05-22 21:23:23 +00:00
c , err := sn . dialer . Dial ( )
if err != nil {
sn . dialErrors . Inc ( )
2020-05-27 12:07:16 +00:00
return nil , err
2019-05-22 21:23:23 +00:00
}
compressionLevel := 1
2019-05-24 09:51:07 +00:00
if * disableRPCCompression {
compressionLevel = 0
}
2019-05-22 21:23:23 +00:00
bc , err := handshake . VMInsertClient ( c , compressionLevel )
if err != nil {
_ = c . Close ( )
sn . handshakeErrors . Inc ( )
2020-06-30 19:58:18 +00:00
return nil , fmt . Errorf ( "handshake error: %w" , err )
2019-05-22 21:23:23 +00:00
}
2020-05-27 12:07:16 +00:00
return bc , nil
2019-05-22 21:23:23 +00:00
}
2019-06-08 19:29:25 +00:00
// storageNode is a client sending data to vmstorage node.
2019-05-22 21:23:23 +00:00
type storageNode struct {
2024-02-24 00:44:19 +00:00
// broken is set to true if the given vmstorage node is temporarily unhealthy.
2020-05-27 12:07:16 +00:00
// In this case the data is re-routed to the remaining healthy vmstorage nodes.
2024-02-24 00:44:19 +00:00
broken atomic . Bool
2019-06-08 19:29:25 +00:00
2024-02-24 00:44:19 +00:00
// isReadOnly is set to true if the given vmstorage node is read only
2021-10-08 09:52:56 +00:00
// In this case the data is re-routed to the remaining healthy vmstorage nodes.
2024-02-24 00:44:19 +00:00
isReadOnly atomic . Bool
2021-10-08 09:52:56 +00:00
2020-05-27 12:07:16 +00:00
// brLock protects br.
brLock sync . Mutex
2019-05-22 21:23:23 +00:00
2021-06-04 01:33:49 +00:00
// brCond is used for waiting for free space in br.
brCond * sync . Cond
2020-05-27 12:07:16 +00:00
// Buffer with data that needs to be written to the storage node.
2020-05-28 16:57:05 +00:00
// It must be accessed under brLock.
2020-05-27 12:07:16 +00:00
br bufRows
2019-05-22 21:23:23 +00:00
2020-05-28 16:57:05 +00:00
// bcLock protects bc.
bcLock sync . Mutex
2021-10-08 10:52:56 +00:00
// waitGroup for readOnlyChecker
readOnlyCheckerWG sync . WaitGroup
2020-05-28 16:57:05 +00:00
// bc is a single connection to vmstorage for data transfer.
// It must be accessed under bcLock.
bc * handshake . BufferedConn
2019-06-08 19:29:25 +00:00
dialer * netutil . TCPDialer
2022-10-25 11:41:56 +00:00
stopCh chan struct { }
2020-09-28 21:20:01 +00:00
// last error during dial.
lastDialErr error
2019-06-08 19:29:25 +00:00
// The number of dial errors to vmstorage node.
2019-05-22 21:23:23 +00:00
dialErrors * metrics . Counter
2019-06-08 19:29:25 +00:00
// The number of handshake errors to vmstorage node.
2019-05-22 21:23:23 +00:00
handshakeErrors * metrics . Counter
2019-06-08 19:29:25 +00:00
// The number of connection errors to vmstorage node.
2019-05-22 21:23:23 +00:00
connectionErrors * metrics . Counter
2019-06-08 19:29:25 +00:00
// The number of rows pushed to storageNode with push method.
rowsPushed * metrics . Counter
// The number of rows sent to vmstorage node.
rowsSent * metrics . Counter
2019-05-22 21:23:23 +00:00
2022-02-06 18:21:40 +00:00
// The number of rows dropped on overload if -dropSamplesOnOverload is set.
rowsDroppedOnOverload * metrics . Counter
2019-06-08 19:29:25 +00:00
// The number of rows rerouted from the given vmstorage node
// to healthy nodes when the given node was unhealthy.
rowsReroutedFromHere * metrics . Counter
// The number of rows rerouted to the given vmstorage node
// from other nodes when they were unhealthy.
rowsReroutedToHere * metrics . Counter
2021-08-11 08:40:52 +00:00
// The total duration spent for sending data to vmstorage node.
// This metric is useful for determining the saturation of vminsert->vmstorage link.
sendDurationSeconds * metrics . FloatCounter
2019-05-22 21:23:23 +00:00
}
2022-10-25 11:41:56 +00:00
type storageNodesBucket struct {
2022-11-09 09:36:38 +00:00
ms * metrics . Set
// nodesHash is used for consistently selecting a storage node by key.
nodesHash * consistentHash
// sns is a list of storage nodes.
sns [ ] * storageNode
2022-10-25 11:41:56 +00:00
stopCh chan struct { }
wg * sync . WaitGroup
}
2019-05-22 21:23:23 +00:00
2022-10-25 11:41:56 +00:00
// storageNodes contains a list of vmstorage node clients.
2023-07-20 00:37:49 +00:00
var storageNodes atomic . Pointer [ storageNodesBucket ]
2019-05-22 21:23:23 +00:00
2022-10-28 08:36:28 +00:00
func getStorageNodesBucket ( ) * storageNodesBucket {
2023-07-20 00:37:49 +00:00
return storageNodes . Load ( )
2022-10-28 08:36:28 +00:00
}
// setStorageNodesBucket atomically makes snb the currently active bucket of storage nodes.
func setStorageNodesBucket(snb *storageNodesBucket) {
	storageNodes.Store(snb)
}
2022-10-25 11:41:56 +00:00
// Init initializes vmstorage nodes' connections to the given addrs.
2021-10-07 09:21:42 +00:00
//
2022-02-06 18:20:02 +00:00
// hashSeed is used for changing the distribution of input time series among addrs.
2022-10-25 11:41:56 +00:00
//
// Call MustStop when the initialized vmstorage connections are no longer needed.
func Init ( addrs [ ] string , hashSeed uint64 ) {
2022-10-28 08:36:28 +00:00
snb := initStorageNodes ( addrs , hashSeed )
setStorageNodesBucket ( snb )
}
// MustStop stops netstorage.
func MustStop ( ) {
snb := getStorageNodesBucket ( )
mustStopStorageNodes ( snb )
}
func initStorageNodes ( addrs [ ] string , hashSeed uint64 ) * storageNodesBucket {
2019-05-22 21:23:23 +00:00
if len ( addrs ) == 0 {
logger . Panicf ( "BUG: addrs must be non-empty" )
}
2022-10-25 11:41:56 +00:00
ms := metrics . NewSet ( )
2022-11-09 09:36:38 +00:00
nodesHash := newConsistentHash ( addrs , hashSeed )
2022-10-25 11:41:56 +00:00
sns := make ( [ ] * storageNode , 0 , len ( addrs ) )
stopCh := make ( chan struct { } )
2019-05-22 21:23:23 +00:00
for _ , addr := range addrs {
2021-09-15 15:04:28 +00:00
if _ , _ , err := net . SplitHostPort ( addr ) ; err != nil {
// Automatically add missing port.
addr += ":8400"
}
2019-05-22 21:23:23 +00:00
sn := & storageNode {
2023-08-29 09:46:39 +00:00
dialer : netutil . NewTCPDialer ( ms , "vminsert" , addr , * vmstorageDialTimeout , * vmstorageUserTimeout ) ,
2022-10-25 11:41:56 +00:00
stopCh : stopCh ,
dialErrors : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_dial_errors_total { name="vminsert", addr=%q} ` , addr ) ) ,
handshakeErrors : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_handshake_errors_total { name="vminsert", addr=%q} ` , addr ) ) ,
connectionErrors : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_connection_errors_total { name="vminsert", addr=%q} ` , addr ) ) ,
rowsPushed : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_rows_pushed_total { name="vminsert", addr=%q} ` , addr ) ) ,
rowsSent : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_rows_sent_total { name="vminsert", addr=%q} ` , addr ) ) ,
rowsDroppedOnOverload : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_rows_dropped_on_overload_total { name="vminsert", addr=%q} ` , addr ) ) ,
rowsReroutedFromHere : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_rows_rerouted_from_here_total { name="vminsert", addr=%q} ` , addr ) ) ,
rowsReroutedToHere : ms . NewCounter ( fmt . Sprintf ( ` vm_rpc_rows_rerouted_to_here_total { name="vminsert", addr=%q} ` , addr ) ) ,
sendDurationSeconds : ms . NewFloatCounter ( fmt . Sprintf ( ` vm_rpc_send_duration_seconds_total { name="vminsert", addr=%q} ` , addr ) ) ,
2019-05-22 21:23:23 +00:00
}
2021-06-04 01:33:49 +00:00
sn . brCond = sync . NewCond ( & sn . brLock )
2022-10-25 11:41:56 +00:00
_ = ms . NewGauge ( fmt . Sprintf ( ` vm_rpc_rows_pending { name="vminsert", addr=%q} ` , addr ) , func ( ) float64 {
2020-05-27 12:07:16 +00:00
sn . brLock . Lock ( )
n := sn . br . rows
sn . brLock . Unlock ( )
2019-06-08 19:29:25 +00:00
return float64 ( n )
} )
2022-10-25 11:41:56 +00:00
_ = ms . NewGauge ( fmt . Sprintf ( ` vm_rpc_buf_pending_bytes { name="vminsert", addr=%q} ` , addr ) , func ( ) float64 {
2020-05-27 12:07:16 +00:00
sn . brLock . Lock ( )
n := len ( sn . br . buf )
sn . brLock . Unlock ( )
2019-06-08 19:29:25 +00:00
return float64 ( n )
} )
2022-10-25 11:41:56 +00:00
_ = ms . NewGauge ( fmt . Sprintf ( ` vm_rpc_vmstorage_is_reachable { name="vminsert", addr=%q} ` , addr ) , func ( ) float64 {
2024-02-24 00:44:19 +00:00
if sn . broken . Load ( ) {
2020-11-17 20:13:20 +00:00
return 0
}
return 1
} )
2022-10-25 11:41:56 +00:00
_ = ms . NewGauge ( fmt . Sprintf ( ` vm_rpc_vmstorage_is_read_only { name="vminsert", addr=%q} ` , addr ) , func ( ) float64 {
2024-02-24 00:44:19 +00:00
if sn . isReadOnly . Load ( ) {
return 1
}
return 0
2021-10-08 09:52:56 +00:00
} )
2022-10-25 11:41:56 +00:00
sns = append ( sns , sn )
2019-05-22 21:23:23 +00:00
}
2019-06-08 19:29:25 +00:00
2022-10-25 11:41:56 +00:00
maxBufSizePerStorageNode = memory . Allowed ( ) / 8 / len ( sns )
2022-04-05 12:35:08 +00:00
if maxBufSizePerStorageNode > consts . MaxInsertPacketSizeForVMInsert {
maxBufSizePerStorageNode = consts . MaxInsertPacketSizeForVMInsert
2020-05-24 22:39:24 +00:00
}
2020-05-28 16:57:05 +00:00
2022-11-09 09:36:38 +00:00
metrics . RegisterSet ( ms )
2022-10-25 11:41:56 +00:00
var wg sync . WaitGroup
2022-11-09 09:36:38 +00:00
snb := & storageNodesBucket {
ms : ms ,
nodesHash : nodesHash ,
sns : sns ,
stopCh : stopCh ,
wg : & wg ,
}
2022-10-25 11:41:56 +00:00
for idx , sn := range sns {
wg . Add ( 1 )
2020-05-28 16:57:05 +00:00
go func ( sn * storageNode , idx int ) {
2022-11-09 09:36:38 +00:00
sn . run ( snb , idx )
2022-10-25 11:41:56 +00:00
wg . Done ( )
2020-05-28 16:57:05 +00:00
} ( sn , idx )
}
2022-10-25 11:41:56 +00:00
2022-11-09 09:36:38 +00:00
return snb
2019-05-22 21:23:23 +00:00
}
2022-10-28 08:36:28 +00:00
func mustStopStorageNodes ( snb * storageNodesBucket ) {
2022-10-25 11:41:56 +00:00
close ( snb . stopCh )
for _ , sn := range snb . sns {
2021-06-04 01:33:49 +00:00
sn . brCond . Broadcast ( )
}
2022-10-25 11:41:56 +00:00
snb . wg . Wait ( )
metrics . UnregisterSet ( snb . ms )
snb . ms . UnregisterAllMetrics ( )
2019-05-22 21:23:23 +00:00
}
2019-06-08 19:29:25 +00:00
2022-02-06 18:20:02 +00:00
// rerouteRowsToReadyStorageNodes reroutes src from not ready snSource to ready storage nodes.
2020-05-27 12:07:16 +00:00
//
2022-02-06 18:20:02 +00:00
// The function blocks until src is fully re-routed.
2022-11-09 09:36:38 +00:00
func rerouteRowsToReadyStorageNodes ( snb * storageNodesBucket , snSource * storageNode , src [ ] byte ) ( int , error ) {
2022-02-07 12:35:39 +00:00
reroutesTotal . Inc ( )
2022-02-06 18:20:02 +00:00
rowsProcessed := 0
var idxsExclude , idxsExcludeNew [ ] int
2022-11-09 09:36:38 +00:00
nodesHash := snb . nodesHash
sns := snb . sns
2024-02-25 01:15:33 +00:00
idxsExclude = getNotReadyStorageNodeIdxsBlocking ( snb , idxsExclude [ : 0 ] )
2022-02-06 18:20:02 +00:00
var mr storage . MetricRow
for len ( src ) > 0 {
tail , err := mr . UnmarshalX ( src )
if err != nil {
logger . Panicf ( "BUG: cannot unmarshal MetricRow: %s" , err )
}
rowBuf := src [ : len ( src ) - len ( tail ) ]
src = tail
reroutedRowsProcessed . Inc ( )
h := xxhash . Sum64 ( mr . MetricNameRaw )
mr . ResetX ( )
var sn * storageNode
for {
idx := nodesHash . getNodeIdx ( h , idxsExclude )
2022-10-25 11:41:56 +00:00
sn = sns [ idx ]
2022-02-06 18:20:02 +00:00
if sn . isReady ( ) {
break
}
2024-02-25 01:15:33 +00:00
select {
case <- sn . stopCh :
return rowsProcessed , fmt . Errorf ( "graceful shutdown started" )
default :
}
2022-02-06 18:20:02 +00:00
// re-generate idxsExclude list, since sn must be put there.
2024-02-25 01:15:33 +00:00
idxsExclude = getNotReadyStorageNodeIdxsBlocking ( snb , idxsExclude [ : 0 ] )
2022-02-06 18:20:02 +00:00
}
if * disableRerouting {
if ! sn . sendBufMayBlock ( rowBuf ) {
return rowsProcessed , fmt . Errorf ( "graceful shutdown started" )
}
rowsProcessed ++
if sn != snSource {
snSource . rowsReroutedFromHere . Inc ( )
sn . rowsReroutedToHere . Inc ( )
}
continue
}
2022-02-07 12:35:39 +00:00
again :
2022-02-06 18:20:02 +00:00
if sn . trySendBuf ( rowBuf , 1 ) {
rowsProcessed ++
if sn != snSource {
snSource . rowsReroutedFromHere . Inc ( )
sn . rowsReroutedToHere . Inc ( )
}
continue
}
// If the re-routing is enabled, then try sending the row to another storage node.
2022-11-09 09:36:38 +00:00
idxsExcludeNew = getNotReadyStorageNodeIdxs ( snb , idxsExcludeNew [ : 0 ] , sn )
2022-02-06 18:20:02 +00:00
idx := nodesHash . getNodeIdx ( h , idxsExcludeNew )
2022-10-25 11:41:56 +00:00
snNew := sns [ idx ]
2024-02-25 01:15:33 +00:00
if ! snNew . trySendBuf ( rowBuf , 1 ) {
2024-02-27 12:15:15 +00:00
// The row cannot be sent to both snSource, sn and snNew without blocking.
2024-02-25 01:15:33 +00:00
// Sleep for a while and try sending the row to snSource again.
time . Sleep ( 100 * time . Millisecond )
goto again
}
rowsProcessed ++
if snNew != snSource {
snSource . rowsReroutedFromHere . Inc ( )
snNew . rowsReroutedToHere . Inc ( )
2021-06-05 13:16:16 +00:00
}
2020-09-28 18:35:40 +00:00
}
2022-02-06 18:20:02 +00:00
return rowsProcessed , nil
}
// reouteRowsToFreeStorageNodes re-routes src from snSource to other storage nodes.
//
// It is expected that snSource has no enough buffer for sending src.
// It is expected than *dsableRerouting isn't set when calling this function.
2024-02-25 01:15:33 +00:00
// It is expected that len(snb.sns) >= 2
2022-11-09 09:36:38 +00:00
func rerouteRowsToFreeStorageNodes ( snb * storageNodesBucket , snSource * storageNode , src [ ] byte ) ( int , error ) {
2022-02-06 18:20:02 +00:00
if * disableRerouting {
logger . Panicf ( "BUG: disableRerouting must be disabled when calling rerouteRowsToFreeStorageNodes" )
}
2024-02-25 01:15:33 +00:00
sns := snb . sns
if len ( sns ) < 2 {
logger . Panicf ( "BUG: the number of storage nodes is too small for calling rerouteRowsToFreeStorageNodes: %d" , len ( sns ) )
}
2022-02-07 12:35:39 +00:00
reroutesTotal . Inc ( )
2022-02-06 18:20:02 +00:00
rowsProcessed := 0
var idxsExclude [ ] int
2022-11-09 09:36:38 +00:00
nodesHash := snb . nodesHash
idxsExclude = getNotReadyStorageNodeIdxs ( snb , idxsExclude [ : 0 ] , snSource )
2021-06-05 13:16:16 +00:00
var mr storage . MetricRow
2019-06-08 19:29:25 +00:00
for len ( src ) > 0 {
2021-05-08 14:55:44 +00:00
tail , err := mr . UnmarshalX ( src )
2019-06-08 19:29:25 +00:00
if err != nil {
2021-06-04 01:33:49 +00:00
logger . Panicf ( "BUG: cannot unmarshal MetricRow: %s" , err )
2019-06-08 19:29:25 +00:00
}
rowBuf := src [ : len ( src ) - len ( tail ) ]
src = tail
2021-06-04 01:33:49 +00:00
reroutedRowsProcessed . Inc ( )
h := xxhash . Sum64 ( mr . MetricNameRaw )
2021-05-08 14:55:44 +00:00
mr . ResetX ( )
2024-02-25 01:15:33 +00:00
2022-02-07 12:35:39 +00:00
again :
2024-02-25 01:15:33 +00:00
// Try sending the row to snSource in order to minimize re-routing.
2022-02-06 18:20:02 +00:00
if snSource . trySendBuf ( rowBuf , 1 ) {
rowsProcessed ++
continue
2021-06-04 01:33:49 +00:00
}
2024-02-25 01:15:33 +00:00
// The row couldn't be sent to snSrouce. Try re-routing it to other node.
idx := nodesHash . getNodeIdx ( h , idxsExclude )
sn := sns [ idx ]
for ! sn . isReady ( ) && len ( idxsExclude ) < len ( sns ) {
// re-generate idxsExclude list, since sn and snSource must be put there.
idxsExclude = getNotReadyStorageNodeIdxs ( snb , idxsExclude [ : 0 ] , snSource )
2022-02-06 18:20:02 +00:00
idx := nodesHash . getNodeIdx ( h , idxsExclude )
2022-10-25 11:41:56 +00:00
sn = sns [ idx ]
2022-02-06 18:20:02 +00:00
}
2024-02-25 01:15:33 +00:00
if ! sn . trySendBuf ( rowBuf , 1 ) {
// The row cannot be sent to both snSource and sn without blocking.
// Sleep for a while and try sending the row to snSource again.
time . Sleep ( 100 * time . Millisecond )
goto again
2022-02-06 18:20:02 +00:00
}
2024-02-25 01:15:33 +00:00
rowsProcessed ++
snSource . rowsReroutedFromHere . Inc ( )
sn . rowsReroutedToHere . Inc ( )
2019-06-08 19:29:25 +00:00
}
2022-02-06 18:20:02 +00:00
return rowsProcessed , nil
}
2024-02-25 01:15:33 +00:00
func getNotReadyStorageNodeIdxsBlocking ( snb * storageNodesBucket , dst [ ] int ) [ ] int {
dst = getNotReadyStorageNodeIdxs ( snb , dst [ : 0 ] , nil )
2022-11-09 09:36:38 +00:00
sns := snb . sns
2022-10-25 11:41:56 +00:00
if len ( dst ) < len ( sns ) {
2022-02-06 18:20:02 +00:00
return dst
}
2022-06-27 09:31:16 +00:00
noStorageNodesLogger . Warnf ( "all the vmstorage nodes are unavailable; stopping data processing util at least a single node becomes available" )
2022-02-06 18:20:02 +00:00
for {
2024-02-25 01:15:33 +00:00
tc := timerpool . Get ( time . Second )
select {
case <- snb . stopCh :
timerpool . Put ( tc )
return dst
case <- tc . C :
timerpool . Put ( tc )
}
dst = getNotReadyStorageNodeIdxs ( snb , dst [ : 0 ] , nil )
2022-10-25 11:41:56 +00:00
if availableNodes := len ( sns ) - len ( dst ) ; availableNodes > 0 {
2022-08-04 15:33:21 +00:00
storageNodesBecameAvailableLogger . Warnf ( "%d vmstorage nodes became available, so continue data processing" , availableNodes )
2022-02-06 18:20:02 +00:00
return dst
}
}
}
2022-08-04 15:33:21 +00:00
var storageNodesBecameAvailableLogger = logger . WithThrottler ( "storageNodesBecameAvailable" , 5 * time . Second )
2022-06-27 09:31:16 +00:00
var noStorageNodesLogger = logger . WithThrottler ( "storageNodesUnavailable" , 5 * time . Second )
2022-11-09 09:36:38 +00:00
func getNotReadyStorageNodeIdxs ( snb * storageNodesBucket , dst [ ] int , snExtra * storageNode ) [ ] int {
2022-02-06 18:20:02 +00:00
dst = dst [ : 0 ]
2022-11-09 09:36:38 +00:00
for i , sn := range snb . sns {
2022-02-07 11:20:43 +00:00
if sn == snExtra || ! sn . isReady ( ) {
2022-02-06 18:20:02 +00:00
dst = append ( dst , i )
}
}
return dst
}
func ( sn * storageNode ) trySendBuf ( buf [ ] byte , rows int ) bool {
2024-02-27 12:15:15 +00:00
if ! sn . isReady ( ) {
// Fast path without locking the sn.brLock.
return false
}
2022-02-06 18:20:02 +00:00
sent := false
sn . brLock . Lock ( )
if sn . isReady ( ) && len ( sn . br . buf ) + len ( buf ) <= maxBufSizePerStorageNode {
sn . br . buf = append ( sn . br . buf , buf ... )
sn . br . rows += rows
sent = true
}
sn . brLock . Unlock ( )
return sent
2020-05-27 12:07:16 +00:00
}
2021-06-04 01:33:49 +00:00
func ( sn * storageNode ) sendBufMayBlock ( buf [ ] byte ) bool {
2020-05-27 12:07:16 +00:00
sn . brLock . Lock ( )
2021-06-04 01:33:49 +00:00
for len ( sn . br . buf ) + len ( buf ) > maxBufSizePerStorageNode {
select {
2022-10-25 11:41:56 +00:00
case <- sn . stopCh :
2021-06-04 01:33:49 +00:00
sn . brLock . Unlock ( )
return false
default :
}
sn . brCond . Wait ( )
2020-05-27 12:07:16 +00:00
}
2021-06-04 01:33:49 +00:00
sn . br . buf = append ( sn . br . buf , buf ... )
sn . br . rows ++
2020-05-27 12:07:16 +00:00
sn . brLock . Unlock ( )
2021-06-04 01:33:49 +00:00
return true
2019-06-08 19:29:25 +00:00
}
2022-10-25 11:41:56 +00:00
func ( sn * storageNode ) readOnlyChecker ( ) {
2024-01-22 16:12:37 +00:00
d := timeutil . AddJitterToDuration ( time . Second * 30 )
ticker := time . NewTicker ( d )
2021-10-08 10:52:56 +00:00
defer ticker . Stop ( )
for {
select {
2022-10-25 11:41:56 +00:00
case <- sn . stopCh :
2021-10-08 09:52:56 +00:00
return
2021-10-08 10:52:56 +00:00
case <- ticker . C :
sn . checkReadOnlyMode ( )
2021-10-08 09:52:56 +00:00
}
}
2021-10-08 10:52:56 +00:00
}
func ( sn * storageNode ) checkReadOnlyMode ( ) {
2024-02-24 00:44:19 +00:00
if ! sn . isReadOnly . Load ( ) {
2021-10-08 10:52:56 +00:00
// fast path - the sn isn't in readonly mode
return
}
// Check whether the storage remains in readonly mode
sn . bcLock . Lock ( )
defer sn . bcLock . Unlock ( )
if sn . bc == nil {
return
}
// send nil buff to check ack response from storage
err := sendToConn ( sn . bc , nil )
if err == nil {
// The storage switched from readonly to non-readonly mode
2024-02-24 00:44:19 +00:00
sn . isReadOnly . Store ( false )
2021-10-08 10:52:56 +00:00
return
}
2023-09-01 15:56:41 +00:00
if errors . Is ( err , errStorageReadOnly ) {
// The storage remains in read-only mode
return
2021-10-08 10:52:56 +00:00
}
2023-09-01 15:56:41 +00:00
// There was an error when sending nil buf to the storage.
logger . Errorf ( "cannot check storage readonly mode for -storageNode=%q: %s" , sn . dialer . Addr ( ) , err )
// Mark the connection to the storage as broken.
if err = sn . bc . Close ( ) ; err != nil {
cannotCloseStorageNodeConnLogger . Warnf ( "cannot close connection to storageNode %q: %s" , sn . dialer . Addr ( ) , err )
}
sn . bc = nil
2024-02-24 00:44:19 +00:00
sn . broken . Store ( true )
2023-09-01 15:56:41 +00:00
sn . brCond . Broadcast ( )
sn . connectionErrors . Inc ( )
2021-10-08 09:52:56 +00:00
}
2019-06-08 19:29:25 +00:00
var (
2020-05-24 22:39:24 +00:00
// maxBufSizePerStorageNode is the maximum size in bytes of the pending buffer (sn.br.buf)
// per storage node. trySendBuf and sendBufMayBlock refuse or delay appending data,
// which would make the buffer bigger than this value.
// NOTE(review): it has no initializer here - presumably it is assigned during initialization elsewhere; confirm.
maxBufSizePerStorageNode int
2021-06-04 01:33:49 +00:00
// reroutedRowsProcessed is incremented once per every row unmarshaled
// by rerouteRowsToReadyStorageNodes and rerouteRowsToFreeStorageNodes.
reroutedRowsProcessed = metrics . NewCounter ( ` vm_rpc_rerouted_rows_processed_total { name="vminsert"} ` )
// reroutesTotal is incremented once per every call to
// rerouteRowsToReadyStorageNodes and rerouteRowsToFreeStorageNodes.
reroutesTotal = metrics . NewCounter ( ` vm_rpc_reroutes_total { name="vminsert"} ` )
2020-05-28 16:57:05 +00:00
// rowsIncompletelyReplicatedTotal isn't updated in this part of the file.
// NOTE(review): judging by the name, it counts rows stored with fewer copies than requested - verify at the usage site.
rowsIncompletelyReplicatedTotal = metrics . NewCounter ( ` vm_rpc_rows_incompletely_replicated_total { name="vminsert"} ` )
2019-06-08 19:29:25 +00:00
)