2021-05-08 14:55:44 +00:00
package clusternative
import (
"errors"
2023-11-14 00:00:42 +00:00
"flag"
2021-05-08 14:55:44 +00:00
"net"
"strings"
"sync"
"time"
2021-05-08 16:36:00 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver"
2021-05-08 14:55:44 +00:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics"
)
2023-11-14 00:00:42 +00:00
var (
	// vminsertConnsShutdownDuration is the duration passed to s.cm.CloseAll in MustStop
	// for closing the accepted vminsert connections. It defaults to 25 seconds.
	vminsertConnsShutdownDuration = flag.Duration("clusternative.vminsertConnsShutdownDuration", 25*time.Second, "The time needed for gradual closing of upstream "+
		"vminsert connections during graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining lower-level clusters "+
		"during rolling restart. Smaller duration reduces the time needed to close all the upstream vminsert connections, thus reducing the time for graceful shutdown. "+
		"See https://docs.victoriametrics.com/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
)
2021-05-08 14:55:44 +00:00
// Counters for the connections accepted by the clusternative TCP server.
//
// The metric names are written in the canonical `name{labels}` form, without
// whitespace around the name or after the closing brace, so they are valid
// metric identifiers.
var (
	// writeRequestsTCP is incremented once for every accepted connection
	// before it is passed to insertHandler.
	writeRequestsTCP = metrics.NewCounter(`vm_ingestserver_requests_total{type="clusternative", net="tcp"}`)

	// writeErrorsTCP is incremented every time insertHandler returns an error.
	writeErrorsTCP = metrics.NewCounter(`vm_ingestserver_request_errors_total{type="clusternative", net="tcp"}`)
)
// Server accepts data from vminsert over TCP in the same way as vmstorage does.
type Server struct {
addr string
lnTCP net . Listener
wg sync . WaitGroup
2021-05-08 16:36:00 +00:00
cm ingestserver . ConnsMap
2021-05-08 14:55:44 +00:00
}
// MustStart starts clusternative server on the given addr.
//
// The incoming connections are processed with insertHandler.
//
// MustStop must be called on the returned server when it is no longer needed.
func MustStart ( addr string , insertHandler func ( c net . Conn ) error ) * Server {
logger . Infof ( "starting TCP clusternative server at %q" , addr )
2023-01-27 07:08:35 +00:00
lnTCP , err := netutil . NewTCPListener ( "clusternative" , addr , false , nil )
2021-05-08 14:55:44 +00:00
if err != nil {
logger . Fatalf ( "cannot start TCP clusternative server at %q: %s" , addr , err )
}
s := & Server {
addr : addr ,
lnTCP : lnTCP ,
}
2023-11-14 00:00:42 +00:00
s . cm . Init ( "vminsert_upstream" )
2021-05-08 14:55:44 +00:00
s . wg . Add ( 1 )
go func ( ) {
defer s . wg . Done ( )
2021-05-08 16:36:00 +00:00
s . serveTCP ( insertHandler )
2021-05-08 14:55:44 +00:00
logger . Infof ( "stopped TCP clusternative server at %q" , addr )
} ( )
return s
}
// MustStop stops the server.
func ( s * Server ) MustStop ( ) {
logger . Infof ( "stopping TCP clusternative server at %q..." , s . addr )
if err := s . lnTCP . Close ( ) ; err != nil {
logger . Errorf ( "cannot close TCP clusternative server: %s" , err )
}
2023-11-14 00:00:42 +00:00
s . cm . CloseAll ( * vminsertConnsShutdownDuration )
2021-05-08 14:55:44 +00:00
s . wg . Wait ( )
logger . Infof ( "TCP clusternative server at %q has been stopped" , s . addr )
}
2021-05-08 16:36:00 +00:00
func ( s * Server ) serveTCP ( insertHandler func ( c net . Conn ) error ) {
var wg sync . WaitGroup
2021-05-08 14:55:44 +00:00
for {
2021-05-08 16:36:00 +00:00
c , err := s . lnTCP . Accept ( )
2021-05-08 14:55:44 +00:00
if err != nil {
var ne net . Error
if errors . As ( err , & ne ) {
if ne . Temporary ( ) {
2021-05-08 16:36:00 +00:00
logger . Errorf ( "clusternative: temporary error when listening for TCP addr %q: %s" , s . lnTCP . Addr ( ) , err )
2021-05-08 14:55:44 +00:00
time . Sleep ( time . Second )
continue
}
if strings . Contains ( err . Error ( ) , "use of closed network connection" ) {
break
}
logger . Fatalf ( "unrecoverable error when accepting TCP clusternative connections: %s" , err )
}
logger . Fatalf ( "unexpected error when accepting TCP clusternative connections: %s" , err )
}
2021-05-08 16:36:00 +00:00
if ! s . cm . Add ( c ) {
// The server is already closed.
_ = c . Close ( )
break
}
wg . Add ( 1 )
2021-05-08 14:55:44 +00:00
go func ( ) {
2021-05-08 16:36:00 +00:00
defer func ( ) {
s . cm . Delete ( c )
_ = c . Close ( )
wg . Done ( )
} ( )
2021-05-08 14:55:44 +00:00
writeRequestsTCP . Inc ( )
if err := insertHandler ( c ) ; err != nil {
writeErrorsTCP . Inc ( )
logger . Errorf ( "error in TCP clusternative conn %q<->%q: %s" , c . LocalAddr ( ) , c . RemoteAddr ( ) , err )
}
} ( )
}
2021-05-08 16:36:00 +00:00
wg . Wait ( )
2021-05-08 14:55:44 +00:00
}