code.vegaprotocol.io/vega@v0.79.0/cmd/vega/commands/node/node.go (about) 1 // Copyright (C) 2023 Gobalsky Labs Limited 2 // 3 // This program is free software: you can redistribute it and/or modify 4 // it under the terms of the GNU Affero General Public License as 5 // published by the Free Software Foundation, either version 3 of the 6 // License, or (at your option) any later version. 7 // 8 // This program is distributed in the hope that it will be useful, 9 // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 // GNU Affero General Public License for more details. 12 // 13 // You should have received a copy of the GNU Affero General Public License 14 // along with this program. If not, see <http://www.gnu.org/licenses/>. 15 16 package node 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "os" 23 "os/signal" 24 "syscall" 25 26 "code.vegaprotocol.io/vega/core/admin" 27 "code.vegaprotocol.io/vega/core/api" 28 "code.vegaprotocol.io/vega/core/api/rest" 29 "code.vegaprotocol.io/vega/core/blockchain" 30 "code.vegaprotocol.io/vega/core/blockchain/abci" 31 "code.vegaprotocol.io/vega/core/blockchain/nullchain" 32 ethclient "code.vegaprotocol.io/vega/core/client/eth" 33 "code.vegaprotocol.io/vega/core/config" 34 "code.vegaprotocol.io/vega/core/coreapi" 35 "code.vegaprotocol.io/vega/core/metrics" 36 "code.vegaprotocol.io/vega/core/nodewallets" 37 "code.vegaprotocol.io/vega/core/protocol" 38 "code.vegaprotocol.io/vega/core/stats" 39 "code.vegaprotocol.io/vega/libs/pprof" 40 "code.vegaprotocol.io/vega/logging" 41 "code.vegaprotocol.io/vega/paths" 42 apipb "code.vegaprotocol.io/vega/protos/vega/api/v1" 43 "code.vegaprotocol.io/vega/version" 44 45 "github.com/cometbft/cometbft/abci/types" 46 tmtypes "github.com/cometbft/cometbft/types" 47 "google.golang.org/grpc" 48 ) 49 50 var ErrUnknownChainProvider = errors.New("unknown chain provider") 51 52 type Command struct { 53 ctx context.Context 54 cancel context.CancelFunc 55 56 Log *logging.Logger 57 58 pproffhandlr *pprof.Pprofhandler 59 stats *stats.Stats 60 61 conf config.Config 62 confWatcher *config.Watcher 63 64 nullBlockchain *nullchain.NullBlockchain 65 blockchainServer *blockchain.Server 66 blockchainClient *blockchain.Client 67 68 nodeWallets *nodewallets.NodeWallets 69 nodeWalletPassphrase string 70 71 vegaPaths paths.Paths 72 73 primaryEthClient *ethclient.PrimaryClient 74 primaryEthConfirmations *ethclient.EthereumConfirmations 75 76 secondaryEthClient *ethclient.SecondaryClient 77 secondaryEthConfirmations *ethclient.EthereumConfirmations 78 79 l2Clients *ethclient.L2Clients 80 81 abciApp *appW 82 protocol *protocol.Protocol 83 84 // APIs 85 grpcServer *api.GRPC 86 proxyServer *rest.ProxyServer 87 adminServer *admin.Server 88 coreService *coreapi.Service 89 90 tmNode *abci.TmNode 91 } 92 93 func (n *Command) Run( 94 confWatcher *config.Watcher, 95 vegaPaths paths.Paths, 96 nodeWalletPassphrase, tmHome, networkURL, network string, 97 log *logging.Logger, 98 ) error { 99 n.Log.Info("starting vega", 100 logging.String("version", version.Get()), 101 logging.String("commit-hash", version.GetCommitHash()), 102 ) 103 104 n.confWatcher = confWatcher 105 n.nodeWalletPassphrase = nodeWalletPassphrase 106 107 n.conf = confWatcher.Get() 108 n.vegaPaths = vegaPaths 109 110 if err := n.setupCommon(); err != nil { 111 return err 112 } 113 114 if err := n.loadNodeWallets(); err != nil { 115 return fmt.Errorf("could not load the node wallets: %w", err) 116 } 117 118 if err := n.startBlockchainClients(); err != nil { 119 return err 120 } 121 122 // TODO(): later we will want to select what version of the protocol 123 // to run, most likely via configuration, so we can use legacy or current 124 var err error 125 n.protocol, err = protocol.New( 126 n.ctx, 127 n.confWatcher, 128 n.Log, 129 n.cancel, 130 n.stopBlockchain, 131 n.nodeWallets, 132 n.primaryEthClient, 133 n.secondaryEthClient, 134 n.primaryEthConfirmations, 135 n.secondaryEthConfirmations, 136 n.blockchainClient, 137 vegaPaths, 138 n.stats, 139 n.l2Clients, 140 ) 141 if err != nil { 142 return err 143 } 144 145 if err := n.startAPIs(); err != nil { 146 return fmt.Errorf("could not start the core APIs: %w", err) 147 } 148 149 // The protocol must be started after the API, otherwise nobody is listening 150 // to the internal events emitted during that phase (like during the state 151 // restoration), which will cause issues to APIs consumer like system tests. 152 if err := n.protocol.Start(n.ctx); err != nil { 153 return fmt.Errorf("could not start the core: %w", err) 154 } 155 156 // if a chain is being replayed tendermint does this during the initial handshake with the 157 // app and does so synchronously. We to need to set this off in a goroutine so we can catch any 158 // SIGTERM during that replay and shutdown properly 159 errCh := make(chan error) 160 go func() { 161 defer func() { 162 // if a consensus failure happens during replay tendermint panics 163 // we need to catch it so we can call shutdown and then re-panic 164 if r := recover(); r != nil { 165 n.Stop() 166 panic(r) 167 } 168 }() 169 if err := n.startBlockchain(log, tmHome, network, networkURL); err != nil { 170 errCh <- err 171 } 172 // start the nullblockchain if we are in that mode, it *needs* to be after we've started the gRPC server 173 // otherwise it'll start calling init-chain and all the way before we're ready. 174 if n.conf.Blockchain.ChainProvider == blockchain.ProviderNullChain { 175 if err := n.nullBlockchain.StartServer(); err != nil { 176 errCh <- err 177 } 178 } 179 }() 180 181 // at this point all is good, and we should be started, we can 182 // just wait for signals or whatever 183 n.Log.Info("Vega startup complete", 184 logging.String("node-mode", string(n.conf.NodeMode))) 185 186 // wait for possible protocol upgrade, or user exit 187 if err := n.wait(errCh); err != nil { 188 return err 189 } 190 191 return n.Stop() 192 } 193 194 func (n *Command) wait(errCh <-chan error) error { 195 gracefulStop := make(chan os.Signal, 1) 196 signal.Notify(gracefulStop, syscall.SIGTERM, syscall.SIGINT) 197 for { 198 select { 199 case sig := <-gracefulStop: 200 n.Log.Info("Caught signal", logging.String("name", fmt.Sprintf("%+v", sig))) 201 return nil 202 case e := <-errCh: 203 n.Log.Error("problem starting blockchain", logging.Error(e)) 204 return e 205 case <-n.ctx.Done(): 206 // nothing to do 207 return nil 208 } 209 } 210 } 211 212 func (n *Command) stopBlockchain() error { 213 if n.blockchainServer == nil { 214 return nil 215 } 216 return n.blockchainServer.Stop() 217 } 218 219 func (n *Command) Stop() error { 220 upStatus := n.protocol.GetProtocolUpgradeService().GetUpgradeStatus() 221 222 // Blockchain server has been already stopped by the app during the upgrade. 223 // Calling stop again would block forever. 224 if n.blockchainServer != nil && !upStatus.ReadyToUpgrade { 225 n.blockchainServer.Stop() 226 } 227 if n.protocol != nil { 228 n.protocol.Stop() 229 } 230 if n.grpcServer != nil { 231 n.grpcServer.Stop() 232 } 233 if n.proxyServer != nil { 234 n.proxyServer.Stop() 235 } 236 if n.adminServer != nil { 237 n.adminServer.Stop() 238 } 239 240 if n.conf.IsValidator() { 241 if err := n.nodeWallets.Ethereum.Cleanup(); err != nil { 242 n.Log.Error("couldn't clean up Ethereum node wallet", logging.Error(err)) 243 } 244 } 245 246 var err error 247 if n.pproffhandlr != nil { 248 err = n.pproffhandlr.Stop() 249 } 250 251 n.Log.Info("Vega shutdown complete", 252 logging.String("version", version.Get()), 253 logging.String("version-hash", version.GetCommitHash())) 254 255 n.Log.Sync() 256 n.cancel() 257 258 // Blockchain server need to be killed as it is stuck in BeginBlock function. 259 if upStatus.ReadyToUpgrade { 260 return kill() 261 } 262 263 return err 264 } 265 266 func (n *Command) startAPIs() error { 267 n.grpcServer = api.NewGRPC( 268 n.Log, 269 n.conf.API, 270 n.stats, 271 n.blockchainClient, 272 n.protocol.GetEventForwarder(), 273 n.protocol.GetTimeService(), 274 n.protocol.GetEventService(), 275 n.protocol.GetPoW(), 276 n.protocol.GetSpamEngine(), 277 n.protocol.GetPowEngine(), 278 ) 279 280 n.coreService = coreapi.NewService(n.ctx, n.Log, n.conf.CoreAPI, n.protocol.GetBroker()) 281 n.grpcServer.RegisterService(func(server *grpc.Server) { 282 apipb.RegisterCoreStateServiceServer(server, n.coreService) 283 }) 284 285 // watch configs 286 n.confWatcher.OnConfigUpdate( 287 func(cfg config.Config) { n.grpcServer.ReloadConf(cfg.API) }, 288 ) 289 290 n.proxyServer = rest.NewProxyServer(n.Log, n.conf.API) 291 292 if n.conf.IsValidator() { 293 adminServer, err := admin.NewValidatorServer(n.Log, n.conf.Admin, n.vegaPaths, n.nodeWalletPassphrase, n.nodeWallets, n.protocol.GetProtocolUpgradeService()) 294 if err != nil { 295 return err 296 } 297 n.adminServer = adminServer 298 } else { 299 adminServer, err := admin.NewNonValidatorServer(n.Log, n.conf.Admin, n.protocol.GetProtocolUpgradeService()) 300 if err != nil { 301 return err 302 } 303 n.adminServer = adminServer 304 } 305 306 go n.grpcServer.Start() 307 go n.proxyServer.Start() 308 309 if n.adminServer != nil { 310 go n.adminServer.Start() 311 } 312 313 return nil 314 } 315 316 func (n *Command) startBlockchain(log *logging.Logger, tmHome, network, networkURL string) error { 317 // make sure any env variable is resolved 318 tmHome = os.ExpandEnv(tmHome) 319 n.abciApp = newAppW(n.protocol.Abci()) 320 321 switch n.conf.Blockchain.ChainProvider { 322 case blockchain.ProviderTendermint: 323 var err error 324 // initialise the node 325 n.tmNode, err = n.startABCI(log, n.abciApp, tmHome, network, networkURL) 326 if err != nil { 327 return err 328 } 329 n.blockchainServer = blockchain.NewServer(n.Log, n.tmNode) 330 // initialise the client 331 client, err := n.tmNode.GetClient() 332 if err != nil { 333 return err 334 } 335 n.blockchainClient.Set(client, n.tmNode.MempoolSize) 336 case blockchain.ProviderNullChain: 337 // nullchain acts as both the client and the server because its does everything 338 n.nullBlockchain = nullchain.NewClient( 339 n.Log, 340 n.conf.Blockchain.Null, 341 n.protocol.GetTimeService(), // if we've loaded from a snapshot we need to be able to ask the protocol what time its at 342 ) 343 n.nullBlockchain.SetABCIApp(n.abciApp) 344 n.blockchainServer = blockchain.NewServer(n.Log, n.nullBlockchain) 345 // n.blockchainClient = blockchain.NewClient(n.nullBlockchain) 346 n.blockchainClient.Set(n.nullBlockchain, 100*1024*1024) 347 348 default: 349 return ErrUnknownChainProvider 350 } 351 352 n.confWatcher.OnConfigUpdate( 353 func(cfg config.Config) { n.blockchainServer.ReloadConf(cfg.Blockchain) }, 354 ) 355 356 if err := n.blockchainServer.Start(); err != nil { 357 return err 358 } 359 360 if err := n.blockchainClient.Start(); err != nil { 361 return err 362 } 363 364 return nil 365 } 366 367 func (n *Command) setupCommon() (err error) { 368 // this shouldn't happen, the context is initialized in here 369 if n.cancel != nil { 370 n.cancel() 371 } 372 373 // ensure we cancel the context on error 374 defer func() { 375 if err != nil { 376 n.cancel() 377 } 378 }() 379 380 // initialize the application context 381 n.ctx, n.cancel = context.WithCancel(context.Background()) 382 383 // get the configuration, this have been loaded by the root 384 conf := n.confWatcher.Get() 385 386 // reload logger with the setup from configuration 387 n.Log = logging.NewLoggerFromConfig(conf.Logging).Named(n.Log.GetName()) 388 389 // enable pprof if necessary 390 if conf.Pprof.Enabled { 391 n.Log.Info("vega is starting with pprof profile, this is not a recommended setting for production") 392 n.pproffhandlr, err = pprof.New(n.Log, conf.Pprof) 393 if err != nil { 394 return err 395 } 396 n.confWatcher.OnConfigUpdate( 397 func(cfg config.Config) { n.pproffhandlr.ReloadConf(cfg.Pprof) }, 398 ) 399 } 400 401 n.stats = stats.New(n.Log, n.conf.Stats) 402 403 // start prometheus stuff 404 metrics.Start(n.conf.Metrics) 405 406 return err 407 } 408 409 func (n *Command) loadNodeWallets() (err error) { 410 // if we are a non-validator, nothing needs to be done here 411 if !n.conf.IsValidator() { 412 return nil 413 } 414 415 n.nodeWallets, err = nodewallets.GetNodeWallets(n.conf.NodeWallet, n.vegaPaths, n.nodeWalletPassphrase) 416 if err != nil { 417 return fmt.Errorf("couldn't get node wallets: %w", err) 418 } 419 420 return n.nodeWallets.Verify() 421 } 422 423 func (n *Command) startABCI(log *logging.Logger, app types.Application, tmHome string, network string, networkURL string) (*abci.TmNode, error) { 424 var ( 425 genesisDoc *tmtypes.GenesisDoc 426 err error 427 ) 428 if len(network) > 0 { 429 genesisDoc, err = httpGenesisDocProvider(network) 430 } else if len(networkURL) > 0 { 431 genesisDoc, err = genesisDocHTTPFromURL(networkURL) 432 } 433 if err != nil { 434 return nil, err 435 } 436 437 return abci.NewTmNode( 438 n.conf.Blockchain, 439 log, 440 tmHome, 441 app, 442 genesisDoc, 443 ) 444 } 445 446 func (n *Command) startBlockchainClients() error { 447 // just intantiate the client here, we'll setup the actual impl later on 448 // when the null blockchain or tendermint is started. 449 n.blockchainClient = blockchain.NewClient() 450 451 // if we are a non-validator, nothing needs to be done here 452 if !n.conf.IsValidator() { 453 return nil 454 } 455 456 // We may not need ethereum client initialized when we have not 457 // provided the ethereum endpoint. We skip creating client here 458 // when RPCEnpoint is empty and the nullchain present. 459 if n.conf.IsNullChain() && !n.conf.HaveEthClient() { 460 return nil 461 } 462 463 var err error 464 n.l2Clients, err = ethclient.NewL2Clients(n.ctx, n.Log, n.conf.Ethereum) 465 if err != nil { 466 return fmt.Errorf("could not instantiate ethereum l2 clients: %w", err) 467 } 468 469 n.primaryEthClient, err = ethclient.PrimaryDial(n.ctx, n.conf.Ethereum) 470 if err != nil { 471 return fmt.Errorf("could not instantiate primary ethereum client: %w", err) 472 } 473 474 n.secondaryEthClient, err = ethclient.SecondaryDial(n.ctx, n.conf.Ethereum) 475 if err != nil { 476 return fmt.Errorf("could not instantiate secondary ethereum client: %w", err) 477 } 478 479 n.primaryEthConfirmations = ethclient.NewEthereumConfirmations(n.conf.Ethereum, n.primaryEthClient, nil, ethclient.FinalityStateFinalized) 480 481 // for arbitrum the finality state of a block is in now way connected to the Arbitrum network reaching consensus so Vega gains nothing 482 // from waiting for safe/finalized. Instead we just wait for the event to be seen in the latest block and rely on the consensus check 483 // Vega performs itself with node-votes. If each validator is running their own Arbitrum node, or is using a node that they need trustworthy 484 // then this is sufficient. A far as is know, block reorgs do not happen on Arbitrum. 485 n.secondaryEthConfirmations = ethclient.NewEthereumConfirmations(n.conf.Ethereum, n.secondaryEthClient, nil, ethclient.FinalityStateLatest) 486 487 return nil 488 } 489 490 // kill the running process by signaling itself with SIGKILL. 491 func kill() error { 492 p, err := os.FindProcess(os.Getpid()) 493 if err != nil { 494 return err 495 } 496 return p.Signal(syscall.SIGKILL) 497 }