code.vegaprotocol.io/vega@v0.79.0/cmd/vega/commands/node/node.go (about)

     1  // Copyright (C) 2023 Gobalsky Labs Limited
     2  //
     3  // This program is free software: you can redistribute it and/or modify
     4  // it under the terms of the GNU Affero General Public License as
     5  // published by the Free Software Foundation, either version 3 of the
     6  // License, or (at your option) any later version.
     7  //
     8  // This program is distributed in the hope that it will be useful,
     9  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    10  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    11  // GNU Affero General Public License for more details.
    12  //
    13  // You should have received a copy of the GNU Affero General Public License
    14  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    15  
    16  package node
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"os"
    23  	"os/signal"
    24  	"syscall"
    25  
    26  	"code.vegaprotocol.io/vega/core/admin"
    27  	"code.vegaprotocol.io/vega/core/api"
    28  	"code.vegaprotocol.io/vega/core/api/rest"
    29  	"code.vegaprotocol.io/vega/core/blockchain"
    30  	"code.vegaprotocol.io/vega/core/blockchain/abci"
    31  	"code.vegaprotocol.io/vega/core/blockchain/nullchain"
    32  	ethclient "code.vegaprotocol.io/vega/core/client/eth"
    33  	"code.vegaprotocol.io/vega/core/config"
    34  	"code.vegaprotocol.io/vega/core/coreapi"
    35  	"code.vegaprotocol.io/vega/core/metrics"
    36  	"code.vegaprotocol.io/vega/core/nodewallets"
    37  	"code.vegaprotocol.io/vega/core/protocol"
    38  	"code.vegaprotocol.io/vega/core/stats"
    39  	"code.vegaprotocol.io/vega/libs/pprof"
    40  	"code.vegaprotocol.io/vega/logging"
    41  	"code.vegaprotocol.io/vega/paths"
    42  	apipb "code.vegaprotocol.io/vega/protos/vega/api/v1"
    43  	"code.vegaprotocol.io/vega/version"
    44  
    45  	"github.com/cometbft/cometbft/abci/types"
    46  	tmtypes "github.com/cometbft/cometbft/types"
    47  	"google.golang.org/grpc"
    48  )
    49  
    50  var ErrUnknownChainProvider = errors.New("unknown chain provider")
    51  
    52  type Command struct {
    53  	ctx    context.Context
    54  	cancel context.CancelFunc
    55  
    56  	Log *logging.Logger
    57  
    58  	pproffhandlr *pprof.Pprofhandler
    59  	stats        *stats.Stats
    60  
    61  	conf        config.Config
    62  	confWatcher *config.Watcher
    63  
    64  	nullBlockchain   *nullchain.NullBlockchain
    65  	blockchainServer *blockchain.Server
    66  	blockchainClient *blockchain.Client
    67  
    68  	nodeWallets          *nodewallets.NodeWallets
    69  	nodeWalletPassphrase string
    70  
    71  	vegaPaths paths.Paths
    72  
    73  	primaryEthClient        *ethclient.PrimaryClient
    74  	primaryEthConfirmations *ethclient.EthereumConfirmations
    75  
    76  	secondaryEthClient        *ethclient.SecondaryClient
    77  	secondaryEthConfirmations *ethclient.EthereumConfirmations
    78  
    79  	l2Clients *ethclient.L2Clients
    80  
    81  	abciApp  *appW
    82  	protocol *protocol.Protocol
    83  
    84  	// APIs
    85  	grpcServer  *api.GRPC
    86  	proxyServer *rest.ProxyServer
    87  	adminServer *admin.Server
    88  	coreService *coreapi.Service
    89  
    90  	tmNode *abci.TmNode
    91  }
    92  
    93  func (n *Command) Run(
    94  	confWatcher *config.Watcher,
    95  	vegaPaths paths.Paths,
    96  	nodeWalletPassphrase, tmHome, networkURL, network string,
    97  	log *logging.Logger,
    98  ) error {
    99  	n.Log.Info("starting vega",
   100  		logging.String("version", version.Get()),
   101  		logging.String("commit-hash", version.GetCommitHash()),
   102  	)
   103  
   104  	n.confWatcher = confWatcher
   105  	n.nodeWalletPassphrase = nodeWalletPassphrase
   106  
   107  	n.conf = confWatcher.Get()
   108  	n.vegaPaths = vegaPaths
   109  
   110  	if err := n.setupCommon(); err != nil {
   111  		return err
   112  	}
   113  
   114  	if err := n.loadNodeWallets(); err != nil {
   115  		return fmt.Errorf("could not load the node wallets: %w", err)
   116  	}
   117  
   118  	if err := n.startBlockchainClients(); err != nil {
   119  		return err
   120  	}
   121  
   122  	// TODO(): later we will want to select what version of the protocol
   123  	// to run, most likely via configuration, so we can use legacy or current
   124  	var err error
   125  	n.protocol, err = protocol.New(
   126  		n.ctx,
   127  		n.confWatcher,
   128  		n.Log,
   129  		n.cancel,
   130  		n.stopBlockchain,
   131  		n.nodeWallets,
   132  		n.primaryEthClient,
   133  		n.secondaryEthClient,
   134  		n.primaryEthConfirmations,
   135  		n.secondaryEthConfirmations,
   136  		n.blockchainClient,
   137  		vegaPaths,
   138  		n.stats,
   139  		n.l2Clients,
   140  	)
   141  	if err != nil {
   142  		return err
   143  	}
   144  
   145  	if err := n.startAPIs(); err != nil {
   146  		return fmt.Errorf("could not start the core APIs: %w", err)
   147  	}
   148  
   149  	// The protocol must be started after the API, otherwise nobody is listening
   150  	// to the internal events emitted during that phase (like during the state
   151  	// restoration), which will cause issues to APIs consumer like system tests.
   152  	if err := n.protocol.Start(n.ctx); err != nil {
   153  		return fmt.Errorf("could not start the core: %w", err)
   154  	}
   155  
   156  	// if a chain is being replayed tendermint does this during the initial handshake with the
   157  	// app and does so synchronously. We to need to set this off in a goroutine so we can catch any
   158  	// SIGTERM during that replay and shutdown properly
   159  	errCh := make(chan error)
   160  	go func() {
   161  		defer func() {
   162  			// if a consensus failure happens during replay tendermint panics
   163  			// we need to catch it so we can call shutdown and then re-panic
   164  			if r := recover(); r != nil {
   165  				n.Stop()
   166  				panic(r)
   167  			}
   168  		}()
   169  		if err := n.startBlockchain(log, tmHome, network, networkURL); err != nil {
   170  			errCh <- err
   171  		}
   172  		// start the nullblockchain if we are in that mode, it *needs* to be after we've started the gRPC server
   173  		// otherwise it'll start calling init-chain and all the way before we're ready.
   174  		if n.conf.Blockchain.ChainProvider == blockchain.ProviderNullChain {
   175  			if err := n.nullBlockchain.StartServer(); err != nil {
   176  				errCh <- err
   177  			}
   178  		}
   179  	}()
   180  
   181  	// at this point all is good, and we should be started, we can
   182  	// just wait for signals or whatever
   183  	n.Log.Info("Vega startup complete",
   184  		logging.String("node-mode", string(n.conf.NodeMode)))
   185  
   186  	// wait for possible protocol upgrade, or user exit
   187  	if err := n.wait(errCh); err != nil {
   188  		return err
   189  	}
   190  
   191  	return n.Stop()
   192  }
   193  
   194  func (n *Command) wait(errCh <-chan error) error {
   195  	gracefulStop := make(chan os.Signal, 1)
   196  	signal.Notify(gracefulStop, syscall.SIGTERM, syscall.SIGINT)
   197  	for {
   198  		select {
   199  		case sig := <-gracefulStop:
   200  			n.Log.Info("Caught signal", logging.String("name", fmt.Sprintf("%+v", sig)))
   201  			return nil
   202  		case e := <-errCh:
   203  			n.Log.Error("problem starting blockchain", logging.Error(e))
   204  			return e
   205  		case <-n.ctx.Done():
   206  			// nothing to do
   207  			return nil
   208  		}
   209  	}
   210  }
   211  
   212  func (n *Command) stopBlockchain() error {
   213  	if n.blockchainServer == nil {
   214  		return nil
   215  	}
   216  	return n.blockchainServer.Stop()
   217  }
   218  
   219  func (n *Command) Stop() error {
   220  	upStatus := n.protocol.GetProtocolUpgradeService().GetUpgradeStatus()
   221  
   222  	// Blockchain server has been already stopped by the app during the upgrade.
   223  	// Calling stop again would block forever.
   224  	if n.blockchainServer != nil && !upStatus.ReadyToUpgrade {
   225  		n.blockchainServer.Stop()
   226  	}
   227  	if n.protocol != nil {
   228  		n.protocol.Stop()
   229  	}
   230  	if n.grpcServer != nil {
   231  		n.grpcServer.Stop()
   232  	}
   233  	if n.proxyServer != nil {
   234  		n.proxyServer.Stop()
   235  	}
   236  	if n.adminServer != nil {
   237  		n.adminServer.Stop()
   238  	}
   239  
   240  	if n.conf.IsValidator() {
   241  		if err := n.nodeWallets.Ethereum.Cleanup(); err != nil {
   242  			n.Log.Error("couldn't clean up Ethereum node wallet", logging.Error(err))
   243  		}
   244  	}
   245  
   246  	var err error
   247  	if n.pproffhandlr != nil {
   248  		err = n.pproffhandlr.Stop()
   249  	}
   250  
   251  	n.Log.Info("Vega shutdown complete",
   252  		logging.String("version", version.Get()),
   253  		logging.String("version-hash", version.GetCommitHash()))
   254  
   255  	n.Log.Sync()
   256  	n.cancel()
   257  
   258  	// Blockchain server need to be killed as it is stuck in BeginBlock function.
   259  	if upStatus.ReadyToUpgrade {
   260  		return kill()
   261  	}
   262  
   263  	return err
   264  }
   265  
   266  func (n *Command) startAPIs() error {
   267  	n.grpcServer = api.NewGRPC(
   268  		n.Log,
   269  		n.conf.API,
   270  		n.stats,
   271  		n.blockchainClient,
   272  		n.protocol.GetEventForwarder(),
   273  		n.protocol.GetTimeService(),
   274  		n.protocol.GetEventService(),
   275  		n.protocol.GetPoW(),
   276  		n.protocol.GetSpamEngine(),
   277  		n.protocol.GetPowEngine(),
   278  	)
   279  
   280  	n.coreService = coreapi.NewService(n.ctx, n.Log, n.conf.CoreAPI, n.protocol.GetBroker())
   281  	n.grpcServer.RegisterService(func(server *grpc.Server) {
   282  		apipb.RegisterCoreStateServiceServer(server, n.coreService)
   283  	})
   284  
   285  	// watch configs
   286  	n.confWatcher.OnConfigUpdate(
   287  		func(cfg config.Config) { n.grpcServer.ReloadConf(cfg.API) },
   288  	)
   289  
   290  	n.proxyServer = rest.NewProxyServer(n.Log, n.conf.API)
   291  
   292  	if n.conf.IsValidator() {
   293  		adminServer, err := admin.NewValidatorServer(n.Log, n.conf.Admin, n.vegaPaths, n.nodeWalletPassphrase, n.nodeWallets, n.protocol.GetProtocolUpgradeService())
   294  		if err != nil {
   295  			return err
   296  		}
   297  		n.adminServer = adminServer
   298  	} else {
   299  		adminServer, err := admin.NewNonValidatorServer(n.Log, n.conf.Admin, n.protocol.GetProtocolUpgradeService())
   300  		if err != nil {
   301  			return err
   302  		}
   303  		n.adminServer = adminServer
   304  	}
   305  
   306  	go n.grpcServer.Start()
   307  	go n.proxyServer.Start()
   308  
   309  	if n.adminServer != nil {
   310  		go n.adminServer.Start()
   311  	}
   312  
   313  	return nil
   314  }
   315  
   316  func (n *Command) startBlockchain(log *logging.Logger, tmHome, network, networkURL string) error {
   317  	// make sure any env variable is resolved
   318  	tmHome = os.ExpandEnv(tmHome)
   319  	n.abciApp = newAppW(n.protocol.Abci())
   320  
   321  	switch n.conf.Blockchain.ChainProvider {
   322  	case blockchain.ProviderTendermint:
   323  		var err error
   324  		// initialise the node
   325  		n.tmNode, err = n.startABCI(log, n.abciApp, tmHome, network, networkURL)
   326  		if err != nil {
   327  			return err
   328  		}
   329  		n.blockchainServer = blockchain.NewServer(n.Log, n.tmNode)
   330  		// initialise the client
   331  		client, err := n.tmNode.GetClient()
   332  		if err != nil {
   333  			return err
   334  		}
   335  		n.blockchainClient.Set(client, n.tmNode.MempoolSize)
   336  	case blockchain.ProviderNullChain:
   337  		// nullchain acts as both the client and the server because its does everything
   338  		n.nullBlockchain = nullchain.NewClient(
   339  			n.Log,
   340  			n.conf.Blockchain.Null,
   341  			n.protocol.GetTimeService(), // if we've loaded from a snapshot we need to be able to ask the protocol what time its at
   342  		)
   343  		n.nullBlockchain.SetABCIApp(n.abciApp)
   344  		n.blockchainServer = blockchain.NewServer(n.Log, n.nullBlockchain)
   345  		// n.blockchainClient = blockchain.NewClient(n.nullBlockchain)
   346  		n.blockchainClient.Set(n.nullBlockchain, 100*1024*1024)
   347  
   348  	default:
   349  		return ErrUnknownChainProvider
   350  	}
   351  
   352  	n.confWatcher.OnConfigUpdate(
   353  		func(cfg config.Config) { n.blockchainServer.ReloadConf(cfg.Blockchain) },
   354  	)
   355  
   356  	if err := n.blockchainServer.Start(); err != nil {
   357  		return err
   358  	}
   359  
   360  	if err := n.blockchainClient.Start(); err != nil {
   361  		return err
   362  	}
   363  
   364  	return nil
   365  }
   366  
   367  func (n *Command) setupCommon() (err error) {
   368  	// this shouldn't happen, the context is initialized in here
   369  	if n.cancel != nil {
   370  		n.cancel()
   371  	}
   372  
   373  	// ensure we cancel the context on error
   374  	defer func() {
   375  		if err != nil {
   376  			n.cancel()
   377  		}
   378  	}()
   379  
   380  	// initialize the application context
   381  	n.ctx, n.cancel = context.WithCancel(context.Background())
   382  
   383  	// get the configuration, this have been loaded by the root
   384  	conf := n.confWatcher.Get()
   385  
   386  	// reload logger with the setup from configuration
   387  	n.Log = logging.NewLoggerFromConfig(conf.Logging).Named(n.Log.GetName())
   388  
   389  	// enable pprof if necessary
   390  	if conf.Pprof.Enabled {
   391  		n.Log.Info("vega is starting with pprof profile, this is not a recommended setting for production")
   392  		n.pproffhandlr, err = pprof.New(n.Log, conf.Pprof)
   393  		if err != nil {
   394  			return err
   395  		}
   396  		n.confWatcher.OnConfigUpdate(
   397  			func(cfg config.Config) { n.pproffhandlr.ReloadConf(cfg.Pprof) },
   398  		)
   399  	}
   400  
   401  	n.stats = stats.New(n.Log, n.conf.Stats)
   402  
   403  	// start prometheus stuff
   404  	metrics.Start(n.conf.Metrics)
   405  
   406  	return err
   407  }
   408  
   409  func (n *Command) loadNodeWallets() (err error) {
   410  	// if we are a non-validator, nothing needs to be done here
   411  	if !n.conf.IsValidator() {
   412  		return nil
   413  	}
   414  
   415  	n.nodeWallets, err = nodewallets.GetNodeWallets(n.conf.NodeWallet, n.vegaPaths, n.nodeWalletPassphrase)
   416  	if err != nil {
   417  		return fmt.Errorf("couldn't get node wallets: %w", err)
   418  	}
   419  
   420  	return n.nodeWallets.Verify()
   421  }
   422  
   423  func (n *Command) startABCI(log *logging.Logger, app types.Application, tmHome string, network string, networkURL string) (*abci.TmNode, error) {
   424  	var (
   425  		genesisDoc *tmtypes.GenesisDoc
   426  		err        error
   427  	)
   428  	if len(network) > 0 {
   429  		genesisDoc, err = httpGenesisDocProvider(network)
   430  	} else if len(networkURL) > 0 {
   431  		genesisDoc, err = genesisDocHTTPFromURL(networkURL)
   432  	}
   433  	if err != nil {
   434  		return nil, err
   435  	}
   436  
   437  	return abci.NewTmNode(
   438  		n.conf.Blockchain,
   439  		log,
   440  		tmHome,
   441  		app,
   442  		genesisDoc,
   443  	)
   444  }
   445  
   446  func (n *Command) startBlockchainClients() error {
   447  	// just intantiate the client here, we'll setup the actual impl later on
   448  	// when the null blockchain or tendermint is started.
   449  	n.blockchainClient = blockchain.NewClient()
   450  
   451  	// if we are a non-validator, nothing needs to be done here
   452  	if !n.conf.IsValidator() {
   453  		return nil
   454  	}
   455  
   456  	// We may not need ethereum client initialized when we have not
   457  	// provided the ethereum endpoint. We skip creating client here
   458  	// when RPCEnpoint is empty and the nullchain present.
   459  	if n.conf.IsNullChain() && !n.conf.HaveEthClient() {
   460  		return nil
   461  	}
   462  
   463  	var err error
   464  	n.l2Clients, err = ethclient.NewL2Clients(n.ctx, n.Log, n.conf.Ethereum)
   465  	if err != nil {
   466  		return fmt.Errorf("could not instantiate ethereum l2 clients: %w", err)
   467  	}
   468  
   469  	n.primaryEthClient, err = ethclient.PrimaryDial(n.ctx, n.conf.Ethereum)
   470  	if err != nil {
   471  		return fmt.Errorf("could not instantiate primary ethereum client: %w", err)
   472  	}
   473  
   474  	n.secondaryEthClient, err = ethclient.SecondaryDial(n.ctx, n.conf.Ethereum)
   475  	if err != nil {
   476  		return fmt.Errorf("could not instantiate secondary ethereum client: %w", err)
   477  	}
   478  
   479  	n.primaryEthConfirmations = ethclient.NewEthereumConfirmations(n.conf.Ethereum, n.primaryEthClient, nil, ethclient.FinalityStateFinalized)
   480  
   481  	// for arbitrum the finality state of a block is in now way connected to the Arbitrum network reaching consensus so Vega gains nothing
   482  	// from waiting for safe/finalized. Instead we just wait for the event to be seen in the latest block and rely on the consensus check
   483  	// Vega performs itself with node-votes. If each validator is running their own Arbitrum node, or is using a node that they need trustworthy
   484  	// then this is sufficient. A far as is know, block reorgs do not happen on Arbitrum.
   485  	n.secondaryEthConfirmations = ethclient.NewEthereumConfirmations(n.conf.Ethereum, n.secondaryEthClient, nil, ethclient.FinalityStateLatest)
   486  
   487  	return nil
   488  }
   489  
   490  // kill the running process by signaling itself with SIGKILL.
   491  func kill() error {
   492  	p, err := os.FindProcess(os.Getpid())
   493  	if err != nil {
   494  		return err
   495  	}
   496  	return p.Signal(syscall.SIGKILL)
   497  }