github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/cmd/node.go (about)

     1  package cmd
     2  
     3  import (
     4  	"context"
     5  	"os"
     6  	"os/signal"
     7  	"syscall"
     8  
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/onflow/flow-go/module/component"
    12  	"github.com/onflow/flow-go/module/irrecoverable"
    13  	"github.com/onflow/flow-go/module/util"
    14  	"github.com/onflow/flow-go/utils/logging"
    15  )
    16  
// Compile-time assertion that *FlowNodeImp satisfies component.Component.
var _ component.Component = (*FlowNodeImp)(nil)
    18  
// Node is a component.Component that additionally exposes a blocking Run
// entry point. FlowNodeImp is the implementation returned by NewNode.
type Node interface {
	component.Component

	// Run starts the node's components and blocks until the node has shut down.
	// In the FlowNodeImp implementation, a SIGINT or SIGTERM triggers a graceful
	// shutdown, and an error surfaced while running is passed to the fatal handler.
	Run()
}
    27  
// FlowNodeImp is created by the FlowNodeBuilder with all components ready to be started.
// The Run function starts all the components, and is blocked until either a termination
// signal is received or an irrecoverable error is encountered.
//
// Fields, as used in this file:
//   - Component (embedded): the component that run starts and waits on; the Start,
//     Ready and Done calls made on the node are presumably promoted from it.
//   - NodeConfig (embedded): node configuration; BaseConfig.NodeRole and SporkID are
//     read through it for log messages.
//   - logger: used for startup, shutdown and cleanup-error log messages.
//   - postShutdown: called by Run after a graceful stop; a returned error is only logged.
//   - fatalHandler: called by Run with any non-nil error returned by run.
type FlowNodeImp struct {
	component.Component
	*NodeConfig
	logger       zerolog.Logger
	postShutdown func() error
	fatalHandler func(error)
}
    38  
    39  // NewNode returns a new node instance
    40  func NewNode(component component.Component, cfg *NodeConfig, logger zerolog.Logger, cleanup func() error, handleFatal func(error)) Node {
    41  	return &FlowNodeImp{
    42  		Component:    component,
    43  		NodeConfig:   cfg,
    44  		logger:       logger,
    45  		postShutdown: cleanup,
    46  		fatalHandler: handleFatal,
    47  	}
    48  }
    49  
    50  // Run starts all the node's components, then blocks until a SIGINT or SIGTERM is received, at
    51  // which point it gracefully shuts down.
    52  // Any unhandled irrecoverable errors thrown in child components will propagate up to here and
    53  // result in a fatal error.
    54  func (node *FlowNodeImp) Run() {
    55  	// Cancelling this context notifies all child components that it's time to shutdown
    56  	ctx, cancel := context.WithCancel(context.Background())
    57  	defer cancel()
    58  
    59  	// Block until node is shutting down
    60  	err := node.run(ctx, cancel)
    61  
    62  	// Any error received is considered fatal.
    63  	if err != nil {
    64  		node.fatalHandler(err)
    65  		return
    66  	}
    67  
    68  	// Run post shutdown cleanup logic
    69  	err = node.postShutdown()
    70  
    71  	// Since this occurs after all components have stopped, it is not considered fatal
    72  	if err != nil {
    73  		node.logger.Error().Err(err).Msg("error encountered during cleanup")
    74  	}
    75  
    76  	node.logger.Info().Msgf("%s node shutdown complete", node.BaseConfig.NodeRole)
    77  }
    78  
    79  // run starts the node and blocks until a SIGINT/SIGTERM is received or an error is encountered.
    80  // It returns:
    81  //   - nil if a termination signal is received, and all components have been gracefully stopped.
    82  //   - error if a irrecoverable error is received
    83  func (node *FlowNodeImp) run(ctx context.Context, shutdown context.CancelFunc) error {
    84  	// Components will pass unhandled irrecoverable errors to this channel via signalerCtx (or a
    85  	// child context). Any errors received on this channel should halt the node.
    86  	signalerCtx, errChan := irrecoverable.WithSignaler(ctx)
    87  
    88  	// This context will be marked done when SIGINT/SIGTERM is received.
    89  	sigCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
    90  
    91  	// 1: Start up
    92  	// Start all the components
    93  	node.Start(signalerCtx)
    94  
    95  	// Log when all components have been started
    96  	go func() {
    97  		select {
    98  		case <-node.Ready():
    99  			node.logger.Info().
   100  				Hex("spork_id", logging.ID(node.SporkID)).
   101  				Msgf("%s node startup complete", node.BaseConfig.NodeRole)
   102  		case <-ctx.Done():
   103  		}
   104  	}()
   105  
   106  	// 2: Run the node
   107  	// Block here until either a signal or irrecoverable error is received.
   108  	err := util.WaitError(errChan, sigCtx.Done())
   109  
   110  	// Stop relaying signals. Subsequent signals will be handled by the OS and will abort the
   111  	// process.
   112  	stop()
   113  
   114  	// If an irrecoverable error was received, abort
   115  	if err != nil {
   116  		return err
   117  	}
   118  
   119  	// 3: Shut down
   120  	// Send shutdown signal to components
   121  	node.logger.Info().Msgf("%s node shutting down", node.BaseConfig.NodeRole)
   122  	shutdown()
   123  
   124  	// Block here until all components have stopped or an irrecoverable error is received.
   125  	return util.WaitError(errChan, node.Done())
   126  }