github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/cmd/node.go (about) 1 package cmd 2 3 import ( 4 "context" 5 "os" 6 "os/signal" 7 "syscall" 8 9 "github.com/rs/zerolog" 10 11 "github.com/onflow/flow-go/module/component" 12 "github.com/onflow/flow-go/module/irrecoverable" 13 "github.com/onflow/flow-go/module/util" 14 "github.com/onflow/flow-go/utils/logging" 15 ) 16 17 var _ component.Component = (*FlowNodeImp)(nil) 18 19 type Node interface { 20 component.Component 21 22 // Run initiates all common components (logger, database, protocol state etc.) 23 // then starts each component. It also sets up a channel to gracefully shut 24 // down each component if a SIGINT is received. 25 Run() 26 } 27 28 // FlowNodeImp is created by the FlowNodeBuilder with all components ready to be started. 29 // The Run function starts all the components, and is blocked until either a termination 30 // signal is received or a irrecoverable error is encountered. 31 type FlowNodeImp struct { 32 component.Component 33 *NodeConfig 34 logger zerolog.Logger 35 postShutdown func() error 36 fatalHandler func(error) 37 } 38 39 // NewNode returns a new node instance 40 func NewNode(component component.Component, cfg *NodeConfig, logger zerolog.Logger, cleanup func() error, handleFatal func(error)) Node { 41 return &FlowNodeImp{ 42 Component: component, 43 NodeConfig: cfg, 44 logger: logger, 45 postShutdown: cleanup, 46 fatalHandler: handleFatal, 47 } 48 } 49 50 // Run starts all the node's components, then blocks until a SIGINT or SIGTERM is received, at 51 // which point it gracefully shuts down. 52 // Any unhandled irrecoverable errors thrown in child components will propagate up to here and 53 // result in a fatal error. 54 func (node *FlowNodeImp) Run() { 55 // Cancelling this context notifies all child components that it's time to shutdown 56 ctx, cancel := context.WithCancel(context.Background()) 57 defer cancel() 58 59 // Block until node is shutting down 60 err := node.run(ctx, cancel) 61 62 // Any error received is considered fatal. 63 if err != nil { 64 node.fatalHandler(err) 65 return 66 } 67 68 // Run post shutdown cleanup logic 69 err = node.postShutdown() 70 71 // Since this occurs after all components have stopped, it is not considered fatal 72 if err != nil { 73 node.logger.Error().Err(err).Msg("error encountered during cleanup") 74 } 75 76 node.logger.Info().Msgf("%s node shutdown complete", node.BaseConfig.NodeRole) 77 } 78 79 // run starts the node and blocks until a SIGINT/SIGTERM is received or an error is encountered. 80 // It returns: 81 // - nil if a termination signal is received, and all components have been gracefully stopped. 82 // - error if a irrecoverable error is received 83 func (node *FlowNodeImp) run(ctx context.Context, shutdown context.CancelFunc) error { 84 // Components will pass unhandled irrecoverable errors to this channel via signalerCtx (or a 85 // child context). Any errors received on this channel should halt the node. 86 signalerCtx, errChan := irrecoverable.WithSignaler(ctx) 87 88 // This context will be marked done when SIGINT/SIGTERM is received. 89 sigCtx, stop := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM) 90 91 // 1: Start up 92 // Start all the components 93 node.Start(signalerCtx) 94 95 // Log when all components have been started 96 go func() { 97 select { 98 case <-node.Ready(): 99 node.logger.Info(). 100 Hex("spork_id", logging.ID(node.SporkID)). 101 Msgf("%s node startup complete", node.BaseConfig.NodeRole) 102 case <-ctx.Done(): 103 } 104 }() 105 106 // 2: Run the node 107 // Block here until either a signal or irrecoverable error is received. 108 err := util.WaitError(errChan, sigCtx.Done()) 109 110 // Stop relaying signals. Subsequent signals will be handled by the OS and will abort the 111 // process. 112 stop() 113 114 // If an irrecoverable error was received, abort 115 if err != nil { 116 return err 117 } 118 119 // 3: Shut down 120 // Send shutdown signal to components 121 node.logger.Info().Msgf("%s node shutting down", node.BaseConfig.NodeRole) 122 shutdown() 123 124 // Block here until all components have stopped or an irrecoverable error is received. 125 return util.WaitError(errChan, node.Done()) 126 }