github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/component/component.go (about) 1 package component 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 8 "go.uber.org/atomic" 9 10 "github.com/onflow/flow-go/module" 11 "github.com/onflow/flow-go/module/irrecoverable" 12 "github.com/onflow/flow-go/module/util" 13 ) 14 15 // ErrComponentShutdown is returned by a component which has already been shut down. 16 var ErrComponentShutdown = fmt.Errorf("component has already shut down") 17 18 // Component represents a component which can be started and stopped, and exposes 19 // channels that close when startup and shutdown have completed. 20 // Once Start has been called, the channel returned by Done must close eventually, 21 // whether that be because of a graceful shutdown or an irrecoverable error. 22 type Component interface { 23 module.Startable 24 module.ReadyDoneAware 25 } 26 27 type ComponentFactory func() (Component, error) 28 29 // OnError reacts to an irrecoverable error 30 // It is meant to inspect the error, determining its type and seeing if e.g. a restart or some other measure is suitable, 31 // and then return an ErrorHandlingResult indicating how RunComponent should proceed. 32 // Before returning, it could also: 33 // - panic (in sandboxnet / benchmark) 34 // - log in various Error channels and / or send telemetry ... 35 type OnError = func(error) ErrorHandlingResult 36 37 type ErrorHandlingResult int 38 39 const ( 40 ErrorHandlingRestart ErrorHandlingResult = iota 41 ErrorHandlingStop 42 ) 43 44 // RunComponent repeatedly starts components returned from the given ComponentFactory, shutting them 45 // down when they encounter irrecoverable errors and passing those errors to the given error handler. 46 // If the given context is cancelled, it will wait for the current component instance to shutdown 47 // before returning. 48 // The returned error is either: 49 // - The context error if the context was canceled 50 // - The last error handled if the error handler returns ErrorHandlingStop 51 // - An error returned from componentFactory while generating an instance of component 52 func RunComponent(ctx context.Context, componentFactory ComponentFactory, handler OnError) error { 53 // reference to per-run signals for the component 54 var component Component 55 var cancel context.CancelFunc 56 var done <-chan struct{} 57 var irrecoverableErr <-chan error 58 59 start := func() error { 60 var err error 61 62 component, err = componentFactory() 63 if err != nil { 64 return err // failure to generate the component, should be handled out-of-band because a restart won't help 65 } 66 67 // context used to run the component 68 var runCtx context.Context 69 runCtx, cancel = context.WithCancel(ctx) 70 71 // signaler context used for irrecoverables 72 var signalCtx irrecoverable.SignalerContext 73 signalCtx, irrecoverableErr = irrecoverable.WithSignaler(runCtx) 74 75 // we start the component in a separate goroutine, since an irrecoverable error 76 // could be thrown with `signalCtx` which terminates the calling goroutine 77 go component.Start(signalCtx) 78 79 done = component.Done() 80 81 return nil 82 } 83 84 stop := func() { 85 // shutdown the component and wait until it's done 86 cancel() 87 <-done 88 } 89 90 for { 91 select { 92 case <-ctx.Done(): 93 return ctx.Err() 94 default: 95 } 96 97 if err := start(); err != nil { 98 return err // failure to start 99 } 100 101 if err := util.WaitError(irrecoverableErr, done); err != nil { 102 // an irrecoverable error was encountered 103 stop() 104 105 // send error to the handler 106 switch result := handler(err); result { 107 case ErrorHandlingRestart: 108 continue 109 case ErrorHandlingStop: 110 return err 111 default: 112 panic(fmt.Sprintf("invalid error handling result: %v", result)) 113 } 114 } else if ctx.Err() != nil { 115 // the parent context was cancelled 116 stop() 117 return ctx.Err() 118 } 119 120 // clean completion 121 return nil 122 } 123 } 124 125 // ReadyFunc is called within a ComponentWorker function to indicate that the worker is ready 126 // ComponentManager's Ready channel is closed when all workers are ready. 127 type ReadyFunc func() 128 129 // ComponentWorker represents a worker routine of a component. 130 // It takes a SignalerContext which can be used to throw any irrecoverable errors it encounters, 131 // as well as a ReadyFunc which must be called to signal that it is ready. The ComponentManager 132 // waits until all workers have signaled that they are ready before closing its own Ready channel. 133 type ComponentWorker func(ctx irrecoverable.SignalerContext, ready ReadyFunc) 134 135 // NoopWorker is a worker routine which is immediately ready, does nothing, and 136 // exits when the context is done. 137 func NoopWorker(ctx irrecoverable.SignalerContext, ready ReadyFunc) { 138 ready() 139 <-ctx.Done() 140 } 141 142 // ComponentManagerBuilder provides a mechanism for building a ComponentManager 143 type ComponentManagerBuilder interface { 144 // AddWorker adds a worker routine for the ComponentManager 145 AddWorker(ComponentWorker) ComponentManagerBuilder 146 147 // Build builds and returns a new ComponentManager instance 148 Build() *ComponentManager 149 } 150 151 type componentManagerBuilderImpl struct { 152 workers []ComponentWorker 153 } 154 155 // NewComponentManagerBuilder returns a new ComponentManagerBuilder 156 func NewComponentManagerBuilder() ComponentManagerBuilder { 157 return &componentManagerBuilderImpl{} 158 } 159 160 // AddWorker adds a ComponentWorker closure to the ComponentManagerBuilder 161 // All worker functions will be run in parallel when the ComponentManager is started. 162 // Note: AddWorker is not concurrency-safe, and should only be called on an individual builder 163 // within a single goroutine. 164 func (c *componentManagerBuilderImpl) AddWorker(worker ComponentWorker) ComponentManagerBuilder { 165 c.workers = append(c.workers, worker) 166 return c 167 } 168 169 // Build returns a new ComponentManager instance with the configured workers 170 // Build may be called multiple times to create multiple individual ComponentManagers. This will 171 // result in the worker routines being called multiple times. If this is unsafe, do not call it 172 // more than once! 173 func (c *componentManagerBuilderImpl) Build() *ComponentManager { 174 return &ComponentManager{ 175 started: atomic.NewBool(false), 176 ready: make(chan struct{}), 177 done: make(chan struct{}), 178 workersDone: make(chan struct{}), 179 shutdownSignal: make(chan struct{}), 180 workers: c.workers, 181 } 182 } 183 184 var _ Component = (*ComponentManager)(nil) 185 186 // ComponentManager is used to manage the worker routines of a Component, and implements all of the 187 // methods required by the Component interface, abstracting them away from individual implementations. 188 // 189 // Since component manager implements the Component interface, its Ready() and Done() methods are 190 // idempotent, and can be called immediately after instantiation. The Ready() channel is closed when 191 // all worker functions have called their ReadyFunc, and its Done() channel is closed after all worker 192 // functions have returned. 193 // 194 // Shutdown is signalled by cancelling the irrecoverable.SignalerContext passed to Start(). This context 195 // is also used by workers to communicate irrecoverable errors. All irrecoverable errors are considered 196 // fatal and are propagated to the caller of Start() via the context's Throw method. 197 type ComponentManager struct { 198 started *atomic.Bool 199 ready chan struct{} 200 done chan struct{} 201 workersDone chan struct{} 202 shutdownSignal chan struct{} 203 204 workers []ComponentWorker 205 } 206 207 // Start initiates the ComponentManager by launching all worker routines. 208 // Start must only be called once. It will panic if called more than once. 209 func (c *ComponentManager) Start(parent irrecoverable.SignalerContext) { 210 // Make sure we only start once. atomically check if started is false then set it to true. 211 // If it was not false, panic 212 if !c.started.CompareAndSwap(false, true) { 213 panic(module.ErrMultipleStartup) 214 } 215 216 ctx, cancel := context.WithCancel(parent) 217 signalerCtx, errChan := irrecoverable.WithSignaler(ctx) 218 219 go c.waitForShutdownSignal(ctx.Done()) 220 221 // launch goroutine to propagate irrecoverable error 222 go func() { 223 // Closing the done channel here guarantees that any irrecoverable errors encountered will 224 // be propagated to the parent first. Otherwise, there's a race condition between when this 225 // goroutine and the parent's are scheduled. If the parent is scheduled first, any errors 226 // thrown within workers would not have propagated, and it would only receive the done signal 227 defer func() { 228 cancel() // shutdown all workers 229 // wait for shutdown signal before signalling the component is done 230 // this guarantees that ShutdownSignal is closed before Done 231 <-c.shutdownSignal 232 <-c.workersDone 233 close(c.done) 234 }() 235 236 // wait until the workersDone channel is closed or an irrecoverable error is encountered 237 if err := util.WaitError(errChan, c.workersDone); err != nil { 238 // propagate the error directly to the parent because a failure in a worker routine 239 // is considered irrecoverable 240 parent.Throw(err) 241 } 242 }() 243 244 var workersReady sync.WaitGroup 245 var workersDone sync.WaitGroup 246 workersReady.Add(len(c.workers)) 247 workersDone.Add(len(c.workers)) 248 249 // launch workers 250 for _, worker := range c.workers { 251 worker := worker 252 go func() { 253 defer workersDone.Done() 254 var readyOnce sync.Once 255 worker(signalerCtx, func() { 256 readyOnce.Do(func() { 257 workersReady.Done() 258 }) 259 }) 260 }() 261 } 262 263 // launch goroutine to close ready channel 264 go c.waitForReady(&workersReady) 265 266 // launch goroutine to close workersDone channel 267 go c.waitForDone(&workersDone) 268 } 269 270 func (c *ComponentManager) waitForShutdownSignal(shutdownSignal <-chan struct{}) { 271 <-shutdownSignal 272 close(c.shutdownSignal) 273 } 274 275 func (c *ComponentManager) waitForReady(workersReady *sync.WaitGroup) { 276 workersReady.Wait() 277 close(c.ready) 278 } 279 280 func (c *ComponentManager) waitForDone(workersDone *sync.WaitGroup) { 281 workersDone.Wait() 282 close(c.workersDone) 283 } 284 285 // Ready returns a channel which is closed once all the worker routines have been launched and are ready. 286 // If any worker routines exit before they indicate that they are ready, the channel returned from Ready will never close. 287 func (c *ComponentManager) Ready() <-chan struct{} { 288 return c.ready 289 } 290 291 // Done returns a channel which is closed once the ComponentManager has shut down. 292 // This happens after all worker routines have shut down (either gracefully or by throwing an error). 293 func (c *ComponentManager) Done() <-chan struct{} { 294 return c.done 295 } 296 297 // ShutdownSignal returns a channel that is closed when shutdown has commenced. 298 // This can happen either if the ComponentManager's context is canceled, or a worker routine encounters 299 // an irrecoverable error. 300 // If this is called before Start, a nil channel will be returned. 301 func (c *ComponentManager) ShutdownSignal() <-chan struct{} { 302 return c.shutdownSignal 303 }