github.com/koko1123/flow-go-1@v0.29.6/module/component/component.go (about) 1 package component 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 8 "go.uber.org/atomic" 9 10 "github.com/koko1123/flow-go-1/module" 11 "github.com/koko1123/flow-go-1/module/irrecoverable" 12 "github.com/koko1123/flow-go-1/module/util" 13 ) 14 15 // ErrComponentShutdown is returned by a component which has already been shut down. 16 var ErrComponentShutdown = fmt.Errorf("component has already shut down") 17 18 // Component represents a component which can be started and stopped, and exposes 19 // channels that close when startup and shutdown have completed. 20 // Once Start has been called, the channel returned by Done must close eventually, 21 // whether that be because of a graceful shutdown or an irrecoverable error. 22 type Component interface { 23 module.Startable 24 module.ReadyDoneAware 25 } 26 27 type ComponentFactory func() (Component, error) 28 29 // OnError reacts to an irrecoverable error 30 // It is meant to inspect the error, determining its type and seeing if e.g. a restart or some other measure is suitable, 31 // and then return an ErrorHandlingResult indicating how RunComponent should proceed. 32 // Before returning, it could also: 33 // - panic (in sandboxnet / benchmark) 34 // - log in various Error channels and / or send telemetry ... 35 type OnError = func(error) ErrorHandlingResult 36 37 type ErrorHandlingResult int 38 39 const ( 40 ErrorHandlingRestart ErrorHandlingResult = iota 41 ErrorHandlingStop 42 ) 43 44 // RunComponent repeatedly starts components returned from the given ComponentFactory, shutting them 45 // down when they encounter irrecoverable errors and passing those errors to the given error handler. 46 // If the given context is cancelled, it will wait for the current component instance to shutdown 47 // before returning. 48 // The returned error is either: 49 // - The context error if the context was canceled 50 // - The last error handled if the error handler returns ErrorHandlingStop 51 // - An error returned from componentFactory while generating an instance of component 52 func RunComponent(ctx context.Context, componentFactory ComponentFactory, handler OnError) error { 53 // reference to per-run signals for the component 54 var component Component 55 var cancel context.CancelFunc 56 var done <-chan struct{} 57 var irrecoverableErr <-chan error 58 59 start := func() error { 60 var err error 61 62 component, err = componentFactory() 63 if err != nil { 64 return err // failure to generate the component, should be handled out-of-band because a restart won't help 65 } 66 67 // context used to run the component 68 var runCtx context.Context 69 runCtx, cancel = context.WithCancel(ctx) 70 71 // signaler context used for irrecoverables 72 var signalCtx irrecoverable.SignalerContext 73 signalCtx, irrecoverableErr = irrecoverable.WithSignaler(runCtx) 74 75 // we start the component in a separate goroutine, since an irrecoverable error 76 // could be thrown with `signalCtx` which terminates the calling goroutine 77 go component.Start(signalCtx) 78 79 done = component.Done() 80 81 return nil 82 } 83 84 stop := func() { 85 // shutdown the component and wait until it's done 86 cancel() 87 <-done 88 } 89 90 for { 91 select { 92 case <-ctx.Done(): 93 return ctx.Err() 94 default: 95 } 96 97 if err := start(); err != nil { 98 return err // failure to start 99 } 100 101 if err := util.WaitError(irrecoverableErr, done); err != nil { 102 // an irrecoverable error was encountered 103 stop() 104 105 // send error to the handler 106 switch result := handler(err); result { 107 case ErrorHandlingRestart: 108 continue 109 case ErrorHandlingStop: 110 return err 111 default: 112 panic(fmt.Sprintf("invalid error handling result: %v", result)) 113 } 114 } else if ctx.Err() != nil { 115 // the parent context was cancelled 116 stop() 117 return ctx.Err() 118 } 119 120 // clean completion 121 return nil 122 } 123 } 124 125 // ReadyFunc is called within a ComponentWorker function to indicate that the worker is ready 126 // ComponentManager's Ready channel is closed when all workers are ready. 127 type ReadyFunc func() 128 129 // ComponentWorker represents a worker routine of a component. 130 // It takes a SignalerContext which can be used to throw any irrecoverable errors it encounters, 131 // as well as a ReadyFunc which must be called to signal that it is ready. The ComponentManager 132 // waits until all workers have signaled that they are ready before closing its own Ready channel. 133 type ComponentWorker func(ctx irrecoverable.SignalerContext, ready ReadyFunc) 134 135 // ComponentManagerBuilder provides a mechanism for building a ComponentManager 136 type ComponentManagerBuilder interface { 137 // AddWorker adds a worker routine for the ComponentManager 138 AddWorker(ComponentWorker) ComponentManagerBuilder 139 140 // Build builds and returns a new ComponentManager instance 141 Build() *ComponentManager 142 } 143 144 type componentManagerBuilderImpl struct { 145 workers []ComponentWorker 146 } 147 148 // NewComponentManagerBuilder returns a new ComponentManagerBuilder 149 func NewComponentManagerBuilder() ComponentManagerBuilder { 150 return &componentManagerBuilderImpl{} 151 } 152 153 // AddWorker adds a ComponentWorker closure to the ComponentManagerBuilder 154 // All worker functions will be run in parallel when the ComponentManager is started. 155 // Note: AddWorker is not concurrency-safe, and should only be called on an individual builder 156 // within a single goroutine. 157 func (c *componentManagerBuilderImpl) AddWorker(worker ComponentWorker) ComponentManagerBuilder { 158 c.workers = append(c.workers, worker) 159 return c 160 } 161 162 // Build returns a new ComponentManager instance with the configured workers 163 // Build may be called multiple times to create multiple individual ComponentManagers. This will 164 // result in the worker routines being called multiple times. If this is unsafe, do not call it 165 // more than once! 166 func (c *componentManagerBuilderImpl) Build() *ComponentManager { 167 return &ComponentManager{ 168 started: atomic.NewBool(false), 169 ready: make(chan struct{}), 170 done: make(chan struct{}), 171 workersDone: make(chan struct{}), 172 shutdownSignal: make(chan struct{}), 173 workers: c.workers, 174 } 175 } 176 177 var _ Component = (*ComponentManager)(nil) 178 179 // ComponentManager is used to manage the worker routines of a Component, and implements all of the 180 // methods required by the Component interface, abstracting them away from individual implementations. 181 // 182 // Since component manager implements the Component interface, its Ready() and Done() methods are 183 // idempotent, and can be called immediately after instantiation. The Ready() channel is closed when 184 // all worker functions have called their ReadyFunc, and its Done() channel is closed after all worker 185 // functions have returned. 186 // 187 // Shutdown is signalled by cancelling the irrecoverable.SignalerContext passed to Start(). This context 188 // is also used by workers to communicate irrecoverable errors. All irrecoverable errors are considered 189 // fatal and are propagated to the caller of Start() via the context's Throw method. 190 type ComponentManager struct { 191 started *atomic.Bool 192 ready chan struct{} 193 done chan struct{} 194 workersDone chan struct{} 195 shutdownSignal chan struct{} 196 197 workers []ComponentWorker 198 } 199 200 // Start initiates the ComponentManager by launching all worker routines. 201 // Start must only be called once. It will panic if called more than once. 202 func (c *ComponentManager) Start(parent irrecoverable.SignalerContext) { 203 // Make sure we only start once. atomically check if started is false then set it to true. 204 // If it was not false, panic 205 if !c.started.CompareAndSwap(false, true) { 206 panic(module.ErrMultipleStartup) 207 } 208 209 ctx, cancel := context.WithCancel(parent) 210 signalerCtx, errChan := irrecoverable.WithSignaler(ctx) 211 212 go c.waitForShutdownSignal(ctx.Done()) 213 214 // launch goroutine to propagate irrecoverable error 215 go func() { 216 // Closing the done channel here guarantees that any irrecoverable errors encountered will 217 // be propagated to the parent first. Otherwise, there's a race condition between when this 218 // goroutine and the parent's are scheduled. If the parent is scheduled first, any errors 219 // thrown within workers would not have propagated, and it would only receive the done signal 220 defer func() { 221 cancel() // shutdown all workers 222 // wait for shutdown signal before signalling the component is done 223 // this guarantees that ShutdownSignal is closed before Done 224 <-c.shutdownSignal 225 <-c.workersDone 226 close(c.done) 227 }() 228 229 // wait until the workersDone channel is closed or an irrecoverable error is encountered 230 if err := util.WaitError(errChan, c.workersDone); err != nil { 231 // propagate the error directly to the parent because a failure in a worker routine 232 // is considered irrecoverable 233 parent.Throw(err) 234 } 235 }() 236 237 var workersReady sync.WaitGroup 238 var workersDone sync.WaitGroup 239 workersReady.Add(len(c.workers)) 240 workersDone.Add(len(c.workers)) 241 242 // launch workers 243 for _, worker := range c.workers { 244 worker := worker 245 go func() { 246 defer workersDone.Done() 247 var readyOnce sync.Once 248 worker(signalerCtx, func() { 249 readyOnce.Do(func() { 250 workersReady.Done() 251 }) 252 }) 253 }() 254 } 255 256 // launch goroutine to close ready channel 257 go c.waitForReady(&workersReady) 258 259 // launch goroutine to close workersDone channel 260 go c.waitForDone(&workersDone) 261 } 262 263 func (c *ComponentManager) waitForShutdownSignal(shutdownSignal <-chan struct{}) { 264 <-shutdownSignal 265 close(c.shutdownSignal) 266 } 267 268 func (c *ComponentManager) waitForReady(workersReady *sync.WaitGroup) { 269 workersReady.Wait() 270 close(c.ready) 271 } 272 273 func (c *ComponentManager) waitForDone(workersDone *sync.WaitGroup) { 274 workersDone.Wait() 275 close(c.workersDone) 276 } 277 278 // Ready returns a channel which is closed once all the worker routines have been launched and are ready. 279 // If any worker routines exit before they indicate that they are ready, the channel returned from Ready will never close. 280 func (c *ComponentManager) Ready() <-chan struct{} { 281 return c.ready 282 } 283 284 // Done returns a channel which is closed once the ComponentManager has shut down. 285 // This happens after all worker routines have shut down (either gracefully or by throwing an error). 286 func (c *ComponentManager) Done() <-chan struct{} { 287 return c.done 288 } 289 290 // ShutdownSignal returns a channel that is closed when shutdown has commenced. 291 // This can happen either if the ComponentManager's context is canceled, or a worker routine encounters 292 // an irrecoverable error. 293 // If this is called before Start, a nil channel will be returned. 294 func (c *ComponentManager) ShutdownSignal() <-chan struct{} { 295 return c.shutdownSignal 296 }