github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/component/component.go (about)

     1  package component
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  
     8  	"go.uber.org/atomic"
     9  
    10  	"github.com/onflow/flow-go/module"
    11  	"github.com/onflow/flow-go/module/irrecoverable"
    12  	"github.com/onflow/flow-go/module/util"
    13  )
    14  
// ErrComponentShutdown is a sentinel error returned by a component which has already been shut down.
var ErrComponentShutdown = fmt.Errorf("component has already shut down")
    17  
// Component represents a component which can be started and stopped, and exposes
// channels that close when startup and shutdown have completed.
// It is the union of the module.Startable and module.ReadyDoneAware interfaces.
// Once Start has been called, the channel returned by Done must close eventually,
// whether that be because of a graceful shutdown or an irrecoverable error.
type Component interface {
	module.Startable
	module.ReadyDoneAware
}
    26  
// ComponentFactory creates a Component instance, or returns an error if one cannot be created.
// RunComponent invokes the factory once for every (re)start of the component.
type ComponentFactory func() (Component, error)
    28  
// OnError reacts to an irrecoverable error.
// It is meant to inspect the error, determine its type and decide whether e.g. a restart or some
// other measure is suitable, and then return an ErrorHandlingResult indicating how RunComponent
// should proceed.
// Before returning, it could also:
// - panic (in sandboxnet / benchmark)
// - log in various Error channels and / or send telemetry ...
type OnError = func(error) ErrorHandlingResult
    36  
// ErrorHandlingResult is the value returned by an OnError handler. It tells RunComponent how to
// proceed after an irrecoverable error has been handled.
type ErrorHandlingResult int

const (
	// ErrorHandlingRestart instructs RunComponent to request a new component from the factory
	// and start it.
	ErrorHandlingRestart ErrorHandlingResult = iota
	// ErrorHandlingStop instructs RunComponent to stop and return the handled error to its caller.
	ErrorHandlingStop
)
    43  
    44  // RunComponent repeatedly starts components returned from the given ComponentFactory, shutting them
    45  // down when they encounter irrecoverable errors and passing those errors to the given error handler.
    46  // If the given context is cancelled, it will wait for the current component instance to shutdown
    47  // before returning.
    48  // The returned error is either:
    49  // - The context error if the context was canceled
    50  // - The last error handled if the error handler returns ErrorHandlingStop
    51  // - An error returned from componentFactory while generating an instance of component
    52  func RunComponent(ctx context.Context, componentFactory ComponentFactory, handler OnError) error {
    53  	// reference to per-run signals for the component
    54  	var component Component
    55  	var cancel context.CancelFunc
    56  	var done <-chan struct{}
    57  	var irrecoverableErr <-chan error
    58  
    59  	start := func() error {
    60  		var err error
    61  
    62  		component, err = componentFactory()
    63  		if err != nil {
    64  			return err // failure to generate the component, should be handled out-of-band because a restart won't help
    65  		}
    66  
    67  		// context used to run the component
    68  		var runCtx context.Context
    69  		runCtx, cancel = context.WithCancel(ctx)
    70  
    71  		// signaler context used for irrecoverables
    72  		var signalCtx irrecoverable.SignalerContext
    73  		signalCtx, irrecoverableErr = irrecoverable.WithSignaler(runCtx)
    74  
    75  		// we start the component in a separate goroutine, since an irrecoverable error
    76  		// could be thrown with `signalCtx` which terminates the calling goroutine
    77  		go component.Start(signalCtx)
    78  
    79  		done = component.Done()
    80  
    81  		return nil
    82  	}
    83  
    84  	stop := func() {
    85  		// shutdown the component and wait until it's done
    86  		cancel()
    87  		<-done
    88  	}
    89  
    90  	for {
    91  		select {
    92  		case <-ctx.Done():
    93  			return ctx.Err()
    94  		default:
    95  		}
    96  
    97  		if err := start(); err != nil {
    98  			return err // failure to start
    99  		}
   100  
   101  		if err := util.WaitError(irrecoverableErr, done); err != nil {
   102  			// an irrecoverable error was encountered
   103  			stop()
   104  
   105  			// send error to the handler
   106  			switch result := handler(err); result {
   107  			case ErrorHandlingRestart:
   108  				continue
   109  			case ErrorHandlingStop:
   110  				return err
   111  			default:
   112  				panic(fmt.Sprintf("invalid error handling result: %v", result))
   113  			}
   114  		} else if ctx.Err() != nil {
   115  			// the parent context was cancelled
   116  			stop()
   117  			return ctx.Err()
   118  		}
   119  
   120  		// clean completion
   121  		return nil
   122  	}
   123  }
   124  
// ReadyFunc is called within a ComponentWorker function to indicate that the worker is ready.
// ComponentManager's Ready channel is closed when all workers are ready.
// The ReadyFunc handed out by ComponentManager may be called more than once; only the first
// call has an effect.
type ReadyFunc func()
   128  
// ComponentWorker represents a worker routine of a component.
// It takes a SignalerContext which can be used to throw any irrecoverable errors it encounters,
// as well as a ReadyFunc which must be called to signal that it is ready. The ComponentManager
// waits until all workers have signaled that they are ready before closing its own Ready channel.
// The worker is considered finished when the function returns.
type ComponentWorker func(ctx irrecoverable.SignalerContext, ready ReadyFunc)
   134  
   135  // NoopWorker is a worker routine which is immediately ready, does nothing, and
   136  // exits when the context is done.
   137  func NoopWorker(ctx irrecoverable.SignalerContext, ready ReadyFunc) {
   138  	ready()
   139  	<-ctx.Done()
   140  }
   141  
// ComponentManagerBuilder provides a mechanism for building a ComponentManager.
// Workers are registered with AddWorker, which returns the builder so calls can be chained,
// and the manager is then created with Build.
type ComponentManagerBuilder interface {
	// AddWorker adds a worker routine for the ComponentManager
	AddWorker(ComponentWorker) ComponentManagerBuilder

	// Build builds and returns a new ComponentManager instance
	Build() *ComponentManager
}
   150  
// componentManagerBuilderImpl is the ComponentManagerBuilder implementation returned by
// NewComponentManagerBuilder. It only accumulates worker routines until Build is called.
type componentManagerBuilderImpl struct {
	// workers holds the registered worker routines in the order they were added
	workers []ComponentWorker
}
   154  
   155  // NewComponentManagerBuilder returns a new ComponentManagerBuilder
   156  func NewComponentManagerBuilder() ComponentManagerBuilder {
   157  	return &componentManagerBuilderImpl{}
   158  }
   159  
// AddWorker adds a ComponentWorker closure to the ComponentManagerBuilder.
// All worker functions will be run in parallel when the ComponentManager is started.
// It returns the builder itself so that calls can be chained.
// Note: AddWorker is not concurrency-safe, and should only be called on an individual builder
// within a single goroutine.
func (c *componentManagerBuilderImpl) AddWorker(worker ComponentWorker) ComponentManagerBuilder {
	c.workers = append(c.workers, worker)
	return c
}
   168  
   169  // Build returns a new ComponentManager instance with the configured workers
   170  // Build may be called multiple times to create multiple individual ComponentManagers. This will
   171  // result in the worker routines being called multiple times. If this is unsafe, do not call it
   172  // more than once!
   173  func (c *componentManagerBuilderImpl) Build() *ComponentManager {
   174  	return &ComponentManager{
   175  		started:        atomic.NewBool(false),
   176  		ready:          make(chan struct{}),
   177  		done:           make(chan struct{}),
   178  		workersDone:    make(chan struct{}),
   179  		shutdownSignal: make(chan struct{}),
   180  		workers:        c.workers,
   181  	}
   182  }
   183  
// Compile-time assertion that *ComponentManager implements the Component interface.
var _ Component = (*ComponentManager)(nil)
   185  
// ComponentManager is used to manage the worker routines of a Component, and implements all of the
// methods required by the Component interface, abstracting them away from individual implementations.
//
// Since component manager implements the Component interface, its Ready() and Done() methods are
// idempotent, and can be called immediately after instantiation. The Ready() channel is closed when
// all worker functions have called their ReadyFunc, and its Done() channel is closed after all worker
// functions have returned.
//
// Shutdown is signalled by cancelling the irrecoverable.SignalerContext passed to Start(). This context
// is also used by workers to communicate irrecoverable errors. All irrecoverable errors are considered
// fatal and are propagated to the caller of Start() via the context's Throw method.
type ComponentManager struct {
	// started is flipped from false to true by the first call to Start; it guards against multiple startups
	started *atomic.Bool
	// ready is closed once every worker has called its ReadyFunc
	ready chan struct{}
	// done is closed after both shutdownSignal and workersDone have been closed
	done chan struct{}
	// workersDone is closed once all worker routines have returned
	workersDone chan struct{}
	// shutdownSignal is closed when the context created in Start is done, i.e. when shutdown has commenced
	shutdownSignal chan struct{}

	// workers are the routines launched by Start, one goroutine each
	workers []ComponentWorker
}
   206  
// Start initiates the ComponentManager by launching all worker routines.
// Each worker runs in its own goroutine and receives a signaler context derived from parent.
// Start itself does not block: it returns after all goroutines have been launched.
// Irrecoverable errors thrown by workers are forwarded to parent via parent.Throw.
// Start must only be called once. It will panic if called more than once.
func (c *ComponentManager) Start(parent irrecoverable.SignalerContext) {
	// Make sure we only start once. atomically check if started is false then set it to true.
	// If it was not false, panic
	if !c.started.CompareAndSwap(false, true) {
		panic(module.ErrMultipleStartup)
	}

	// ctx is a child of parent: it is done when parent is cancelled or when cancel is called below.
	// signalerCtx is what the workers receive; errors they throw on it are read from errChan.
	ctx, cancel := context.WithCancel(parent)
	signalerCtx, errChan := irrecoverable.WithSignaler(ctx)

	// close c.shutdownSignal as soon as ctx is done
	go c.waitForShutdownSignal(ctx.Done())

	// launch goroutine to propagate irrecoverable error
	go func() {
		// Closing the done channel here guarantees that any irrecoverable errors encountered will
		// be propagated to the parent first. Otherwise, there's a race condition between when this
		// goroutine and the parent's are scheduled. If the parent is scheduled first, any errors
		// thrown within workers would not have propagated, and it would only receive the done signal
		defer func() {
			cancel() // shutdown all workers
			// wait for shutdown signal before signalling the component is done
			// this guarantees that ShutdownSignal is closed before Done
			<-c.shutdownSignal
			// wait for all workers to return before signalling the component is done
			<-c.workersDone
			close(c.done)
		}()

		// wait until the workersDone channel is closed or an irrecoverable error is encountered
		if err := util.WaitError(errChan, c.workersDone); err != nil {
			// propagate the error directly to the parent because a failure in a worker routine
			// is considered irrecoverable
			// NOTE(review): Throw presumably terminates this goroutine; the deferred function
			// above is expected to still run in that case — confirm against irrecoverable.
			parent.Throw(err)
		}
	}()

	// workersReady counts workers that have not yet signalled readiness,
	// workersDone counts workers that have not yet returned
	var workersReady sync.WaitGroup
	var workersDone sync.WaitGroup
	workersReady.Add(len(c.workers))
	workersDone.Add(len(c.workers))

	// launch workers
	for _, worker := range c.workers {
		worker := worker // per-iteration copy for the closure below
		go func() {
			defer workersDone.Done()
			// readyOnce makes the ReadyFunc idempotent, so that a worker calling it several
			// times decrements workersReady only once
			var readyOnce sync.Once
			worker(signalerCtx, func() {
				readyOnce.Do(func() {
					workersReady.Done()
				})
			})
		}()
	}

	// launch goroutine to close ready channel
	go c.waitForReady(&workersReady)

	// launch goroutine to close workersDone channel
	go c.waitForDone(&workersDone)
}
   269  
   270  func (c *ComponentManager) waitForShutdownSignal(shutdownSignal <-chan struct{}) {
   271  	<-shutdownSignal
   272  	close(c.shutdownSignal)
   273  }
   274  
   275  func (c *ComponentManager) waitForReady(workersReady *sync.WaitGroup) {
   276  	workersReady.Wait()
   277  	close(c.ready)
   278  }
   279  
   280  func (c *ComponentManager) waitForDone(workersDone *sync.WaitGroup) {
   281  	workersDone.Wait()
   282  	close(c.workersDone)
   283  }
   284  
// Ready returns a channel which is closed once all the worker routines have been launched and are ready.
// If any worker routines exit before they indicate that they are ready, the channel returned from Ready will never close.
// The same channel is returned on every call.
func (c *ComponentManager) Ready() <-chan struct{} {
	return c.ready
}
   290  
// Done returns a channel which is closed once the ComponentManager has shut down.
// This happens after all worker routines have shut down (either gracefully or by throwing an error).
// The same channel is returned on every call.
func (c *ComponentManager) Done() <-chan struct{} {
	return c.done
}
   296  
// ShutdownSignal returns a channel that is closed when shutdown has commenced.
// This can happen either if the ComponentManager's context is canceled, or a worker routine encounters
// an irrecoverable error.
// For a manager created via ComponentManagerBuilder.Build the channel is allocated at build time,
// so a non-nil channel is returned even before Start is called; it stays open until shutdown begins.
func (c *ComponentManager) ShutdownSignal() <-chan struct{} {
	return c.shutdownSignal
}
   303  }