github.com/koko1123/flow-go-1@v0.29.6/module/component/component.go (about)

     1  package component
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  
     8  	"go.uber.org/atomic"
     9  
    10  	"github.com/koko1123/flow-go-1/module"
    11  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    12  	"github.com/koko1123/flow-go-1/module/util"
    13  )
    14  
// ErrComponentShutdown is the sentinel error returned by a component which has already been shut down.
// It is declared here for use by component implementations; nothing in this file returns it directly.
var ErrComponentShutdown = fmt.Errorf("component has already shut down")
    17  
// Component represents a component which can be started and stopped, and exposes
// channels that close when startup and shutdown have completed.
// Once Start has been called, the channel returned by Done must close eventually,
// whether that be because of a graceful shutdown or an irrecoverable error.
//
// RunComponent relies on this contract: it calls Start in a separate goroutine and then
// blocks on the channel returned by Done.
type Component interface {
	// Presumably supplies Start(irrecoverable.SignalerContext); declared in package module.
	module.Startable
	// Presumably supplies the Ready() and Done() channels; declared in package module.
	module.ReadyDoneAware
}
    26  
// ComponentFactory creates a new Component instance, or returns an error if the instance
// cannot be generated. RunComponent invokes the factory once for every (re)start, and
// returns the factory's error to its caller without consulting the error handler.
type ComponentFactory func() (Component, error)
    28  
// OnError reacts to an irrecoverable error.
// It is meant to inspect the error, determining its type and seeing if e.g. a restart or some other
// measure is suitable, and then return an ErrorHandlingResult indicating how RunComponent should proceed.
// Before returning, it could also:
// - panic (in sandboxnet / benchmark)
// - log in various Error channels and / or send telemetry ...
//
// OnError is a type alias (not a distinct type), so any func(error) ErrorHandlingResult
// value can be used where an OnError is expected.
type OnError = func(error) ErrorHandlingResult
    36  
// ErrorHandlingResult is the decision returned by an OnError handler, telling RunComponent
// what to do after a component instance has thrown an irrecoverable error.
type ErrorHandlingResult int

const (
	// ErrorHandlingRestart instructs RunComponent to create a fresh component instance from
	// the factory and start it. It is the zero value of ErrorHandlingResult.
	ErrorHandlingRestart ErrorHandlingResult = iota
	// ErrorHandlingStop instructs RunComponent to stop and return the handled error.
	ErrorHandlingStop
)
    43  
    44  // RunComponent repeatedly starts components returned from the given ComponentFactory, shutting them
    45  // down when they encounter irrecoverable errors and passing those errors to the given error handler.
    46  // If the given context is cancelled, it will wait for the current component instance to shutdown
    47  // before returning.
    48  // The returned error is either:
    49  // - The context error if the context was canceled
    50  // - The last error handled if the error handler returns ErrorHandlingStop
    51  // - An error returned from componentFactory while generating an instance of component
    52  func RunComponent(ctx context.Context, componentFactory ComponentFactory, handler OnError) error {
    53  	// reference to per-run signals for the component
    54  	var component Component
    55  	var cancel context.CancelFunc
    56  	var done <-chan struct{}
    57  	var irrecoverableErr <-chan error
    58  
    59  	start := func() error {
    60  		var err error
    61  
    62  		component, err = componentFactory()
    63  		if err != nil {
    64  			return err // failure to generate the component, should be handled out-of-band because a restart won't help
    65  		}
    66  
    67  		// context used to run the component
    68  		var runCtx context.Context
    69  		runCtx, cancel = context.WithCancel(ctx)
    70  
    71  		// signaler context used for irrecoverables
    72  		var signalCtx irrecoverable.SignalerContext
    73  		signalCtx, irrecoverableErr = irrecoverable.WithSignaler(runCtx)
    74  
    75  		// we start the component in a separate goroutine, since an irrecoverable error
    76  		// could be thrown with `signalCtx` which terminates the calling goroutine
    77  		go component.Start(signalCtx)
    78  
    79  		done = component.Done()
    80  
    81  		return nil
    82  	}
    83  
    84  	stop := func() {
    85  		// shutdown the component and wait until it's done
    86  		cancel()
    87  		<-done
    88  	}
    89  
    90  	for {
    91  		select {
    92  		case <-ctx.Done():
    93  			return ctx.Err()
    94  		default:
    95  		}
    96  
    97  		if err := start(); err != nil {
    98  			return err // failure to start
    99  		}
   100  
   101  		if err := util.WaitError(irrecoverableErr, done); err != nil {
   102  			// an irrecoverable error was encountered
   103  			stop()
   104  
   105  			// send error to the handler
   106  			switch result := handler(err); result {
   107  			case ErrorHandlingRestart:
   108  				continue
   109  			case ErrorHandlingStop:
   110  				return err
   111  			default:
   112  				panic(fmt.Sprintf("invalid error handling result: %v", result))
   113  			}
   114  		} else if ctx.Err() != nil {
   115  			// the parent context was cancelled
   116  			stop()
   117  			return ctx.Err()
   118  		}
   119  
   120  		// clean completion
   121  		return nil
   122  	}
   123  }
   124  
// ReadyFunc is called within a ComponentWorker function to indicate that the worker is ready.
// ComponentManager's Ready channel is closed when all workers are ready.
// The ReadyFunc handed out by ComponentManager.Start may be called more than once;
// only the first call has an effect.
type ReadyFunc func()
   128  
// ComponentWorker represents a worker routine of a component.
// It takes a SignalerContext which can be used to throw any irrecoverable errors it encounters,
// as well as a ReadyFunc which must be called to signal that it is ready. The ComponentManager
// waits until all workers have signaled that they are ready before closing its own Ready channel.
// The ComponentManager runs each worker in its own goroutine and treats the worker as done
// once the function returns.
type ComponentWorker func(ctx irrecoverable.SignalerContext, ready ReadyFunc)
   134  
// ComponentManagerBuilder provides a mechanism for building a ComponentManager.
// Workers are registered with AddWorker, after which Build produces the manager.
type ComponentManagerBuilder interface {
	// AddWorker adds a worker routine for the ComponentManager.
	// It returns a ComponentManagerBuilder so that calls can be chained.
	AddWorker(ComponentWorker) ComponentManagerBuilder

	// Build builds and returns a new ComponentManager instance.
	Build() *ComponentManager
}
   143  
// componentManagerBuilderImpl is the ComponentManagerBuilder implementation returned by
// NewComponentManagerBuilder.
type componentManagerBuilderImpl struct {
	// workers holds the registered worker routines in the order AddWorker was called.
	workers []ComponentWorker
}
   147  
   148  // NewComponentManagerBuilder returns a new ComponentManagerBuilder
   149  func NewComponentManagerBuilder() ComponentManagerBuilder {
   150  	return &componentManagerBuilderImpl{}
   151  }
   152  
   153  // AddWorker adds a ComponentWorker closure to the ComponentManagerBuilder
   154  // All worker functions will be run in parallel when the ComponentManager is started.
   155  // Note: AddWorker is not concurrency-safe, and should only be called on an individual builder
   156  // within a single goroutine.
   157  func (c *componentManagerBuilderImpl) AddWorker(worker ComponentWorker) ComponentManagerBuilder {
   158  	c.workers = append(c.workers, worker)
   159  	return c
   160  }
   161  
   162  // Build returns a new ComponentManager instance with the configured workers
   163  // Build may be called multiple times to create multiple individual ComponentManagers. This will
   164  // result in the worker routines being called multiple times. If this is unsafe, do not call it
   165  // more than once!
   166  func (c *componentManagerBuilderImpl) Build() *ComponentManager {
   167  	return &ComponentManager{
   168  		started:        atomic.NewBool(false),
   169  		ready:          make(chan struct{}),
   170  		done:           make(chan struct{}),
   171  		workersDone:    make(chan struct{}),
   172  		shutdownSignal: make(chan struct{}),
   173  		workers:        c.workers,
   174  	}
   175  }
   176  
// Compile-time check that *ComponentManager satisfies the Component interface.
var _ Component = (*ComponentManager)(nil)

// ComponentManager is used to manage the worker routines of a Component, and implements all of the
// methods required by the Component interface, abstracting them away from individual implementations.
//
// Since component manager implements the Component interface, its Ready() and Done() methods are
// idempotent, and can be called immediately after instantiation. The Ready() channel is closed when
// all worker functions have called their ReadyFunc, and its Done() channel is closed after all worker
// functions have returned.
//
// Shutdown is signalled by cancelling the irrecoverable.SignalerContext passed to Start(). This context
// is also used by workers to communicate irrecoverable errors. All irrecoverable errors are considered
// fatal and are propagated to the caller of Start() via the context's Throw method.
type ComponentManager struct {
	// started is flipped from false to true by the first call to Start; it guards against multiple startups.
	started *atomic.Bool
	// ready is closed once every worker has called its ReadyFunc.
	ready chan struct{}
	// done is closed after both shutdownSignal and workersDone have been closed.
	done chan struct{}
	// workersDone is closed once every worker function has returned.
	workersDone chan struct{}
	// shutdownSignal is closed once the context the workers run under is done.
	shutdownSignal chan struct{}

	// workers are the routines launched by Start, each in its own goroutine.
	workers []ComponentWorker
}
   199  
   200  // Start initiates the ComponentManager by launching all worker routines.
   201  // Start must only be called once. It will panic if called more than once.
   202  func (c *ComponentManager) Start(parent irrecoverable.SignalerContext) {
   203  	// Make sure we only start once. atomically check if started is false then set it to true.
   204  	// If it was not false, panic
   205  	if !c.started.CompareAndSwap(false, true) {
   206  		panic(module.ErrMultipleStartup)
   207  	}
   208  
   209  	ctx, cancel := context.WithCancel(parent)
   210  	signalerCtx, errChan := irrecoverable.WithSignaler(ctx)
   211  
   212  	go c.waitForShutdownSignal(ctx.Done())
   213  
   214  	// launch goroutine to propagate irrecoverable error
   215  	go func() {
   216  		// Closing the done channel here guarantees that any irrecoverable errors encountered will
   217  		// be propagated to the parent first. Otherwise, there's a race condition between when this
   218  		// goroutine and the parent's are scheduled. If the parent is scheduled first, any errors
   219  		// thrown within workers would not have propagated, and it would only receive the done signal
   220  		defer func() {
   221  			cancel() // shutdown all workers
   222  			// wait for shutdown signal before signalling the component is done
   223  			// this guarantees that ShutdownSignal is closed before Done
   224  			<-c.shutdownSignal
   225  			<-c.workersDone
   226  			close(c.done)
   227  		}()
   228  
   229  		// wait until the workersDone channel is closed or an irrecoverable error is encountered
   230  		if err := util.WaitError(errChan, c.workersDone); err != nil {
   231  			// propagate the error directly to the parent because a failure in a worker routine
   232  			// is considered irrecoverable
   233  			parent.Throw(err)
   234  		}
   235  	}()
   236  
   237  	var workersReady sync.WaitGroup
   238  	var workersDone sync.WaitGroup
   239  	workersReady.Add(len(c.workers))
   240  	workersDone.Add(len(c.workers))
   241  
   242  	// launch workers
   243  	for _, worker := range c.workers {
   244  		worker := worker
   245  		go func() {
   246  			defer workersDone.Done()
   247  			var readyOnce sync.Once
   248  			worker(signalerCtx, func() {
   249  				readyOnce.Do(func() {
   250  					workersReady.Done()
   251  				})
   252  			})
   253  		}()
   254  	}
   255  
   256  	// launch goroutine to close ready channel
   257  	go c.waitForReady(&workersReady)
   258  
   259  	// launch goroutine to close workersDone channel
   260  	go c.waitForDone(&workersDone)
   261  }
   262  
   263  func (c *ComponentManager) waitForShutdownSignal(shutdownSignal <-chan struct{}) {
   264  	<-shutdownSignal
   265  	close(c.shutdownSignal)
   266  }
   267  
   268  func (c *ComponentManager) waitForReady(workersReady *sync.WaitGroup) {
   269  	workersReady.Wait()
   270  	close(c.ready)
   271  }
   272  
   273  func (c *ComponentManager) waitForDone(workersDone *sync.WaitGroup) {
   274  	workersDone.Wait()
   275  	close(c.workersDone)
   276  }
   277  
// Ready returns a channel which is closed once all the worker routines have been launched and are ready.
// If any worker routines exit before they indicate that they are ready, the channel returned from Ready will never close.
// The same channel is returned on every call.
func (c *ComponentManager) Ready() <-chan struct{} {
	return c.ready
}
   283  
// Done returns a channel which is closed once the ComponentManager has shut down.
// This happens after all worker routines have shut down (either gracefully or by throwing an error),
// and only after the channel returned by ShutdownSignal has been closed.
// The same channel is returned on every call.
func (c *ComponentManager) Done() <-chan struct{} {
	return c.done
}
   289  
// ShutdownSignal returns a channel that is closed when shutdown has commenced.
// This can happen if the ComponentManager's context is canceled, a worker routine encounters
// an irrecoverable error, or all worker routines have returned.
// For a ComponentManager created by ComponentManagerBuilder.Build the channel is allocated at
// construction, so a non-nil channel is returned even before Start; it is not closed
// until shutdown begins.
func (c *ComponentManager) ShutdownSignal() <-chan struct{} {
	return c.shutdownSignal
}
   296  }