github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/follower_loop.go (about)

     1  package hotstuff
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/rs/zerolog"
     8  
     9  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    10  	"github.com/onflow/flow-go/module"
    11  	"github.com/onflow/flow-go/module/component"
    12  	"github.com/onflow/flow-go/module/irrecoverable"
    13  	"github.com/onflow/flow-go/module/metrics"
    14  	"github.com/onflow/flow-go/utils/logging"
    15  )
    16  
    17  // FollowerLoop implements interface module.HotStuffFollower.
    18  // FollowerLoop buffers all incoming events to the hotstuff FollowerLogic, and feeds FollowerLogic one event at a time
    19  // using a worker thread.
    20  // Concurrency safe.
    21  type FollowerLoop struct {
    22  	*component.ComponentManager
    23  	log             zerolog.Logger
    24  	mempoolMetrics  module.MempoolMetrics
    25  	certifiedBlocks chan *model.CertifiedBlock
    26  	forks           Forks
    27  }
    28  
    29  var _ component.Component = (*FollowerLoop)(nil)
    30  var _ module.HotStuffFollower = (*FollowerLoop)(nil)
    31  
    32  // NewFollowerLoop creates an instance of HotStuffFollower
    33  func NewFollowerLoop(log zerolog.Logger, mempoolMetrics module.MempoolMetrics, forks Forks) (*FollowerLoop, error) {
    34  	// We can't afford to drop messages since it undermines liveness, but we also want to avoid blocking
    35  	// the compliance layer. Generally, the follower loop should be able to process inbound blocks faster
    36  	// than they pass through the compliance layer. Nevertheless, in the worst case we will fill the
    37  	// channel and block the compliance layer's workers. Though, that should happen only if compliance
    38  	// engine receives large number of blocks in short periods of time (e.g. when catching up).
    39  	certifiedBlocks := make(chan *model.CertifiedBlock, 1000)
    40  
    41  	fl := &FollowerLoop{
    42  		log:             log.With().Str("hotstuff", "FollowerLoop").Logger(),
    43  		mempoolMetrics:  mempoolMetrics,
    44  		certifiedBlocks: certifiedBlocks,
    45  		forks:           forks,
    46  	}
    47  
    48  	fl.ComponentManager = component.NewComponentManagerBuilder().
    49  		AddWorker(fl.loop).
    50  		Build()
    51  
    52  	return fl, nil
    53  }
    54  
    55  // AddCertifiedBlock appends the given certified block to the tree of pending
    56  // blocks and updates the latest finalized block (if finalization progressed).
    57  // Unless the parent is below the pruning threshold (latest finalized view), we
    58  // require that the parent has previously been added.
    59  //
    60  // Notes:
    61  //   - Under normal operations, this method is non-blocking. The follower internally
    62  //     queues incoming blocks and processes them in its own worker routine. However,
    63  //     when the inbound queue is, we block until there is space in the queue. This
    64  //     behavior is intentional, because we cannot drop blocks (otherwise, we would
    65  //     cause disconnected blocks). Instead, we simply block the compliance layer to
    66  //     avoid any pathological edge cases.
    67  //   - Blocks whose views are below the latest finalized view are dropped.
    68  //   - Inputs are idempotent (repetitions are no-ops).
    69  func (fl *FollowerLoop) AddCertifiedBlock(certifiedBlock *model.CertifiedBlock) {
    70  	received := time.Now()
    71  
    72  	select {
    73  	case fl.certifiedBlocks <- certifiedBlock:
    74  	case <-fl.ComponentManager.ShutdownSignal():
    75  		return
    76  	}
    77  
    78  	// the busy duration is measured as how long it takes from a block being
    79  	// received to a block being handled by the event handler.
    80  	busyDuration := time.Since(received)
    81  
    82  	blocksQueued := uint(len(fl.certifiedBlocks))
    83  	fl.mempoolMetrics.MempoolEntries(metrics.ResourceFollowerLoopCertifiedBlocksChannel, blocksQueued)
    84  	fl.log.Debug().Hex("block_id", logging.ID(certifiedBlock.ID())).
    85  		Uint64("view", certifiedBlock.View()).
    86  		Uint("blocks_queued", blocksQueued).
    87  		Dur("wait_time", busyDuration).
    88  		Msg("wait time to queue inbound certified block")
    89  }
    90  
    91  // loop will synchronously process all events.
    92  // All errors from FollowerLogic are fatal:
    93  //   - known critical error: some prerequisites of the HotStuff follower have been broken
    94  //   - unknown critical error: bug-related
    95  func (fl *FollowerLoop) loop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
    96  	ready()
    97  	shutdownSignal := fl.ComponentManager.ShutdownSignal()
    98  	for {
    99  		select { // to ensure we are not skipping over a termination signal
   100  		case <-shutdownSignal:
   101  			return
   102  		default:
   103  		}
   104  
   105  		select {
   106  		case b := <-fl.certifiedBlocks:
   107  			err := fl.forks.AddCertifiedBlock(b)
   108  			if err != nil { // all errors are fatal
   109  				err = fmt.Errorf("finalization logic failes to process certified block %v: %w", b.ID(), err)
   110  				fl.log.Error().
   111  					Hex("block_id", logging.ID(b.ID())).
   112  					Uint64("view", b.View()).
   113  					Err(err).
   114  					Msg("irrecoverable follower loop error")
   115  				ctx.Throw(err)
   116  			}
   117  		case <-shutdownSignal:
   118  			return
   119  		}
   120  	}
   121  }