github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/follower/compliance_engine.go

     1  package follower
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/rs/zerolog"
     7  
     8  	"github.com/onflow/flow-go/consensus/hotstuff/model"
     9  	"github.com/onflow/flow-go/consensus/hotstuff/tracker"
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    12  	"github.com/onflow/flow-go/engine/consensus"
    13  	"github.com/onflow/flow-go/model/flow"
    14  	"github.com/onflow/flow-go/model/messages"
    15  	"github.com/onflow/flow-go/module"
    16  	"github.com/onflow/flow-go/module/compliance"
    17  	"github.com/onflow/flow-go/module/component"
    18  	"github.com/onflow/flow-go/module/irrecoverable"
    19  	"github.com/onflow/flow-go/module/metrics"
    20  	"github.com/onflow/flow-go/network"
    21  	"github.com/onflow/flow-go/network/channels"
    22  	"github.com/onflow/flow-go/storage"
    23  )
    24  
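        // EngineOption is a functional option for configuring the ComplianceEngine at construction time.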
    25  type EngineOption func(*ComplianceEngine)
    26  
    27  // WithChannel sets the channel the follower engine will use to receive blocks.
    28  func WithChannel(channel channels.Channel) EngineOption {
    29  	return func(e *ComplianceEngine) {
    30  		e.channel = channel
    31  	}
    32  }
    33  
    34  // WithComplianceConfigOpt applies the given compliance config option to the engine's internal config.
    35  func WithComplianceConfigOpt(opt compliance.Opt) EngineOption {
    36  	return func(e *ComplianceEngine) {
    37  		opt(&e.config)
    38  	}
    39  }
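
        // A minimal construction sketch showing how options are passed (hedged: the argument values are
        // assumed to be provided by the node's builder; compliance.DefaultConfig() is used purely for
        // illustration, and WithChannel here merely restates the default channel):
        //
        //	eng, err := NewComplianceLayer(log, net, me, engMetrics, headers, finalizedHeader, followerCore,
        //		compliance.DefaultConfig(),
        //		WithChannel(channels.ReceiveBlocks),
        //	)
        //	if err != nil {
        //		return fmt.Errorf("could not create follower compliance layer: %w", err)
        //	}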
    40  
    41  // defaultBatchProcessingWorkers is the number of concurrent workers that process incoming blocks.
    42  const defaultBatchProcessingWorkers = 4
    43  
    44  // defaultPendingBlockQueueCapacity is the maximum capacity of the inbound queue for blocks received directly from other nodes.
    45  // A small capacity is suitable here, as there will be hardly any pending blocks during normal operation. If the node
    46  // is so overloaded that it can't keep up with the newest blocks within 10 seconds (processing them with priority),
    47  // it is probably better to fall back on synchronization anyway.
    48  const defaultPendingBlockQueueCapacity = 10
    49  
    50  // defaultSyncedBlockQueueCapacity is the maximum capacity of the inbound queue for batches of synced blocks.
    51  // While catching up, we want to be able to buffer a somewhat larger amount of work.
    52  const defaultSyncedBlockQueueCapacity = 100
    53  
    54  // defaultPendingConnectedBlocksChanCapacity is the capacity of the buffered channel used to receive pending blocks that form a connected sequence.
    55  const defaultPendingConnectedBlocksChanCapacity = 100
    56  
    57  // ComplianceEngine is the highest level structure that consumes events from other components.
    58  // It is the entry point to the follower engine, which follows and maintains the local copy of the protocol state.
    59  // It is a passive (read-only) version of the compliance engine. The compliance engine
    60  // is employed by consensus nodes (active consensus participants), whereas the
    61  // follower engine is employed by all other node roles.
    62  // ComplianceEngine is responsible for:
    63  //  1. Consuming events from external sources such as sync engine.
    64  //  2. Splitting incoming batches into sub-batches of connected blocks.
    65  //  3. Providing worker goroutines for concurrent processing of batches of connected blocks.
    66  //  4. Handling of finalization events.
    67  //
    68  // See interface `complianceCore` (this package) for detailed documentation of the algorithm.
    69  // Implements consensus.Compliance interface.
    70  type ComplianceEngine struct {
    71  	*component.ComponentManager
    72  	log                        zerolog.Logger
    73  	me                         module.Local
    74  	engMetrics                 module.EngineMetrics
    75  	con                        network.Conduit
    76  	config                     compliance.Config
    77  	channel                    channels.Channel
    78  	headers                    storage.Headers
    79  	pendingProposals           *fifoqueue.FifoQueue        // queue for fresh proposals
    80  	syncedBlocks               *fifoqueue.FifoQueue        // queue for processing inbound batches of synced blocks
    81  	blocksAvailableNotifier    engine.Notifier             // notifies that new blocks are ready to be processed
    82  	finalizedBlockTracker      *tracker.NewestBlockTracker // tracks the latest finalized block
    83  	finalizedBlockNotifier     engine.Notifier             // notifies when the latest finalized block changes
    84  	pendingConnectedBlocksChan chan flow.Slashable[[]*flow.Block]
    85  	core                       complianceCore // performs actual processing of incoming messages.
    86  }
    87  
    88  var _ network.MessageProcessor = (*ComplianceEngine)(nil)
    89  var _ consensus.Compliance = (*ComplianceEngine)(nil)
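
        // Rough internal data flow, summarized from the implementation below:
        //
        //	Process / OnBlockProposal --> pendingProposals --\
        //	OnSyncedBlocks ------------> syncedBlocks --------+--> blocksAvailableNotifier
        //	    --> processBlocksLoop --> processQueuedBlocks (splits batches into connected ranges)
        //	    --> submitConnectedBatch (drops stale or far-ahead ranges) --> pendingConnectedBlocksChan
        //	    --> processConnectedBatch workers --> core.OnBlockRange
        //
        //	OnFinalizedBlock --> finalizedBlockTracker + finalizedBlockNotifier
        //	    --> finalizationProcessingLoop --> core.OnFinalizedBlock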
    90  
    91  // NewComplianceLayer instantiates the compliance layer for the consensus follower. See
    92  // interface `complianceCore` (this package) for detailed documentation of the algorithm.
    93  func NewComplianceLayer(
    94  	log zerolog.Logger,
    95  	net network.EngineRegistry,
    96  	me module.Local,
    97  	engMetrics module.EngineMetrics,
    98  	headers storage.Headers,
    99  	finalized *flow.Header,
   100  	core complianceCore,
   101  	config compliance.Config,
   102  	opts ...EngineOption,
   103  ) (*ComplianceEngine, error) {
   104  	// FIFO queue for inbound block proposals
   105  	pendingBlocks, err := fifoqueue.NewFifoQueue(defaultPendingBlockQueueCapacity)
   106  	if err != nil {
   107  		return nil, fmt.Errorf("failed to create queue for inbound blocks: %w", err)
   108  	}
   109  	// FIFO queue for synced blocks
   110  	syncedBlocks, err := fifoqueue.NewFifoQueue(defaultSyncedBlockQueueCapacity)
   111  	if err != nil {
   112  		return nil, fmt.Errorf("failed to create queue for inbound synced blocks: %w", err)
   113  	}
   114  
   115  	e := &ComplianceEngine{
   116  		log:                        log.With().Str("engine", "follower").Logger(),
   117  		me:                         me,
   118  		engMetrics:                 engMetrics,
   119  		config:                     config,
   120  		channel:                    channels.ReceiveBlocks,
   121  		pendingProposals:           pendingBlocks,
   122  		syncedBlocks:               syncedBlocks,
   123  		blocksAvailableNotifier:    engine.NewNotifier(),
   124  		pendingConnectedBlocksChan: make(chan flow.Slashable[[]*flow.Block], defaultPendingConnectedBlocksChanCapacity),
   125  		finalizedBlockTracker:      tracker.NewNewestBlockTracker(),
   126  		finalizedBlockNotifier:     engine.NewNotifier(),
   127  		headers:                    headers,
   128  		core:                       core,
   129  	}
   130  	e.finalizedBlockTracker.Track(model.BlockFromFlow(finalized))
   131  
   132  	for _, apply := range opts {
   133  		apply(e)
   134  	}
   135  
   136  	con, err := net.Register(e.channel, e)
   137  	if err != nil {
   138  		return nil, fmt.Errorf("could not register engine to network: %w", err)
   139  	}
   140  	e.con = con
   141  
   142  	cmBuilder := component.NewComponentManagerBuilder().
   143  		AddWorker(e.finalizationProcessingLoop).
   144  		AddWorker(e.processBlocksLoop)
   145  
   146  	cmBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   147  		// start internal component
   148  		e.core.Start(ctx)
   149  		// wait for it to be ready
   150  		<-e.core.Ready()
   151  
   152  		// report that we are ready to process events
   153  		ready()
   154  
   155  		// wait for shutdown to be commenced
   156  		<-ctx.Done()
   157  		// wait for core to shut down
   158  		<-e.core.Done()
   159  	})
   160  
   161  	for i := 0; i < defaultBatchProcessingWorkers; i++ {
   162  		cmBuilder.AddWorker(e.processConnectedBatch)
   163  	}
   164  	e.ComponentManager = cmBuilder.Build()
   165  
   166  	return e, nil
   167  }
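
        // Lifecycle sketch: the returned engine is a component.Component (via the embedded ComponentManager).
        // A hedged illustration of how a node would run it -- signalerCtx and cancel are assumed to come from
        // the node's irrecoverable-context scaffolding:
        //
        //	eng.Start(signalerCtx) // starts core, both processing loops, and the batch workers
        //	<-eng.Ready()          // all workers are up; safe to deliver messages
        //	// ... feed the engine via Process, OnSyncedBlocks, and OnFinalizedBlock ...
        //	cancel()               // cancel the context backing signalerCtx to initiate shutdown
        //	<-eng.Done()           // core and all workers have stopped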
   168  
   169  // OnBlockProposal queues *untrusted* proposals for further processing and notifies the Engine's
   170  // internal workers. This method is intended for fresh proposals received directly from leaders.
   171  // It can ingest synced blocks as well, but is less performant than the dedicated method `OnSyncedBlocks`.
   172  func (e *ComplianceEngine) OnBlockProposal(proposal flow.Slashable[*messages.BlockProposal]) {
   173  	e.engMetrics.MessageReceived(metrics.EngineFollower, metrics.MessageBlockProposal)
   174  	// queue proposal
   175  	if e.pendingProposals.Push(proposal) {
   176  		e.blocksAvailableNotifier.Notify()
   177  	}
   178  }
   179  
   180  // OnSyncedBlocks is an optimized consumer for *untrusted* synced blocks. It is especially
   181  // efficient for batches of continuously connected blocks (honest nodes supply finalized blocks
   182  // in suitable sequences where possible). Nevertheless, the method tolerates blocks in arbitrary
   183  // order (less efficient), making it robust against byzantine nodes.
   184  func (e *ComplianceEngine) OnSyncedBlocks(blocks flow.Slashable[[]*messages.BlockProposal]) {
   185  	e.engMetrics.MessageReceived(metrics.EngineFollower, metrics.MessageSyncedBlocks)
   186  	// The synchronization engine feeds the follower with batches of blocks. The field `Slashable.OriginID`
   187  	// states which node forwarded the batch to us. Each block contains its proposer and signature.
   188  
   189  	if e.syncedBlocks.Push(blocks) {
   190  		e.blocksAvailableNotifier.Notify()
   191  	}
   192  }
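
        // A sketch of how a caller (e.g. the synchronization engine) might hand over a batch; `originID`
        // and `proposals` are illustrative names:
        //
        //	e.OnSyncedBlocks(flow.Slashable[[]*messages.BlockProposal]{
        //		OriginID: originID, // the node that forwarded the batch to us
        //		Message:  proposals,
        //	})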
   193  
   194  // OnFinalizedBlock informs the compliance layer about finalization of a new block. It does not block
   195  // and asynchronously executes the internal pruning logic. We accept inputs out of order, and only act
   196  // on inputs with strictly monotonically increasing views.
   197  //
   198  // Implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer`.
   199  // CAUTION: the input to this callback is treated as trusted; precautions must be taken to ensure that
   200  // messages from external (untrusted) nodes are never passed to this function.
   201  func (e *ComplianceEngine) OnFinalizedBlock(block *model.Block) {
   202  	if e.finalizedBlockTracker.Track(block) {
   203  		e.finalizedBlockNotifier.Notify()
   204  	}
   205  }
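
        // A trusted caller holding a finalized header could invoke this callback as follows (sketch;
        // `finalizedHeader` is an illustrative *flow.Header):
        //
        //	e.OnFinalizedBlock(model.BlockFromFlow(finalizedHeader))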
   206  
   207  // Process processes the given event from the node with the given origin ID in
   208  // a blocking manner. It returns the potential processing error when done.
   209  // This method is intended to be used as a callback by the networking layer,
   210  // notifying us about fresh proposals directly from the consensus leaders.
   211  func (e *ComplianceEngine) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error {
   212  	switch msg := message.(type) {
   213  	case *messages.BlockProposal:
   214  		e.OnBlockProposal(flow.Slashable[*messages.BlockProposal]{
   215  			OriginID: originID,
   216  			Message:  msg,
   217  		})
   218  	default:
   219  		e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, message, channel)
   220  	}
   221  	return nil
   222  }
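
        // An equivalent direct call for a proposal received on the blocks channel would be (sketch;
        // `originID` and `proposal` are illustrative names):
        //
        //	_ = e.Process(channels.ReceiveBlocks, originID, proposal)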
   223  
   224  // processBlocksLoop processes available blocks as they are queued.
   225  // Implements `component.ComponentWorker` signature.
   226  func (e *ComplianceEngine) processBlocksLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   227  	ready()
   228  
   229  	doneSignal := ctx.Done()
   230  	newPendingBlockSignal := e.blocksAvailableNotifier.Channel()
   231  	for {
   232  		select {
   233  		case <-doneSignal:
   234  			return
   235  		case <-newPendingBlockSignal:
   236  			err := e.processQueuedBlocks(doneSignal) // no errors expected during normal operations
   237  			if err != nil {
   238  				ctx.Throw(err)
   239  			}
   240  		}
   241  	}
   242  }
   243  
   244  // processQueuedBlocks processes any available messages from the inbound queues. It only returns
   245  // when all inbound queues are empty (or the engine is terminated).
   246  // Prioritization: In a nutshell, we prioritize the resilience of the happy path over
   247  // performance gains on the recovery path. Details:
   248  //   - We prioritize new proposals. Thereby, it becomes much harder for a malicious node
   249  //     to overwhelm another node through synchronization messages and drown out new blocks
   250  //     for a node that is up-to-date.
   251  //   - On the flip side, new proposals are relatively infrequent compared to the load that
   252  //     synchronization produces for a node that is catching up. In other words, prioritizing
   253  //     the few new proposals first is probably not going to be much of a distraction.
   254  //     Proposals too far in the future are dropped (see parameter `SkipNewProposalsThreshold`
   255  //     in `compliance.Config`), to prevent memory overflow.
   256  //
   257  // No errors are expected during normal operation. All returned exceptions are potential
   258  // symptoms of internal state corruption and should be fatal.
   259  func (e *ComplianceEngine) processQueuedBlocks(doneSignal <-chan struct{}) error {
   260  	for {
   261  		select {
   262  		case <-doneSignal:
   263  			return nil
   264  		default:
   265  		}
   266  
   267  		// Priority 1: ingest fresh proposals
   268  		msg, ok := e.pendingProposals.Pop()
   269  		if ok {
   270  			blockMsg := msg.(flow.Slashable[*messages.BlockProposal])
   271  			block := blockMsg.Message.Block.ToInternal()
   272  			log := e.log.With().
   273  				Hex("origin_id", blockMsg.OriginID[:]).
   274  				Str("chain_id", block.Header.ChainID.String()).
   275  				Uint64("view", block.Header.View).
   276  				Uint64("height", block.Header.Height).
   277  				Logger()
   278  			latestFinalizedView := e.finalizedBlockTracker.NewestBlock().View
   279  			e.submitConnectedBatch(log, latestFinalizedView, blockMsg.OriginID, []*flow.Block{block})
   280  			e.engMetrics.MessageHandled(metrics.EngineFollower, metrics.MessageBlockProposal)
   281  			continue
   282  		}
   283  
   284  		// Priority 2: ingest synced blocks
   285  		msg, ok = e.syncedBlocks.Pop()
   286  		if !ok {
   287  			// When there are no more messages in the queue, return to the caller (processBlocksLoop)
   288  			// to wait for the next notification that new blocks are available.
   289  			return nil
   290  		}
   291  
   292  		batch := msg.(flow.Slashable[[]*messages.BlockProposal])
   293  		if len(batch.Message) < 1 {
   294  			continue
   295  		}
   296  		blocks := make([]*flow.Block, 0, len(batch.Message))
   297  		for _, block := range batch.Message {
   298  			blocks = append(blocks, block.Block.ToInternal())
   299  		}
   300  
   301  		firstBlock := blocks[0].Header
   302  		lastBlock := blocks[len(blocks)-1].Header
   303  		log := e.log.With().
   304  			Hex("origin_id", batch.OriginID[:]).
   305  			Str("chain_id", lastBlock.ChainID.String()).
   306  			Uint64("first_block_height", firstBlock.Height).
   307  			Uint64("first_block_view", firstBlock.View).
   308  			Uint64("last_block_height", lastBlock.Height).
   309  			Uint64("last_block_view", lastBlock.View).
   310  			Int("range_length", len(blocks)).
   311  			Logger()
   312  
   313  		// extract sequences of connected blocks and schedule them for further processing
   314  		// we assume the sender has already ordered blocks into connected ranges if possible
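        		// Worked example (illustrative): for blocks [A, B, C, X, Y] where B and C each reference their
        		// predecessor as parent, X does not reference C, and Y references X, the loop below emits the
        		// connected ranges [A, B, C] and [X, Y] as two separate batches.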
   315  		latestFinalizedView := e.finalizedBlockTracker.NewestBlock().View
   316  		parentID := blocks[0].ID()
   317  		indexOfLastConnected := 0
   318  		for i, block := range blocks {
   319  			if block.Header.ParentID != parentID {
   320  				e.submitConnectedBatch(log, latestFinalizedView, batch.OriginID, blocks[indexOfLastConnected:i])
   321  				indexOfLastConnected = i
   322  			}
   323  			parentID = block.Header.ID()
   324  		}
   325  		e.submitConnectedBatch(log, latestFinalizedView, batch.OriginID, blocks[indexOfLastConnected:])
   326  		e.engMetrics.MessageHandled(metrics.EngineFollower, metrics.MessageSyncedBlocks)
   327  	}
   328  }
   329  
   330  // submitConnectedBatch checks whether the batch is still relevant and, if so, submits it via a channel for further processing by the worker goroutines.
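        // Illustrative example (the threshold value is assumed, not a default): with latestFinalizedView = 1000
        // and SkipNewProposalsThreshold = 500, a batch ending at view 900 is dropped as stale, a batch ending at
        // view 1800 is dropped as being too far ahead, and a batch ending at view 1200 is forwarded.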
   331  func (e *ComplianceEngine) submitConnectedBatch(log zerolog.Logger, latestFinalizedView uint64, originID flow.Identifier, blocks []*flow.Block) {
   332  	if len(blocks) < 1 {
   333  		return
   334  	}
   335  	// If the last block of the batch is below the latest finalized view, the whole batch is stale and can be dropped.
   336  	lastBlock := blocks[len(blocks)-1].Header
   337  	if lastBlock.View < latestFinalizedView {
   338  		log.Debug().Msgf("dropping range [%d, %d] below finalized view %d", blocks[0].Header.View, lastBlock.View, latestFinalizedView)
   339  		return
   340  	}
   341  	skipNewProposalsThreshold := e.config.GetSkipNewProposalsThreshold()
   342  	if lastBlock.View > latestFinalizedView+skipNewProposalsThreshold {
   343  		log.Debug().
   344  			Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold).
   345  			Msgf("dropping range [%d, %d] too far ahead of locally finalized view %d",
   346  				blocks[0].Header.View, lastBlock.View, latestFinalizedView)
   347  		return
   348  	}
   349  	log.Debug().Msgf("submitting sub-range with views [%d, %d] for further processing", blocks[0].Header.View, lastBlock.View)
   350  
   351  	select {
   352  	case e.pendingConnectedBlocksChan <- flow.Slashable[[]*flow.Block]{
   353  		OriginID: originID,
   354  		Message:  blocks,
   355  	}:
   356  	case <-e.ComponentManager.ShutdownSignal():
   357  	}
   358  }
   359  
   360  // processConnectedBatch is a worker goroutine which concurrently consumes connected batches that will be processed by ComplianceCore.
   361  func (e *ComplianceEngine) processConnectedBatch(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   362  	ready()
   363  	for {
   364  		select {
   365  		case <-ctx.Done():
   366  			return
   367  		case msg := <-e.pendingConnectedBlocksChan:
   368  			err := e.core.OnBlockRange(msg.OriginID, msg.Message)
   369  			if err != nil {
   370  				ctx.Throw(err)
   371  			}
   372  		}
   373  	}
   374  }
   375  
   376  // finalizationProcessingLoop is a separate worker goroutine that processes finalization events.
   377  // Implements `component.ComponentWorker` signature.
   378  func (e *ComplianceEngine) finalizationProcessingLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   379  	ready()
   380  
   381  	doneSignal := ctx.Done()
   382  	blockFinalizedSignal := e.finalizedBlockNotifier.Channel()
   383  	for {
   384  		select {
   385  		case <-doneSignal:
   386  			return
   387  		case <-blockFinalizedSignal:
   388  			// retrieve the latest finalized header, so we know the height
   389  			finalHeader, err := e.headers.ByBlockID(e.finalizedBlockTracker.NewestBlock().BlockID)
   390  			if err != nil { // no expected errors
   391  				ctx.Throw(err)
   392  			}
   393  			e.core.OnFinalizedBlock(finalHeader)
   394  		}
   395  	}
   396  }