github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/follower/compliance_core.go (about)

     1  package follower
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/onflow/flow-go/consensus/hotstuff"
    11  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    12  	"github.com/onflow/flow-go/engine/common/follower/cache"
    13  	"github.com/onflow/flow-go/engine/common/follower/pending_tree"
    14  	"github.com/onflow/flow-go/model/flow"
    15  	"github.com/onflow/flow-go/module"
    16  	"github.com/onflow/flow-go/module/component"
    17  	"github.com/onflow/flow-go/module/irrecoverable"
    18  	"github.com/onflow/flow-go/module/trace"
    19  	"github.com/onflow/flow-go/state/protocol"
    20  )
    21  
// CertifiedBlocks is a connected list of certified blocks, in ascending height order.
// Each element bundles a block with the QC that certifies it. When such a list is built by
// rangeToCertifiedBlocks, every block except the last one is paired with the QC carried in the
// header of its successor, while the last block is paired with a separately supplied QC.
type CertifiedBlocks []flow.CertifiedBlock
    24  
    25  // defaultCertifiedRangeChannelCapacity maximum capacity of buffered channel that is used to transfer ranges of
    26  // certified blocks to specific worker.
    27  // Channel buffers ranges which consist of multiple blocks, so the real capacity of channel is larger
    28  const defaultCertifiedRangeChannelCapacity = 20
    29  
    30  // defaultFinalizedBlocksChannelCapacity maximum capacity of buffered channel that is used to transfer
    31  // finalized blocks to specific worker.
    32  const defaultFinalizedBlocksChannelCapacity = 10
    33  
    34  // defaultPendingBlocksCacheCapacity maximum capacity of cache for pending blocks.
    35  const defaultPendingBlocksCacheCapacity = 1000
    36  
// ComplianceCore implements main processing logic for follower engine.
// Generally is NOT concurrency safe but some functions can be used in concurrent setup:
// OnBlockRange and OnFinalizedBlock are documented as safe for concurrent use. They only touch the
// pending-blocks cache and then hand their results over via `certifiedRangesChan` and
// `finalizedBlocksChan` to the worker `processCoreSeqEvents`, which is the only place where the
// PendingTree is modified, the protocol state is extended and the HotStuff follower is fed.
// The embedded ComponentManager runs that worker and provides the shutdown signal that is used
// to unblock producers once the worker has stopped.
type ComplianceCore struct {
	*component.ComponentManager
	log                       zerolog.Logger
	mempoolMetrics            module.MempoolMetrics
	tracer                    module.Tracer
	proposalViolationNotifier hotstuff.ProposalViolationConsumer
	pendingCache              *cache.Cache
	pendingTree               *pending_tree.PendingTree
	state                     protocol.FollowerState
	follower                  module.HotStuffFollower
	validator                 hotstuff.Validator
	sync                      module.BlockRequester
	certifiedRangesChan       chan CertifiedBlocks // delivers ranges of certified blocks to main core worker
	finalizedBlocksChan       chan *flow.Header    // delivers finalized blocks to main core worker.
}

// Compile-time check that *ComplianceCore satisfies the package-internal complianceCore interface.
var _ complianceCore = (*ComplianceCore)(nil)
    56  
    57  // NewComplianceCore creates new instance of ComplianceCore.
    58  // No errors expected during normal operations.
    59  func NewComplianceCore(log zerolog.Logger,
    60  	mempoolMetrics module.MempoolMetrics,
    61  	heroCacheCollector module.HeroCacheMetrics,
    62  	followerConsumer hotstuff.FollowerConsumer,
    63  	state protocol.FollowerState,
    64  	follower module.HotStuffFollower,
    65  	validator hotstuff.Validator,
    66  	sync module.BlockRequester,
    67  	tracer module.Tracer,
    68  ) (*ComplianceCore, error) {
    69  	finalizedBlock, err := state.Final().Head()
    70  	if err != nil {
    71  		return nil, fmt.Errorf("could not query finalized block: %w", err)
    72  	}
    73  
    74  	c := &ComplianceCore{
    75  		log:                       log.With().Str("engine", "follower_core").Logger(),
    76  		mempoolMetrics:            mempoolMetrics,
    77  		state:                     state,
    78  		proposalViolationNotifier: followerConsumer,
    79  		pendingCache:              cache.NewCache(log, defaultPendingBlocksCacheCapacity, heroCacheCollector, followerConsumer),
    80  		pendingTree:               pending_tree.NewPendingTree(finalizedBlock),
    81  		follower:                  follower,
    82  		validator:                 validator,
    83  		sync:                      sync,
    84  		tracer:                    tracer,
    85  		certifiedRangesChan:       make(chan CertifiedBlocks, defaultCertifiedRangeChannelCapacity),
    86  		finalizedBlocksChan:       make(chan *flow.Header, defaultFinalizedBlocksChannelCapacity),
    87  	}
    88  
    89  	// prune cache to latest finalized view
    90  	c.pendingCache.PruneUpToView(finalizedBlock.View)
    91  
    92  	c.ComponentManager = component.NewComponentManagerBuilder().
    93  		AddWorker(c.processCoreSeqEvents).
    94  		Build()
    95  
    96  	return c, nil
    97  }
    98  
    99  // OnBlockRange processes a range of connected blocks. It validates the incoming batch, adds it to cache of pending
   100  // blocks and schedules certified blocks for further processing. The input list must be sequentially ordered forming
   101  // a chain, i.e. connectedRange[i] is the parent of connectedRange[i+1]. Submitting a disconnected batch results in
   102  // an `ErrDisconnectedBatch` error and the batch is dropped (no-op).
   103  // This method is safe to use in concurrent environment.
   104  // Caution: method might block if internally too many certified blocks are queued in the channel `certifiedRangesChan`.
   105  // Expected errors during normal operations:
   106  //   - cache.ErrDisconnectedBatch
   107  func (c *ComplianceCore) OnBlockRange(originID flow.Identifier, batch []*flow.Block) error {
   108  	if len(batch) < 1 {
   109  		return nil
   110  	}
   111  
   112  	firstBlock := batch[0].Header
   113  	lastBlock := batch[len(batch)-1].Header
   114  	hotstuffProposal := model.ProposalFromFlow(lastBlock)
   115  	log := c.log.With().
   116  		Hex("origin_id", originID[:]).
   117  		Str("chain_id", lastBlock.ChainID.String()).
   118  		Uint64("first_block_height", firstBlock.Height).
   119  		Uint64("first_block_view", firstBlock.View).
   120  		Uint64("last_block_height", lastBlock.Height).
   121  		Uint64("last_block_view", lastBlock.View).
   122  		Hex("last_block_id", hotstuffProposal.Block.BlockID[:]).
   123  		Int("range_length", len(batch)).
   124  		Logger()
   125  
   126  	log.Info().Msg("processing block range")
   127  
   128  	if c.pendingCache.Peek(hotstuffProposal.Block.BlockID) == nil {
   129  		log.Debug().Msg("block not found in cache, performing validation")
   130  		// Caution: we are _not_ checking the proposal's full validity here. Instead, we need to check
   131  		// the following two critical properties:
   132  		// 1. The block has been signed by the legitimate primary for the view. This is important in case
   133  		//    there are multiple blocks for the view. We need to differentiate the following byzantine cases:
   134  		//     (i) Some other consensus node that is _not_ primary is trying to publish a block.
   135  		//         This would result in the validation below failing with an `InvalidProposalError`.
   136  		//    (ii) The legitimate primary for the view is equivocating. In this case, the validity check
   137  		//         below would pass. Though, the `PendingTree` would eventually notice this, when we connect
   138  		//         the equivocating blocks to the latest finalized block.
   139  		// 2. The QC within the block is valid. A valid QC proves validity of all ancestors.
   140  		err := c.validator.ValidateProposal(hotstuffProposal)
   141  		if err != nil {
   142  			if invalidBlockError, ok := model.AsInvalidProposalError(err); ok {
   143  				c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{
   144  					OriginID: originID,
   145  					Message:  *invalidBlockError,
   146  				})
   147  				return nil
   148  			}
   149  			if errors.Is(err, model.ErrViewForUnknownEpoch) {
   150  				// We have received a proposal, but we don't know the epoch its view is within.
   151  				// Conceptually, there are three scenarios that could lead to this edge-case:
   152  				//  1. the proposer maliciously created the block for a view very far in the future (it's invalid)
   153  				//     -> in this case we can disregard the block
   154  				//  2. This node is very far behind and hasn't processed enough blocks to observe the EpochCommit
   155  				//     service event.
   156  				//     -> in this case we can disregard the block
   157  				//     Note: we could eliminate this edge case by dropping future blocks, iff their _view_
   158  				//           is strictly larger than `V + EpochCommitSafetyThreshold`, where `V` denotes
   159  				//           the latest finalized block known to this node.
   160  				//  3. No blocks have been finalized for the last `EpochCommitSafetyThreshold` views. This breaks
   161  				//     a critical liveness assumption - see EpochCommitSafetyThreshold in protocol.Params for details.
   162  				//     -> In this case, it is ok for the protocol to halt. Consequently, we can just disregard
   163  				//        the block, which will probably lead to this node eventually halting.
   164  				log.Err(err).Msg("unable to validate proposal with view from unknown epoch")
   165  				return nil
   166  			}
   167  			return fmt.Errorf("unexpected error validating proposal: %w", err)
   168  		}
   169  	}
   170  
   171  	certifiedBatch, certifyingQC, err := c.pendingCache.AddBlocks(batch)
   172  	if err != nil {
   173  		return fmt.Errorf("could not add a range of pending blocks: %w", err) // ErrDisconnectedBatch or exception
   174  	}
   175  	log.Debug().Msgf("caching block range resulted in %d certified blocks (possibly including additional cached blocks)", len(certifiedBatch))
   176  
   177  	if len(certifiedBatch) < 1 {
   178  		return nil
   179  	}
   180  	certifiedRange, err := rangeToCertifiedBlocks(certifiedBatch, certifyingQC)
   181  	if err != nil {
   182  		return fmt.Errorf("converting the certified batch to list of certified blocks failed: %w", err)
   183  	}
   184  
   185  	// in case we have already stopped our worker, we use a select statement to avoid
   186  	// blocking since there is no active consumer for this channel
   187  	select {
   188  	case c.certifiedRangesChan <- certifiedRange:
   189  	case <-c.ComponentManager.ShutdownSignal():
   190  	}
   191  	return nil
   192  }
   193  
   194  // processCoreSeqEvents processes events that need to be dispatched on dedicated core's goroutine.
   195  // Here we process events that need to be sequentially ordered(processing certified blocks and new finalized blocks).
   196  // Implements `component.ComponentWorker` signature.
   197  // Is NOT concurrency safe: should be executed by _single dedicated_ goroutine.
   198  func (c *ComplianceCore) processCoreSeqEvents(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   199  	ready()
   200  
   201  	doneSignal := ctx.Done()
   202  	for {
   203  		select {
   204  		case <-doneSignal:
   205  			return
   206  		case finalized := <-c.finalizedBlocksChan:
   207  			err := c.processFinalizedBlock(ctx, finalized) // no errors expected during normal operations
   208  			if err != nil {
   209  				ctx.Throw(err)
   210  			}
   211  		case blocks := <-c.certifiedRangesChan:
   212  			err := c.processCertifiedBlocks(ctx, blocks) // no errors expected during normal operations
   213  			if err != nil {
   214  				ctx.Throw(err)
   215  			}
   216  		}
   217  	}
   218  }
   219  
   220  // OnFinalizedBlock updates local state of pendingCache tree using received finalized block and queues finalized block
   221  // to be processed by internal goroutine.
   222  // This function is safe to use in concurrent environment.
   223  // CAUTION: this function blocks and hence is not compliant with the `FinalizationConsumer.OnFinalizedBlock` interface.
   224  func (c *ComplianceCore) OnFinalizedBlock(final *flow.Header) {
   225  	c.pendingCache.PruneUpToView(final.View)
   226  
   227  	// in-case we have already stopped our worker we use a select statement to avoid
   228  	// blocking since there is no active consumer for this channel
   229  	select {
   230  	case c.finalizedBlocksChan <- final:
   231  	case <-c.ComponentManager.ShutdownSignal():
   232  	}
   233  }
   234  
   235  // processCertifiedBlocks processes the batch of certified blocks:
   236  //  1. We add the certified blocks to the PendingTree. This might causes the pending PendingTree to detect
   237  //     additional blocks as now being connected to the latest finalized block. Specifically, the PendingTree
   238  //     returns the list `connectedBlocks`, which contains the subset of `blocks` that are connect to the
   239  //     finalized block plus all of their connected descendants. The list `connectedBlocks` is in 'parent first'
   240  //     order, i.e. a block is listed before any of its descendants. The PendingTree guarantees that all
   241  //     ancestors are listed, _unless_ the ancestor is the finalized block or the ancestor has been returned
   242  //     by a previous call to `PendingTree.AddBlocks`.
   243  //  2. We extend the protocol state with the connected certified blocks from step 1.
   244  //  3. We submit the connected certified blocks from step 1 to the consensus follower.
   245  //
   246  // Is NOT concurrency safe: should be executed by _single dedicated_ goroutine.
   247  // No errors expected during normal operations.
   248  func (c *ComplianceCore) processCertifiedBlocks(ctx context.Context, blocks CertifiedBlocks) error {
   249  	span, ctx := c.tracer.StartSpanFromContext(ctx, trace.FollowerProcessCertifiedBlocks)
   250  	defer span.End()
   251  
   252  	// Step 1: add blocks to our PendingTree of certified blocks
   253  	pendingTreeSpan, _ := c.tracer.StartSpanFromContext(ctx, trace.FollowerExtendPendingTree)
   254  	connectedBlocks, err := c.pendingTree.AddBlocks(blocks)
   255  	pendingTreeSpan.End()
   256  	if err != nil {
   257  		return fmt.Errorf("could not process batch of certified blocks: %w", err)
   258  	}
   259  
   260  	// Step 2 & 3: extend protocol state with connected certified blocks and forward them to consensus follower
   261  	for _, certifiedBlock := range connectedBlocks {
   262  		s, _ := c.tracer.StartBlockSpan(ctx, certifiedBlock.ID(), trace.FollowerExtendProtocolState)
   263  		err = c.state.ExtendCertified(ctx, certifiedBlock.Block, certifiedBlock.CertifyingQC)
   264  		s.End()
   265  		if err != nil {
   266  			return fmt.Errorf("could not extend protocol state with certified block: %w", err)
   267  		}
   268  
   269  		b, err := model.NewCertifiedBlock(model.BlockFromFlow(certifiedBlock.Block.Header), certifiedBlock.CertifyingQC)
   270  		if err != nil {
   271  			return fmt.Errorf("failed to convert certified block %v to HotStuff type: %w", certifiedBlock.Block.ID(), err)
   272  		}
   273  		c.follower.AddCertifiedBlock(&b) // submit the model to follower for async processing
   274  	}
   275  	return nil
   276  }
   277  
   278  // processFinalizedBlock informs the PendingTree about finalization of the given block.
   279  // Is NOT concurrency safe: should be executed by _single dedicated_ goroutine.
   280  // No errors expected during normal operations.
   281  func (c *ComplianceCore) processFinalizedBlock(ctx context.Context, finalized *flow.Header) error {
   282  	span, _ := c.tracer.StartSpanFromContext(ctx, trace.FollowerProcessFinalizedBlock)
   283  	defer span.End()
   284  
   285  	connectedBlocks, err := c.pendingTree.FinalizeFork(finalized)
   286  	if err != nil {
   287  		return fmt.Errorf("could not process finalized fork at view %d: %w", finalized.View, err)
   288  	}
   289  	// The pending tree allows to skip ahead, which makes the algorithm more general and simplifies its implementation.
   290  	// However, here we are implementing the consensus follower, which cannot skip ahead. This is because the consensus
   291  	// follower locally determines finality and therefore must ingest every block. In other words: ever block that is
   292  	// later finalized must have been connected before. Otherwise, the block would never have been forwarded to the
   293  	// HotStuff follower and no finalization notification would have been triggered.
   294  	// Therefore, from the perspective of the consensus follower, receiving a _non-empty_ `connectedBlocks` is a
   295  	// symptom of internal state corruption or a bug.
   296  	if len(connectedBlocks) > 0 {
   297  		return fmt.Errorf("finalizing block %v caused the PendingTree to connect additional blocks, which is a symptom of internal state corruption or a bug", finalized.ID())
   298  	}
   299  	return nil
   300  }
   301  
   302  // rangeToCertifiedBlocks transform batch of connected blocks and a QC that certifies last block to a range of
   303  // certified and connected blocks.
   304  // Pure function (side-effect free). No errors expected during normal operations.
   305  func rangeToCertifiedBlocks(certifiedRange []*flow.Block, certifyingQC *flow.QuorumCertificate) (CertifiedBlocks, error) {
   306  	certifiedBlocks := make(CertifiedBlocks, 0, len(certifiedRange))
   307  	lastIndex := len(certifiedRange) - 1
   308  	for i, block := range certifiedRange {
   309  		var qc *flow.QuorumCertificate
   310  		if i < lastIndex {
   311  			qc = certifiedRange[i+1].Header.QuorumCertificate()
   312  		} else {
   313  			qc = certifyingQC
   314  		}
   315  
   316  		// bundle block and its certifying QC to `CertifiedBlock`:
   317  		certBlock, err := flow.NewCertifiedBlock(block, qc)
   318  		if err != nil {
   319  			return nil, fmt.Errorf("constructing certified root block failed: %w", err)
   320  		}
   321  		certifiedBlocks = append(certifiedBlocks, certBlock)
   322  	}
   323  	return certifiedBlocks, nil
   324  }