github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/collection/compliance/core.go (about)

     1  package compliance
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/onflow/flow-go/consensus/hotstuff"
    11  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    12  	"github.com/onflow/flow-go/engine"
    13  	"github.com/onflow/flow-go/model/cluster"
    14  	"github.com/onflow/flow-go/model/flow"
    15  	"github.com/onflow/flow-go/model/messages"
    16  	"github.com/onflow/flow-go/module"
    17  	"github.com/onflow/flow-go/module/compliance"
    18  	"github.com/onflow/flow-go/module/counters"
    19  	"github.com/onflow/flow-go/module/irrecoverable"
    20  	"github.com/onflow/flow-go/module/mempool"
    21  	"github.com/onflow/flow-go/module/metrics"
    22  	"github.com/onflow/flow-go/state"
    23  	clusterkv "github.com/onflow/flow-go/state/cluster"
    24  	"github.com/onflow/flow-go/storage"
    25  	"github.com/onflow/flow-go/utils/logging"
    26  )
    27  
    28  // Core contains the central business logic for the collector clusters' compliance engine.
    29  // It is responsible for handling communication for the embedded consensus algorithm.
    30  // CAUTION with CONCURRENCY:
    31  //   - At the moment, compliance.Core _can not_ process blocks concurrently. Callers of `OnBlockProposal`
    32  //     need to ensure single-threaded access.
    33  //   - The only exception is calls to `ProcessFinalizedView`, which is the only concurrency-safe
    34  //     method of compliance.Core
    35  type Core struct {
    36  	log                       zerolog.Logger // used to log relevant actions with context
    37  	config                    compliance.Config
    38  	engineMetrics             module.EngineMetrics
    39  	mempoolMetrics            module.MempoolMetrics
    40  	hotstuffMetrics           module.HotstuffMetrics
    41  	collectionMetrics         module.CollectionMetrics
    42  	proposalViolationNotifier hotstuff.ProposalViolationConsumer
    43  	headers                   storage.Headers
    44  	state                     clusterkv.MutableState
    45  	// track latest finalized view/height - used to efficiently drop outdated or too-far-ahead blocks
    46  	finalizedView     counters.StrictMonotonousCounter
    47  	finalizedHeight   counters.StrictMonotonousCounter
    48  	pending           module.PendingClusterBlockBuffer // pending block cache
    49  	sync              module.BlockRequester
    50  	hotstuff          module.HotStuff
    51  	validator         hotstuff.Validator
    52  	voteAggregator    hotstuff.VoteAggregator
    53  	timeoutAggregator hotstuff.TimeoutAggregator
    54  }
    55  
    56  // NewCore instantiates the business logic for the collector clusters' compliance engine.
    57  func NewCore(
    58  	log zerolog.Logger,
    59  	collector module.EngineMetrics,
    60  	mempool module.MempoolMetrics,
    61  	hotstuffMetrics module.HotstuffMetrics,
    62  	collectionMetrics module.CollectionMetrics,
    63  	proposalViolationNotifier hotstuff.ProposalViolationConsumer,
    64  	headers storage.Headers,
    65  	state clusterkv.MutableState,
    66  	pending module.PendingClusterBlockBuffer,
    67  	sync module.BlockRequester,
    68  	validator hotstuff.Validator,
    69  	hotstuff module.HotStuff,
    70  	voteAggregator hotstuff.VoteAggregator,
    71  	timeoutAggregator hotstuff.TimeoutAggregator,
    72  	config compliance.Config,
    73  ) (*Core, error) {
    74  	c := &Core{
    75  		log:                       log.With().Str("cluster_compliance", "core").Logger(),
    76  		config:                    config,
    77  		engineMetrics:             collector,
    78  		mempoolMetrics:            mempool,
    79  		hotstuffMetrics:           hotstuffMetrics,
    80  		collectionMetrics:         collectionMetrics,
    81  		proposalViolationNotifier: proposalViolationNotifier,
    82  		headers:                   headers,
    83  		state:                     state,
    84  		pending:                   pending,
    85  		sync:                      sync,
    86  		hotstuff:                  hotstuff,
    87  		validator:                 validator,
    88  		voteAggregator:            voteAggregator,
    89  		timeoutAggregator:         timeoutAggregator,
    90  	}
    91  
    92  	// initialize finalized boundary cache
    93  	final, err := c.state.Final().Head()
    94  	if err != nil {
    95  		return nil, fmt.Errorf("could not initialized finalized boundary cache: %w", err)
    96  	}
    97  	c.ProcessFinalizedBlock(final)
    98  
    99  	// log the mempool size off the bat
   100  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   101  
   102  	return c, nil
   103  }
   104  
   105  // OnBlockProposal handles incoming block proposals.
   106  // No errors are expected during normal operation.
   107  func (c *Core) OnBlockProposal(proposal flow.Slashable[*messages.ClusterBlockProposal]) error {
   108  	startTime := time.Now()
   109  	defer func() {
   110  		c.hotstuffMetrics.BlockProcessingDuration(time.Since(startTime))
   111  	}()
   112  
   113  	block := flow.Slashable[*cluster.Block]{
   114  		OriginID: proposal.OriginID,
   115  		Message:  proposal.Message.Block.ToInternal(),
   116  	}
   117  	header := block.Message.Header
   118  	blockID := header.ID()
   119  	finalHeight := c.finalizedHeight.Value()
   120  	finalView := c.finalizedView.Value()
   121  
   122  	log := c.log.With().
   123  		Hex("origin_id", proposal.OriginID[:]).
   124  		Str("chain_id", header.ChainID.String()).
   125  		Uint64("block_height", header.Height).
   126  		Uint64("block_view", header.View).
   127  		Hex("block_id", blockID[:]).
   128  		Hex("parent_id", header.ParentID[:]).
   129  		Hex("ref_block_id", block.Message.Payload.ReferenceBlockID[:]).
   130  		Hex("collection_id", logging.Entity(block.Message.Payload.Collection)).
   131  		Int("tx_count", block.Message.Payload.Collection.Len()).
   132  		Time("timestamp", header.Timestamp).
   133  		Hex("proposer", header.ProposerID[:]).
   134  		Hex("parent_signer_indices", header.ParentVoterIndices).
   135  		Uint64("finalized_height", finalHeight).
   136  		Uint64("finalized_view", finalView).
   137  		Logger()
   138  	if log.Debug().Enabled() {
   139  		log = log.With().Strs("tx_ids",
   140  			flow.IdentifierList(block.Message.Payload.Collection.Light().Transactions).Strings()).Logger()
   141  	}
   142  	log.Info().Msg("block proposal received")
   143  
   144  	// drop proposals below the finalized threshold
   145  	if header.Height <= finalHeight || header.View <= finalView {
   146  		log.Debug().Msg("dropping block below finalized boundary")
   147  		return nil
   148  	}
   149  
   150  	skipNewProposalsThreshold := c.config.GetSkipNewProposalsThreshold()
   151  	// ignore proposals which are too far ahead of our local finalized state
   152  	// instead, rely on sync engine to catch up finalization more effectively, and avoid
   153  	// large subtree of blocks to be cached.
   154  	if header.View > finalView+skipNewProposalsThreshold {
   155  		log.Debug().
   156  			Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold).
   157  			Msg("dropping block too far ahead of locally finalized view")
   158  		return nil
   159  	}
   160  
   161  	// first, we reject all blocks that we don't need to process:
   162  	// 1) blocks already in the cache; they will already be processed later
   163  	// 2) blocks already on disk; they were processed and await finalization
   164  
   165  	// ignore proposals that are already cached
   166  	_, cached := c.pending.ByID(blockID)
   167  	if cached {
   168  		log.Debug().Msg("skipping already cached proposal")
   169  		return nil
   170  	}
   171  
   172  	// ignore proposals that were already processed
   173  	_, err := c.headers.ByBlockID(blockID)
   174  	if err == nil {
   175  		log.Debug().Msg("skipping already processed proposal")
   176  		return nil
   177  	}
   178  	if !errors.Is(err, storage.ErrNotFound) {
   179  		return fmt.Errorf("could not check proposal: %w", err)
   180  	}
   181  
   182  	// there are two possibilities if the proposal is neither already pending
   183  	// processing in the cache, nor has already been processed:
   184  	// 1) the proposal is unverifiable because the parent is unknown
   185  	// => we cache the proposal
   186  	// 2) the proposal is connected to finalized state through an unbroken chain
   187  	// => we verify the proposal and forward it to hotstuff if valid
   188  
   189  	// if the parent is a pending block (disconnected from the incorporated state), we cache this block as well.
   190  	// we don't have to request its parent block or its ancestor again, because as a
   191  	// pending block, its parent block must have been requested.
   192  	// if there was problem requesting its parent or ancestors, the sync engine's forward
   193  	// syncing with range requests for finalized blocks will request for the blocks.
   194  	_, found := c.pending.ByID(header.ParentID)
   195  	if found {
   196  		// add the block to the cache
   197  		_ = c.pending.Add(block)
   198  		c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   199  
   200  		return nil
   201  	}
   202  
   203  	// if the proposal is connected to a block that is neither in the cache, nor
   204  	// in persistent storage, its direct parent is missing; cache the proposal
   205  	// and request the parent
   206  	exists, err := c.headers.Exists(header.ParentID)
   207  	if err != nil {
   208  		return fmt.Errorf("could not check parent exists: %w", err)
   209  	}
   210  	if !exists {
   211  		_ = c.pending.Add(block)
   212  		c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   213  
   214  		c.sync.RequestBlock(header.ParentID, header.Height-1)
   215  		log.Debug().Msg("requesting missing parent for proposal")
   216  		return nil
   217  	}
   218  	// At this point, we should be able to connect the proposal to the finalized
   219  	// state and should process it to see whether to forward to hotstuff or not.
   220  	// processBlockAndDescendants is a recursive function. Here we trace the
   221  	// execution of the entire recursion, which might include processing the
   222  	// proposal's pending children. There is another span within
   223  	// processBlockProposal that measures the time spent for a single proposal.
   224  	err = c.processBlockAndDescendants(block)
   225  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   226  	if err != nil {
   227  		return fmt.Errorf("could not process block proposal: %w", err)
   228  	}
   229  
   230  	return nil
   231  }
   232  
   233  // processBlockAndDescendants is a recursive function that processes a block and
   234  // its pending descendants. By induction, any child block of a
   235  // valid proposal is itself connected to the finalized state and can be
   236  // processed as well.
   237  func (c *Core) processBlockAndDescendants(proposal flow.Slashable[*cluster.Block]) error {
   238  	header := proposal.Message.Header
   239  	blockID := header.ID()
   240  	log := c.log.With().
   241  		Str("block_id", blockID.String()).
   242  		Uint64("block_height", header.Height).
   243  		Uint64("block_view", header.View).
   244  		Uint64("parent_view", header.ParentView).
   245  		Logger()
   246  
   247  	// process block itself
   248  	err := c.processBlockProposal(proposal.Message)
   249  	if err != nil {
   250  		if checkForAndLogOutdatedInputError(err, log) || checkForAndLogUnverifiableInputError(err, log) {
   251  			return nil
   252  		}
   253  		if invalidBlockErr, ok := model.AsInvalidProposalError(err); ok {
   254  			log.Err(err).Msg("received invalid block from other node (potential slashing evidence?)")
   255  
   256  			// notify consumers about invalid block
   257  			c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{
   258  				OriginID: proposal.OriginID,
   259  				Message:  *invalidBlockErr,
   260  			})
   261  
   262  			// notify VoteAggregator about the invalid block
   263  			err = c.voteAggregator.InvalidBlock(model.ProposalFromFlow(header))
   264  			if err != nil {
   265  				if mempool.IsBelowPrunedThresholdError(err) {
   266  					log.Warn().Msg("received invalid block, but is below pruned threshold")
   267  					return nil
   268  				}
   269  				return fmt.Errorf("unexpected error notifying vote aggregator about invalid block: %w", err)
   270  			}
   271  			return nil
   272  		}
   273  		// unexpected error: potentially corrupted internal state => abort processing and escalate error
   274  		return fmt.Errorf("failed to process block %x: %w", blockID, err)
   275  	}
   276  
   277  	// process all children
   278  	// do not break on invalid or outdated blocks as they should not prevent us
   279  	// from processing other valid children
   280  	children, has := c.pending.ByParentID(blockID)
   281  	if !has {
   282  		return nil
   283  	}
   284  	for _, child := range children {
   285  		cpr := c.processBlockAndDescendants(child)
   286  		if cpr != nil {
   287  			// unexpected error: potentially corrupted internal state => abort processing and escalate error
   288  			return cpr
   289  		}
   290  	}
   291  
   292  	// drop all the children that should have been processed now
   293  	c.pending.DropForParent(blockID)
   294  
   295  	return nil
   296  }
   297  
   298  // processBlockProposal processes the given block proposal. The proposal must connect to
   299  // the finalized state.
   300  // Expected errors during normal operations:
   301  //   - engine.OutdatedInputError if the block proposal is outdated (e.g. orphaned)
   302  //   - model.InvalidProposalError if the block proposal is invalid
   303  //   - engine.UnverifiableInputError if the proposal cannot be validated
   304  func (c *Core) processBlockProposal(proposal *cluster.Block) error {
   305  	header := proposal.Header
   306  	blockID := header.ID()
   307  	log := c.log.With().
   308  		Str("chain_id", header.ChainID.String()).
   309  		Uint64("block_height", header.Height).
   310  		Uint64("block_view", header.View).
   311  		Hex("block_id", blockID[:]).
   312  		Hex("parent_id", header.ParentID[:]).
   313  		Hex("payload_hash", header.PayloadHash[:]).
   314  		Time("timestamp", header.Timestamp).
   315  		Hex("proposer", header.ProposerID[:]).
   316  		Hex("parent_signer_indices", header.ParentVoterIndices).
   317  		Logger()
   318  	log.Info().Msg("processing block proposal")
   319  
   320  	hotstuffProposal := model.ProposalFromFlow(header)
   321  	err := c.validator.ValidateProposal(hotstuffProposal)
   322  	if err != nil {
   323  		if model.IsInvalidProposalError(err) {
   324  			return err
   325  		}
   326  		if errors.Is(err, model.ErrViewForUnknownEpoch) {
   327  			// The cluster committee never returns ErrViewForUnknownEpoch, therefore this case
   328  			// is an unexpected error in cluster consensus.
   329  			return fmt.Errorf("unexpected error: cluster committee reported unknown epoch : %w", irrecoverable.NewException(err))
   330  		}
   331  		return fmt.Errorf("unexpected error validating proposal: %w", err)
   332  	}
   333  
   334  	// see if the block is a valid extension of the protocol state
   335  	err = c.state.Extend(proposal)
   336  	if err != nil {
   337  		if state.IsInvalidExtensionError(err) {
   338  			// if the block proposes an invalid extension of the cluster state, then the block is invalid
   339  			return model.NewInvalidProposalErrorf(hotstuffProposal, "invalid extension of cluster state (block: %x, height: %d): %w", blockID, header.Height, err)
   340  		} else if state.IsOutdatedExtensionError(err) {
   341  			// cluster state aborted processing of block as it is on an abandoned fork: block is outdated
   342  			return engine.NewOutdatedInputErrorf("outdated extension of cluster state: %w", err)
   343  		} else if state.IsUnverifiableExtensionError(err) {
   344  			return engine.NewUnverifiableInputError("unverifiable extension of cluster state (block_id: %x, height: %d): %w",
   345  				header.ID(), header.Height, err)
   346  		} else {
   347  			// unexpected error: potentially corrupted internal state => abort processing and escalate error
   348  			return fmt.Errorf("unexpected exception while extending cluster state with block %x at height %d: %w", blockID, header.Height, err)
   349  		}
   350  	}
   351  
   352  	// notify vote aggregator about a new block, so that it can start verifying
   353  	// votes for it.
   354  	c.voteAggregator.AddBlock(hotstuffProposal)
   355  
   356  	// submit the model to hotstuff for processing
   357  	// TODO replace with pubsub https://github.com/dapperlabs/flow-go/issues/6395
   358  	log.Info().Msg("forwarding block proposal to hotstuff")
   359  	c.hotstuff.SubmitProposal(hotstuffProposal)
   360  
   361  	return nil
   362  }
   363  
   364  // ProcessFinalizedBlock performs pruning of stale data based on finalization event
   365  // removes pending blocks below the finalized view
   366  func (c *Core) ProcessFinalizedBlock(finalized *flow.Header) {
   367  	// remove all pending blocks at or below the finalized view
   368  	c.pending.PruneByView(finalized.View)
   369  	c.finalizedHeight.Set(finalized.Height)
   370  	c.finalizedView.Set(finalized.View)
   371  
   372  	// always record the metric
   373  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   374  }
   375  
   376  // checkForAndLogOutdatedInputError checks whether error is an `engine.OutdatedInputError`.
   377  // If this is the case, we emit a log message and return true.
   378  // For any error other than `engine.OutdatedInputError`, this function is a no-op
   379  // and returns false.
   380  func checkForAndLogOutdatedInputError(err error, log zerolog.Logger) bool {
   381  	if engine.IsOutdatedInputError(err) {
   382  		// child is outdated by the time we started processing it
   383  		// => node was probably behind and is catching up. Log as warning
   384  		log.Info().Msg("dropped processing of abandoned fork; this might be an indicator that the node is slightly behind")
   385  		return true
   386  	}
   387  	return false
   388  }
   389  
   390  // checkForAndLogUnverifiableInputError checks whether error is an `engine.UnverifiableInputError`.
   391  // If this is the case, we emit a log message and return true.
   392  // For any error other than `engine.UnverifiableInputError`, this function is a no-op
   393  // and returns false.
   394  func checkForAndLogUnverifiableInputError(err error, log zerolog.Logger) bool {
   395  	if engine.IsUnverifiableInputError(err) {
   396  		// the block cannot be validated
   397  		log.Warn().Err(err).Msg("received collection proposal with unknown reference block; " +
   398  			"this might be an indicator that the node is slightly behind or the proposer published an invalid collection")
   399  		return true
   400  	}
   401  	return false
   402  }