github.com/onflow/flow-go@v0.33.17/engine/collection/compliance/core.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package compliance
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/rs/zerolog"
    11  
    12  	"github.com/onflow/flow-go/consensus/hotstuff"
    13  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    14  	"github.com/onflow/flow-go/engine"
    15  	"github.com/onflow/flow-go/model/cluster"
    16  	"github.com/onflow/flow-go/model/flow"
    17  	"github.com/onflow/flow-go/model/messages"
    18  	"github.com/onflow/flow-go/module"
    19  	"github.com/onflow/flow-go/module/compliance"
    20  	"github.com/onflow/flow-go/module/counters"
    21  	"github.com/onflow/flow-go/module/irrecoverable"
    22  	"github.com/onflow/flow-go/module/mempool"
    23  	"github.com/onflow/flow-go/module/metrics"
    24  	"github.com/onflow/flow-go/state"
    25  	clusterkv "github.com/onflow/flow-go/state/cluster"
    26  	"github.com/onflow/flow-go/storage"
    27  	"github.com/onflow/flow-go/utils/logging"
    28  )
    29  
    30  // Core contains the central business logic for the collector clusters' compliance engine.
    31  // It is responsible for handling communication for the embedded consensus algorithm.
    32  // CAUTION with CONCURRENCY:
    33  //   - At the moment, compliance.Core _can not_ process blocks concurrently. Callers of `OnBlockProposal`
    34  //     need to ensure single-threaded access.
    35  //   - The only exception is calls to `ProcessFinalizedView`, which is the only concurrency-safe
    36  //     method of compliance.Core
    37  type Core struct {
    38  	log                       zerolog.Logger // used to log relevant actions with context
    39  	config                    compliance.Config
    40  	engineMetrics             module.EngineMetrics
    41  	mempoolMetrics            module.MempoolMetrics
    42  	hotstuffMetrics           module.HotstuffMetrics
    43  	collectionMetrics         module.CollectionMetrics
    44  	proposalViolationNotifier hotstuff.ProposalViolationConsumer
    45  	headers                   storage.Headers
    46  	state                     clusterkv.MutableState
    47  	// track latest finalized view/height - used to efficiently drop outdated or too-far-ahead blocks
    48  	finalizedView     counters.StrictMonotonousCounter
    49  	finalizedHeight   counters.StrictMonotonousCounter
    50  	pending           module.PendingClusterBlockBuffer // pending block cache
    51  	sync              module.BlockRequester
    52  	hotstuff          module.HotStuff
    53  	validator         hotstuff.Validator
    54  	voteAggregator    hotstuff.VoteAggregator
    55  	timeoutAggregator hotstuff.TimeoutAggregator
    56  }
    57  
    58  // NewCore instantiates the business logic for the collector clusters' compliance engine.
    59  func NewCore(
    60  	log zerolog.Logger,
    61  	collector module.EngineMetrics,
    62  	mempool module.MempoolMetrics,
    63  	hotstuffMetrics module.HotstuffMetrics,
    64  	collectionMetrics module.CollectionMetrics,
    65  	proposalViolationNotifier hotstuff.ProposalViolationConsumer,
    66  	headers storage.Headers,
    67  	state clusterkv.MutableState,
    68  	pending module.PendingClusterBlockBuffer,
    69  	sync module.BlockRequester,
    70  	validator hotstuff.Validator,
    71  	hotstuff module.HotStuff,
    72  	voteAggregator hotstuff.VoteAggregator,
    73  	timeoutAggregator hotstuff.TimeoutAggregator,
    74  	config compliance.Config,
    75  ) (*Core, error) {
    76  	c := &Core{
    77  		log:                       log.With().Str("cluster_compliance", "core").Logger(),
    78  		config:                    config,
    79  		engineMetrics:             collector,
    80  		mempoolMetrics:            mempool,
    81  		hotstuffMetrics:           hotstuffMetrics,
    82  		collectionMetrics:         collectionMetrics,
    83  		proposalViolationNotifier: proposalViolationNotifier,
    84  		headers:                   headers,
    85  		state:                     state,
    86  		pending:                   pending,
    87  		sync:                      sync,
    88  		hotstuff:                  hotstuff,
    89  		validator:                 validator,
    90  		voteAggregator:            voteAggregator,
    91  		timeoutAggregator:         timeoutAggregator,
    92  	}
    93  
    94  	// initialize finalized boundary cache
    95  	final, err := c.state.Final().Head()
    96  	if err != nil {
    97  		return nil, fmt.Errorf("could not initialized finalized boundary cache: %w", err)
    98  	}
    99  	c.ProcessFinalizedBlock(final)
   100  
   101  	// log the mempool size off the bat
   102  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   103  
   104  	return c, nil
   105  }
   106  
   107  // OnBlockProposal handles incoming block proposals.
   108  // No errors are expected during normal operation.
   109  func (c *Core) OnBlockProposal(proposal flow.Slashable[*messages.ClusterBlockProposal]) error {
   110  	startTime := time.Now()
   111  	defer func() {
   112  		c.hotstuffMetrics.BlockProcessingDuration(time.Since(startTime))
   113  	}()
   114  
   115  	block := flow.Slashable[*cluster.Block]{
   116  		OriginID: proposal.OriginID,
   117  		Message:  proposal.Message.Block.ToInternal(),
   118  	}
   119  	header := block.Message.Header
   120  	blockID := header.ID()
   121  	finalHeight := c.finalizedHeight.Value()
   122  	finalView := c.finalizedView.Value()
   123  
   124  	log := c.log.With().
   125  		Hex("origin_id", proposal.OriginID[:]).
   126  		Str("chain_id", header.ChainID.String()).
   127  		Uint64("block_height", header.Height).
   128  		Uint64("block_view", header.View).
   129  		Hex("block_id", blockID[:]).
   130  		Hex("parent_id", header.ParentID[:]).
   131  		Hex("ref_block_id", block.Message.Payload.ReferenceBlockID[:]).
   132  		Hex("collection_id", logging.Entity(block.Message.Payload.Collection)).
   133  		Int("tx_count", block.Message.Payload.Collection.Len()).
   134  		Time("timestamp", header.Timestamp).
   135  		Hex("proposer", header.ProposerID[:]).
   136  		Hex("parent_signer_indices", header.ParentVoterIndices).
   137  		Uint64("finalized_height", finalHeight).
   138  		Uint64("finalized_view", finalView).
   139  		Logger()
   140  	if log.Debug().Enabled() {
   141  		log = log.With().Strs("tx_ids",
   142  			flow.IdentifierList(block.Message.Payload.Collection.Light().Transactions).Strings()).Logger()
   143  	}
   144  	log.Info().Msg("block proposal received")
   145  
   146  	// drop proposals below the finalized threshold
   147  	if header.Height <= finalHeight || header.View <= finalView {
   148  		log.Debug().Msg("dropping block below finalized boundary")
   149  		return nil
   150  	}
   151  
   152  	skipNewProposalsThreshold := c.config.GetSkipNewProposalsThreshold()
   153  	// ignore proposals which are too far ahead of our local finalized state
   154  	// instead, rely on sync engine to catch up finalization more effectively, and avoid
   155  	// large subtree of blocks to be cached.
   156  	if header.View > finalView+skipNewProposalsThreshold {
   157  		log.Debug().
   158  			Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold).
   159  			Msg("dropping block too far ahead of locally finalized view")
   160  		return nil
   161  	}
   162  
   163  	// first, we reject all blocks that we don't need to process:
   164  	// 1) blocks already in the cache; they will already be processed later
   165  	// 2) blocks already on disk; they were processed and await finalization
   166  
   167  	// ignore proposals that are already cached
   168  	_, cached := c.pending.ByID(blockID)
   169  	if cached {
   170  		log.Debug().Msg("skipping already cached proposal")
   171  		return nil
   172  	}
   173  
   174  	// ignore proposals that were already processed
   175  	_, err := c.headers.ByBlockID(blockID)
   176  	if err == nil {
   177  		log.Debug().Msg("skipping already processed proposal")
   178  		return nil
   179  	}
   180  	if !errors.Is(err, storage.ErrNotFound) {
   181  		return fmt.Errorf("could not check proposal: %w", err)
   182  	}
   183  
   184  	// there are two possibilities if the proposal is neither already pending
   185  	// processing in the cache, nor has already been processed:
   186  	// 1) the proposal is unverifiable because the parent is unknown
   187  	// => we cache the proposal
   188  	// 2) the proposal is connected to finalized state through an unbroken chain
   189  	// => we verify the proposal and forward it to hotstuff if valid
   190  
   191  	// if the parent is a pending block (disconnected from the incorporated state), we cache this block as well.
   192  	// we don't have to request its parent block or its ancestor again, because as a
   193  	// pending block, its parent block must have been requested.
   194  	// if there was problem requesting its parent or ancestors, the sync engine's forward
   195  	// syncing with range requests for finalized blocks will request for the blocks.
   196  	_, found := c.pending.ByID(header.ParentID)
   197  	if found {
   198  		// add the block to the cache
   199  		_ = c.pending.Add(block)
   200  		c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   201  
   202  		return nil
   203  	}
   204  
   205  	// if the proposal is connected to a block that is neither in the cache, nor
   206  	// in persistent storage, its direct parent is missing; cache the proposal
   207  	// and request the parent
   208  	exists, err := c.headers.Exists(header.ParentID)
   209  	if err != nil {
   210  		return fmt.Errorf("could not check parent exists: %w", err)
   211  	}
   212  	if !exists {
   213  		_ = c.pending.Add(block)
   214  		c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   215  
   216  		c.sync.RequestBlock(header.ParentID, header.Height-1)
   217  		log.Debug().Msg("requesting missing parent for proposal")
   218  		return nil
   219  	}
   220  	// At this point, we should be able to connect the proposal to the finalized
   221  	// state and should process it to see whether to forward to hotstuff or not.
   222  	// processBlockAndDescendants is a recursive function. Here we trace the
   223  	// execution of the entire recursion, which might include processing the
   224  	// proposal's pending children. There is another span within
   225  	// processBlockProposal that measures the time spent for a single proposal.
   226  	err = c.processBlockAndDescendants(block)
   227  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   228  	if err != nil {
   229  		return fmt.Errorf("could not process block proposal: %w", err)
   230  	}
   231  
   232  	return nil
   233  }
   234  
   235  // processBlockAndDescendants is a recursive function that processes a block and
   236  // its pending descendants. By induction, any child block of a
   237  // valid proposal is itself connected to the finalized state and can be
   238  // processed as well.
   239  func (c *Core) processBlockAndDescendants(proposal flow.Slashable[*cluster.Block]) error {
   240  	header := proposal.Message.Header
   241  	blockID := header.ID()
   242  	log := c.log.With().
   243  		Str("block_id", blockID.String()).
   244  		Uint64("block_height", header.Height).
   245  		Uint64("block_view", header.View).
   246  		Uint64("parent_view", header.ParentView).
   247  		Logger()
   248  
   249  	// process block itself
   250  	err := c.processBlockProposal(proposal.Message)
   251  	if err != nil {
   252  		if checkForAndLogOutdatedInputError(err, log) || checkForAndLogUnverifiableInputError(err, log) {
   253  			return nil
   254  		}
   255  		if invalidBlockErr, ok := model.AsInvalidProposalError(err); ok {
   256  			log.Err(err).Msg("received invalid block from other node (potential slashing evidence?)")
   257  
   258  			// notify consumers about invalid block
   259  			c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{
   260  				OriginID: proposal.OriginID,
   261  				Message:  *invalidBlockErr,
   262  			})
   263  
   264  			// notify VoteAggregator about the invalid block
   265  			err = c.voteAggregator.InvalidBlock(model.ProposalFromFlow(header))
   266  			if err != nil {
   267  				if mempool.IsBelowPrunedThresholdError(err) {
   268  					log.Warn().Msg("received invalid block, but is below pruned threshold")
   269  					return nil
   270  				}
   271  				return fmt.Errorf("unexpected error notifying vote aggregator about invalid block: %w", err)
   272  			}
   273  			return nil
   274  		}
   275  		// unexpected error: potentially corrupted internal state => abort processing and escalate error
   276  		return fmt.Errorf("failed to process block %x: %w", blockID, err)
   277  	}
   278  
   279  	// process all children
   280  	// do not break on invalid or outdated blocks as they should not prevent us
   281  	// from processing other valid children
   282  	children, has := c.pending.ByParentID(blockID)
   283  	if !has {
   284  		return nil
   285  	}
   286  	for _, child := range children {
   287  		cpr := c.processBlockAndDescendants(child)
   288  		if cpr != nil {
   289  			// unexpected error: potentially corrupted internal state => abort processing and escalate error
   290  			return cpr
   291  		}
   292  	}
   293  
   294  	// drop all the children that should have been processed now
   295  	c.pending.DropForParent(blockID)
   296  
   297  	return nil
   298  }
   299  
   300  // processBlockProposal processes the given block proposal. The proposal must connect to
   301  // the finalized state.
   302  // Expected errors during normal operations:
   303  //   - engine.OutdatedInputError if the block proposal is outdated (e.g. orphaned)
   304  //   - model.InvalidProposalError if the block proposal is invalid
   305  //   - engine.UnverifiableInputError if the proposal cannot be validated
   306  func (c *Core) processBlockProposal(proposal *cluster.Block) error {
   307  	header := proposal.Header
   308  	blockID := header.ID()
   309  	log := c.log.With().
   310  		Str("chain_id", header.ChainID.String()).
   311  		Uint64("block_height", header.Height).
   312  		Uint64("block_view", header.View).
   313  		Hex("block_id", blockID[:]).
   314  		Hex("parent_id", header.ParentID[:]).
   315  		Hex("payload_hash", header.PayloadHash[:]).
   316  		Time("timestamp", header.Timestamp).
   317  		Hex("proposer", header.ProposerID[:]).
   318  		Hex("parent_signer_indices", header.ParentVoterIndices).
   319  		Logger()
   320  	log.Info().Msg("processing block proposal")
   321  
   322  	hotstuffProposal := model.ProposalFromFlow(header)
   323  	err := c.validator.ValidateProposal(hotstuffProposal)
   324  	if err != nil {
   325  		if model.IsInvalidProposalError(err) {
   326  			return err
   327  		}
   328  		if errors.Is(err, model.ErrViewForUnknownEpoch) {
   329  			// The cluster committee never returns ErrViewForUnknownEpoch, therefore this case
   330  			// is an unexpected error in cluster consensus.
   331  			return fmt.Errorf("unexpected error: cluster committee reported unknown epoch : %w", irrecoverable.NewException(err))
   332  		}
   333  		return fmt.Errorf("unexpected error validating proposal: %w", err)
   334  	}
   335  
   336  	// see if the block is a valid extension of the protocol state
   337  	err = c.state.Extend(proposal)
   338  	if err != nil {
   339  		if state.IsInvalidExtensionError(err) {
   340  			// if the block proposes an invalid extension of the cluster state, then the block is invalid
   341  			return model.NewInvalidProposalErrorf(hotstuffProposal, "invalid extension of cluster state (block: %x, height: %d): %w", blockID, header.Height, err)
   342  		} else if state.IsOutdatedExtensionError(err) {
   343  			// cluster state aborted processing of block as it is on an abandoned fork: block is outdated
   344  			return engine.NewOutdatedInputErrorf("outdated extension of cluster state: %w", err)
   345  		} else if state.IsUnverifiableExtensionError(err) {
   346  			return engine.NewUnverifiableInputError("unverifiable extension of cluster state (block_id: %x, height: %d): %w",
   347  				header.ID(), header.Height, err)
   348  		} else {
   349  			// unexpected error: potentially corrupted internal state => abort processing and escalate error
   350  			return fmt.Errorf("unexpected exception while extending cluster state with block %x at height %d: %w", blockID, header.Height, err)
   351  		}
   352  	}
   353  
   354  	// notify vote aggregator about a new block, so that it can start verifying
   355  	// votes for it.
   356  	c.voteAggregator.AddBlock(hotstuffProposal)
   357  
   358  	// submit the model to hotstuff for processing
   359  	// TODO replace with pubsub https://github.com/dapperlabs/flow-go/issues/6395
   360  	log.Info().Msg("forwarding block proposal to hotstuff")
   361  	c.hotstuff.SubmitProposal(hotstuffProposal)
   362  
   363  	return nil
   364  }
   365  
   366  // ProcessFinalizedBlock performs pruning of stale data based on finalization event
   367  // removes pending blocks below the finalized view
   368  func (c *Core) ProcessFinalizedBlock(finalized *flow.Header) {
   369  	// remove all pending blocks at or below the finalized view
   370  	c.pending.PruneByView(finalized.View)
   371  	c.finalizedHeight.Set(finalized.Height)
   372  	c.finalizedView.Set(finalized.View)
   373  
   374  	// always record the metric
   375  	c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size())
   376  }
   377  
   378  // checkForAndLogOutdatedInputError checks whether error is an `engine.OutdatedInputError`.
   379  // If this is the case, we emit a log message and return true.
   380  // For any error other than `engine.OutdatedInputError`, this function is a no-op
   381  // and returns false.
   382  func checkForAndLogOutdatedInputError(err error, log zerolog.Logger) bool {
   383  	if engine.IsOutdatedInputError(err) {
   384  		// child is outdated by the time we started processing it
   385  		// => node was probably behind and is catching up. Log as warning
   386  		log.Info().Msg("dropped processing of abandoned fork; this might be an indicator that the node is slightly behind")
   387  		return true
   388  	}
   389  	return false
   390  }
   391  
   392  // checkForAndLogUnverifiableInputError checks whether error is an `engine.UnverifiableInputError`.
   393  // If this is the case, we emit a log message and return true.
   394  // For any error other than `engine.UnverifiableInputError`, this function is a no-op
   395  // and returns false.
   396  func checkForAndLogUnverifiableInputError(err error, log zerolog.Logger) bool {
   397  	if engine.IsUnverifiableInputError(err) {
   398  		// the block cannot be validated
   399  		log.Warn().Err(err).Msg("received collection proposal with unknown reference block; " +
   400  			"this might be an indicator that the node is slightly behind or the proposer published an invalid collection")
   401  		return true
   402  	}
   403  	return false
   404  }