github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/compliance/core.go (about)

     1  package compliance
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/rs/zerolog"
    10  	"go.opentelemetry.io/otel/attribute"
    11  
    12  	"github.com/onflow/flow-go/consensus/hotstuff"
    13  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    14  	"github.com/onflow/flow-go/engine"
    15  	"github.com/onflow/flow-go/model/flow"
    16  	"github.com/onflow/flow-go/model/messages"
    17  	"github.com/onflow/flow-go/module"
    18  	"github.com/onflow/flow-go/module/compliance"
    19  	"github.com/onflow/flow-go/module/counters"
    20  	"github.com/onflow/flow-go/module/mempool"
    21  	"github.com/onflow/flow-go/module/metrics"
    22  	"github.com/onflow/flow-go/module/trace"
    23  	"github.com/onflow/flow-go/state"
    24  	"github.com/onflow/flow-go/state/protocol"
    25  	"github.com/onflow/flow-go/storage"
    26  	"github.com/onflow/flow-go/utils/logging"
    27  )
    28  
    29  // Core contains the central business logic for the main consensus' compliance engine.
    30  // It is responsible for handling communication for the embedded consensus algorithm.
    31  // CAUTION with CONCURRENCY:
    32  //   - At the moment, compliance.Core _can not_ process blocks concurrently. Callers of `OnBlockProposal`
    33  //     need to ensure single-threaded access.
    34  //   - The only exception is calls to `ProcessFinalizedView`, which is the only concurrency-safe
    35  //     method of compliance.Core
    36  type Core struct {
    37  	log                       zerolog.Logger // used to log relevant actions with context
    38  	config                    compliance.Config
    39  	engineMetrics             module.EngineMetrics
    40  	mempoolMetrics            module.MempoolMetrics
    41  	hotstuffMetrics           module.HotstuffMetrics
    42  	complianceMetrics         module.ComplianceMetrics
    43  	proposalViolationNotifier hotstuff.ProposalViolationConsumer
    44  	tracer                    module.Tracer
    45  	headers                   storage.Headers
    46  	payloads                  storage.Payloads
    47  	state                     protocol.ParticipantState
    48  	// track latest finalized view/height - used to efficiently drop outdated or too-far-ahead blocks
    49  	finalizedView     counters.StrictMonotonousCounter
    50  	finalizedHeight   counters.StrictMonotonousCounter
    51  	pending           module.PendingBlockBuffer // pending block cache
    52  	sync              module.BlockRequester
    53  	hotstuff          module.HotStuff
    54  	validator         hotstuff.Validator
    55  	voteAggregator    hotstuff.VoteAggregator
    56  	timeoutAggregator hotstuff.TimeoutAggregator
    57  }
    58  
    59  // NewCore instantiates the business logic for the main consensus' compliance engine.
    60  func NewCore(
    61  	log zerolog.Logger,
    62  	collector module.EngineMetrics,
    63  	mempool module.MempoolMetrics,
    64  	hotstuffMetrics module.HotstuffMetrics,
    65  	complianceMetrics module.ComplianceMetrics,
    66  	proposalViolationNotifier hotstuff.ProposalViolationConsumer,
    67  	tracer module.Tracer,
    68  	headers storage.Headers,
    69  	payloads storage.Payloads,
    70  	state protocol.ParticipantState,
    71  	pending module.PendingBlockBuffer,
    72  	sync module.BlockRequester,
    73  	validator hotstuff.Validator,
    74  	hotstuff module.HotStuff,
    75  	voteAggregator hotstuff.VoteAggregator,
    76  	timeoutAggregator hotstuff.TimeoutAggregator,
    77  	config compliance.Config,
    78  ) (*Core, error) {
    79  
    80  	c := &Core{
    81  		log:                       log.With().Str("compliance", "core").Logger(),
    82  		config:                    config,
    83  		engineMetrics:             collector,
    84  		tracer:                    tracer,
    85  		mempoolMetrics:            mempool,
    86  		hotstuffMetrics:           hotstuffMetrics,
    87  		complianceMetrics:         complianceMetrics,
    88  		proposalViolationNotifier: proposalViolationNotifier,
    89  		headers:                   headers,
    90  		payloads:                  payloads,
    91  		state:                     state,
    92  		pending:                   pending,
    93  		sync:                      sync,
    94  		hotstuff:                  hotstuff,
    95  		validator:                 validator,
    96  		voteAggregator:            voteAggregator,
    97  		timeoutAggregator:         timeoutAggregator,
    98  	}
    99  
   100  	// initialize finalized boundary cache
   101  	final, err := c.state.Final().Head()
   102  	if err != nil {
   103  		return nil, fmt.Errorf("could not initialized finalized boundary cache: %w", err)
   104  	}
   105  	c.ProcessFinalizedBlock(final)
   106  
   107  	c.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, c.pending.Size())
   108  
   109  	return c, nil
   110  }
   111  
   112  // OnBlockProposal handles incoming block proposals.
   113  // No errors are expected during normal operation. All returned exceptions
   114  // are potential symptoms of internal state corruption and should be fatal.
   115  func (c *Core) OnBlockProposal(proposal flow.Slashable[*messages.BlockProposal]) error {
   116  	block := flow.Slashable[*flow.Block]{
   117  		OriginID: proposal.OriginID,
   118  		Message:  proposal.Message.Block.ToInternal(),
   119  	}
   120  	header := block.Message.Header
   121  	blockID := header.ID()
   122  	finalHeight := c.finalizedHeight.Value()
   123  	finalView := c.finalizedView.Value()
   124  
   125  	span, _ := c.tracer.StartBlockSpan(context.Background(), header.ID(), trace.CONCompOnBlockProposal)
   126  	span.SetAttributes(
   127  		attribute.Int64("view", int64(header.View)),
   128  		attribute.String("origin_id", proposal.OriginID.String()),
   129  		attribute.String("proposer", header.ProposerID.String()),
   130  	)
   131  	traceID := span.SpanContext().TraceID().String()
   132  	defer span.End()
   133  
   134  	log := c.log.With().
   135  		Hex("origin_id", proposal.OriginID[:]).
   136  		Str("chain_id", header.ChainID.String()).
   137  		Uint64("block_height", header.Height).
   138  		Uint64("block_view", header.View).
   139  		Hex("block_id", logging.Entity(header)).
   140  		Hex("parent_id", header.ParentID[:]).
   141  		Hex("payload_hash", header.PayloadHash[:]).
   142  		Time("timestamp", header.Timestamp).
   143  		Hex("proposer", header.ProposerID[:]).
   144  		Hex("parent_signer_indices", header.ParentVoterIndices).
   145  		Str("traceID", traceID). // traceID is used to connect logs to traces
   146  		Uint64("finalized_height", finalHeight).
   147  		Uint64("finalized_view", finalView).
   148  		Logger()
   149  	log.Info().Msg("block proposal received")
   150  
   151  	// drop proposals below the finalized threshold
   152  	if header.Height <= finalHeight || header.View <= finalView {
   153  		log.Debug().Msg("dropping block below finalized boundary")
   154  		return nil
   155  	}
   156  
   157  	skipNewProposalsThreshold := c.config.GetSkipNewProposalsThreshold()
   158  	// ignore proposals which are too far ahead of our local finalized state
   159  	// instead, rely on sync engine to catch up finalization more effectively, and avoid
   160  	// large subtree of blocks to be cached.
   161  	if header.View > finalView+skipNewProposalsThreshold {
   162  		log.Debug().
   163  			Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold).
   164  			Msg("dropping block too far ahead of locally finalized view")
   165  		return nil
   166  	}
   167  
   168  	// first, we reject all blocks that we don't need to process:
   169  	// 1) blocks already in the cache; they will already be processed later
   170  	// 2) blocks already on disk; they were processed and await finalization
   171  
   172  	// ignore proposals that are already cached
   173  	_, cached := c.pending.ByID(blockID)
   174  	if cached {
   175  		log.Debug().Msg("skipping already cached proposal")
   176  		return nil
   177  	}
   178  
   179  	// ignore proposals that were already processed
   180  	_, err := c.headers.ByBlockID(blockID)
   181  	if err == nil {
   182  		log.Debug().Msg("skipping already processed proposal")
   183  		return nil
   184  	}
   185  	if !errors.Is(err, storage.ErrNotFound) {
   186  		return fmt.Errorf("could not check proposal: %w", err)
   187  	}
   188  
   189  	// there are two possibilities if the proposal is neither already pending
   190  	// processing in the cache, nor has already been processed:
   191  	// 1) the proposal is unverifiable because the parent is unknown
   192  	// => we cache the proposal
   193  	// 2) the proposal is connected to finalized state through an unbroken chain
   194  	// => we verify the proposal and forward it to hotstuff if valid
   195  
   196  	// if the parent is a pending block (disconnected from the incorporated state), we cache this block as well.
   197  	// we don't have to request its parent block or its ancestor again, because as a
   198  	// pending block, its parent block must have been requested.
   199  	// if there was problem requesting its parent or ancestors, the sync engine's forward
   200  	// syncing with range requests for finalized blocks will request for the blocks.
   201  	_, found := c.pending.ByID(header.ParentID)
   202  	if found {
   203  		// add the block to the cache
   204  		_ = c.pending.Add(block)
   205  		c.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, c.pending.Size())
   206  
   207  		return nil
   208  	}
   209  
   210  	// if the proposal is connected to a block that is neither in the cache, nor
   211  	// in persistent storage, its direct parent is missing; cache the proposal
   212  	// and request the parent
   213  	exists, err := c.headers.Exists(header.ParentID)
   214  	if err != nil {
   215  		return fmt.Errorf("could not check parent exists: %w", err)
   216  	}
   217  	if !exists {
   218  		_ = c.pending.Add(block)
   219  		c.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, c.pending.Size())
   220  
   221  		c.sync.RequestBlock(header.ParentID, header.Height-1)
   222  		log.Debug().Msg("requesting missing parent for proposal")
   223  		return nil
   224  	}
   225  
   226  	// At this point, we should be able to connect the proposal to the finalized
   227  	// state and should process it to see whether to forward to hotstuff or not.
   228  	// processBlockAndDescendants is a recursive function. Here we trace the
   229  	// execution of the entire recursion, which might include processing the
   230  	// proposal's pending children. There is another span within
   231  	// processBlockProposal that measures the time spent for a single proposal.
   232  	err = c.processBlockAndDescendants(block)
   233  	c.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, c.pending.Size())
   234  	if err != nil {
   235  		return fmt.Errorf("could not process block proposal: %w", err)
   236  	}
   237  
   238  	return nil
   239  }
   240  
   241  // processBlockAndDescendants is a recursive function that processes a block and
   242  // its pending proposals for its children. By induction, any children connected
   243  // to a valid proposal are validly connected to the finalized state and can be
   244  // processed as well.
   245  // No errors are expected during normal operation. All returned exceptions
   246  // are potential symptoms of internal state corruption and should be fatal.
   247  func (c *Core) processBlockAndDescendants(proposal flow.Slashable[*flow.Block]) error {
   248  	header := proposal.Message.Header
   249  	blockID := header.ID()
   250  
   251  	log := c.log.With().
   252  		Str("block_id", blockID.String()).
   253  		Uint64("block_height", header.Height).
   254  		Uint64("block_view", header.View).
   255  		Uint64("parent_view", header.ParentView).
   256  		Logger()
   257  
   258  	// process block itself
   259  	err := c.processBlockProposal(proposal.Message)
   260  	if err != nil {
   261  		if checkForAndLogOutdatedInputError(err, log) || checkForAndLogUnverifiableInputError(err, log) {
   262  			return nil
   263  		}
   264  		if invalidBlockErr, ok := model.AsInvalidProposalError(err); ok {
   265  			log.Err(err).Msg("received invalid block from other node (potential slashing evidence?)")
   266  
   267  			// notify consumers about invalid block
   268  			c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{
   269  				OriginID: proposal.OriginID,
   270  				Message:  *invalidBlockErr,
   271  			})
   272  
   273  			// notify VoteAggregator about the invalid block
   274  			err = c.voteAggregator.InvalidBlock(model.ProposalFromFlow(header))
   275  			if err != nil {
   276  				if mempool.IsBelowPrunedThresholdError(err) {
   277  					log.Warn().Msg("received invalid block, but is below pruned threshold")
   278  					return nil
   279  				}
   280  				return fmt.Errorf("unexpected error notifying vote aggregator about invalid block: %w", err)
   281  			}
   282  			return nil
   283  		}
   284  		// unexpected error: potentially corrupted internal state => abort processing and escalate error
   285  		return fmt.Errorf("failed to process block %x: %w", blockID, err)
   286  	}
   287  
   288  	// process all children
   289  	// do not break on invalid or outdated blocks as they should not prevent us
   290  	// from processing other valid children
   291  	children, has := c.pending.ByParentID(blockID)
   292  	if !has {
   293  		return nil
   294  	}
   295  	for _, child := range children {
   296  		cpr := c.processBlockAndDescendants(child)
   297  		if cpr != nil {
   298  			// unexpected error: potentially corrupted internal state => abort processing and escalate error
   299  			return cpr
   300  		}
   301  	}
   302  
   303  	// drop all the children that should have been processed now
   304  	c.pending.DropForParent(blockID)
   305  
   306  	return nil
   307  }
   308  
   309  // processBlockProposal processes the given block proposal. The proposal must connect to
   310  // the finalized state.
   311  // Expected errors during normal operations:
   312  //   - engine.OutdatedInputError if the block proposal is outdated (e.g. orphaned)
   313  //   - model.InvalidProposalError if the block proposal is invalid
   314  //   - engine.UnverifiableInputError if the block proposal cannot be verified
   315  func (c *Core) processBlockProposal(proposal *flow.Block) error {
   316  	startTime := time.Now()
   317  	defer func() {
   318  		c.hotstuffMetrics.BlockProcessingDuration(time.Since(startTime))
   319  	}()
   320  
   321  	header := proposal.Header
   322  	blockID := header.ID()
   323  
   324  	span, ctx := c.tracer.StartBlockSpan(context.Background(), blockID, trace.ConCompProcessBlockProposal)
   325  	span.SetAttributes(
   326  		attribute.String("proposer", header.ProposerID.String()),
   327  	)
   328  	defer span.End()
   329  
   330  	hotstuffProposal := model.ProposalFromFlow(header)
   331  	err := c.validator.ValidateProposal(hotstuffProposal)
   332  	if err != nil {
   333  		if model.IsInvalidProposalError(err) {
   334  			return err
   335  		}
   336  		if errors.Is(err, model.ErrViewForUnknownEpoch) {
   337  			// We have received a proposal, but we don't know the epoch its view is within.
   338  			// We know:
   339  			//  - the parent of this block is valid and was appended to the state (ie. we knew the epoch for it)
   340  			//  - if we then see this for the child, one of two things must have happened:
   341  			//    1. the proposer maliciously created the block for a view very far in the future (it's invalid)
   342  			//      -> in this case we can disregard the block
   343  			//    2. no blocks have been finalized within the epoch commitment deadline, and the epoch ended
   344  			//       (breaking a critical assumption - see EpochCommitSafetyThreshold in protocol.Params for details)
   345  			//      -> in this case, the network has encountered a critical failure
   346  			//  - we assume in general that Case 2 will not happen, therefore this must be Case 1 - an invalid block
   347  			return engine.NewUnverifiableInputError("unverifiable proposal with view from unknown epoch: %w", err)
   348  		}
   349  		return fmt.Errorf("unexpected error validating proposal: %w", err)
   350  	}
   351  
   352  	log := c.log.With().
   353  		Str("chain_id", header.ChainID.String()).
   354  		Uint64("block_height", header.Height).
   355  		Uint64("block_view", header.View).
   356  		Hex("block_id", blockID[:]).
   357  		Hex("parent_id", header.ParentID[:]).
   358  		Hex("payload_hash", header.PayloadHash[:]).
   359  		Time("timestamp", header.Timestamp).
   360  		Hex("proposer", header.ProposerID[:]).
   361  		Hex("parent_signer_indices", header.ParentVoterIndices).
   362  		Logger()
   363  	log.Info().Msg("processing block proposal")
   364  
   365  	// see if the block is a valid extension of the protocol state
   366  	block := &flow.Block{
   367  		Header:  proposal.Header,
   368  		Payload: proposal.Payload,
   369  	}
   370  	err = c.state.Extend(ctx, block)
   371  	if err != nil {
   372  		if state.IsInvalidExtensionError(err) {
   373  			// if the block proposes an invalid extension of the protocol state, then the block is invalid
   374  			return model.NewInvalidProposalErrorf(hotstuffProposal, "invalid extension of protocol state (block: %x, height: %d): %w", blockID, header.Height, err)
   375  		}
   376  		if state.IsOutdatedExtensionError(err) {
   377  			// protocol state aborted processing of block as it is on an abandoned fork: block is outdated
   378  			return engine.NewOutdatedInputErrorf("outdated extension of protocol state: %w", err)
   379  		}
   380  		// unexpected error: potentially corrupted internal state => abort processing and escalate error
   381  		return fmt.Errorf("unexpected exception while extending protocol state with block %x at height %d: %w", blockID, header.Height, err)
   382  	}
   383  
   384  	// notify vote aggregator about a new block, so that it can start verifying
   385  	// votes for it.
   386  	c.voteAggregator.AddBlock(hotstuffProposal)
   387  
   388  	// submit the model to hotstuff for processing
   389  	// TODO replace with pubsub https://github.com/dapperlabs/flow-go/issues/6395
   390  	log.Info().Msg("forwarding block proposal to hotstuff")
   391  	c.hotstuff.SubmitProposal(hotstuffProposal)
   392  
   393  	return nil
   394  }
   395  
   396  // ProcessFinalizedBlock performs pruning of stale data based on finalization event
   397  // removes pending blocks below the finalized view
   398  func (c *Core) ProcessFinalizedBlock(finalized *flow.Header) {
   399  	// remove all pending blocks at or below the finalized view
   400  	c.pending.PruneByView(finalized.View)
   401  	c.finalizedHeight.Set(finalized.Height)
   402  	c.finalizedView.Set(finalized.View)
   403  
   404  	// always record the metric
   405  	c.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, c.pending.Size())
   406  }
   407  
   408  // checkForAndLogOutdatedInputError checks whether error is an `engine.OutdatedInputError`.
   409  // If this is the case, we emit a log message and return true.
   410  // For any error other than `engine.OutdatedInputError`, this function is a no-op
   411  // and returns false.
   412  func checkForAndLogOutdatedInputError(err error, log zerolog.Logger) bool {
   413  	if engine.IsOutdatedInputError(err) {
   414  		// child is outdated by the time we started processing it
   415  		// => node was probably behind and is catching up. Log as warning
   416  		log.Info().Msg("dropped processing of abandoned fork; this might be an indicator that the node is slightly behind")
   417  		return true
   418  	}
   419  	return false
   420  }
   421  
   422  // checkForAndLogUnverifiableInputError checks whether error is an `engine.UnverifiableInputError`.
   423  // If this is the case, we emit a log message and return true.
   424  // For any error other than `engine.UnverifiableInputError`, this function is a no-op
   425  // and returns false.
   426  func checkForAndLogUnverifiableInputError(err error, log zerolog.Logger) bool {
   427  	if engine.IsUnverifiableInputError(err) {
   428  		// the block cannot be validated
   429  		log.Warn().Err(err).Msg("received unverifiable block proposal; " +
   430  			"this might be an indicator that a malicious proposer is generating detached blocks very far ahead")
   431  		return true
   432  	}
   433  	return false
   434  }