github.com/koko1123/flow-go-1@v0.29.6/engine/common/follower/engine.go

package follower

import (
	"context"
	"errors"
	"fmt"

	"github.com/hashicorp/go-multierror"
	"github.com/rs/zerolog"

	"github.com/koko1123/flow-go-1/engine"
	"github.com/koko1123/flow-go-1/model/events"
	"github.com/koko1123/flow-go-1/model/flow"
	"github.com/koko1123/flow-go-1/model/messages"
	"github.com/koko1123/flow-go-1/module"
	"github.com/koko1123/flow-go-1/module/compliance"
	"github.com/koko1123/flow-go-1/module/metrics"
	"github.com/koko1123/flow-go-1/module/trace"
	"github.com/koko1123/flow-go-1/network"
	"github.com/koko1123/flow-go-1/network/channels"
	"github.com/koko1123/flow-go-1/state"
	"github.com/koko1123/flow-go-1/state/protocol"
	"github.com/koko1123/flow-go-1/storage"
	"github.com/koko1123/flow-go-1/utils/logging"
)

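// Engine is the follower engine. It receives block proposals (from the network,
// from the local synchronization engine, or as part of block responses), caches
// proposals whose ancestry is still missing, extends the local protocol state
// with proposals that connect to the finalized state, and forwards their
// headers to the HotStuff follower.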
type Engine struct {
	unit           *engine.Unit
	log            zerolog.Logger
	config         compliance.Config
	me             module.Local
	engMetrics     module.EngineMetrics
	mempoolMetrics module.MempoolMetrics
	cleaner        storage.Cleaner
	headers        storage.Headers
	payloads       storage.Payloads
	state          protocol.MutableState
	pending        module.PendingBlockBuffer
	follower       module.HotStuffFollower
	con            network.Conduit
	sync           module.BlockRequester
	tracer         module.Tracer
	channel        channels.Channel
}

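// Option is a functional option that configures the follower engine.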
type Option func(*Engine)

// WithComplianceOptions sets options for the engine's compliance config.
func WithComplianceOptions(opts ...compliance.Opt) Option {
	return func(e *Engine) {
		for _, apply := range opts {
			apply(&e.config)
		}
	}
}

// WithChannel sets the channel the follower engine will use to receive blocks.
func WithChannel(channel channels.Channel) Option {
	return func(e *Engine) {
		e.channel = channel
	}
}

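// New creates a follower engine with the default compliance config and the
// default receive channel (channels.ReceiveBlocks), applies the given options,
// and registers the engine with the network on the configured channel.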
func New(
	log zerolog.Logger,
	net network.Network,
	me module.Local,
	engMetrics module.EngineMetrics,
	mempoolMetrics module.MempoolMetrics,
	cleaner storage.Cleaner,
	headers storage.Headers,
	payloads storage.Payloads,
	state protocol.MutableState,
	pending module.PendingBlockBuffer,
	follower module.HotStuffFollower,
	sync module.BlockRequester,
	tracer module.Tracer,
	opts ...Option,
) (*Engine, error) {
	e := &Engine{
		unit:           engine.NewUnit(),
		log:            log.With().Str("engine", "follower").Logger(),
		config:         compliance.DefaultConfig(),
		me:             me,
		engMetrics:     engMetrics,
		mempoolMetrics: mempoolMetrics,
		cleaner:        cleaner,
		headers:        headers,
		payloads:       payloads,
		state:          state,
		pending:        pending,
		follower:       follower,
		sync:           sync,
		tracer:         tracer,
		channel:        channels.ReceiveBlocks,
	}

	for _, apply := range opts {
		apply(e)
	}

	con, err := net.Register(e.channel, e)
	if err != nil {
		return nil, fmt.Errorf("could not register engine to network: %w", err)
	}
	e.con = con

	return e, nil
}
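
// buildFollowerEngine is an illustrative sketch (not part of the original engine)
// of how a node builder might wire up the follower engine with a non-default
// compliance configuration. It assumes the compliance package exposes a
// WithSkipNewProposalsThreshold option for the SkipNewProposalsThreshold value
// used further below; all dependencies are passed in by the caller.
func buildFollowerEngine(
	log zerolog.Logger,
	net network.Network,
	me module.Local,
	engMetrics module.EngineMetrics,
	mempoolMetrics module.MempoolMetrics,
	cleaner storage.Cleaner,
	headers storage.Headers,
	payloads storage.Payloads,
	state protocol.MutableState,
	pending module.PendingBlockBuffer,
	follower module.HotStuffFollower,
	sync module.BlockRequester,
	tracer module.Tracer,
) (*Engine, error) {
	return New(
		log, net, me, engMetrics, mempoolMetrics,
		cleaner, headers, payloads, state, pending, follower, sync, tracer,
		// keep the default channel (channels.ReceiveBlocks); only override the
		// threshold above which new proposals are dropped instead of cached
		WithComplianceOptions(compliance.WithSkipNewProposalsThreshold(10_000)),
	)
}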

// Ready returns a ready channel that is closed once the engine has fully
// started. For the follower engine, this is true once the underlying HotStuff
// follower has started.
func (e *Engine) Ready() <-chan struct{} {
	return e.unit.Ready(func() {
		<-e.follower.Ready()
	})
}

// Done returns a done channel that is closed once the engine has fully stopped.
// For the follower engine, we wait for the HotStuff follower to finish.
func (e *Engine) Done() <-chan struct{} {
	return e.unit.Done(func() {
		<-e.follower.Done()
	})
}

// SubmitLocal submits an event originating on the local node.
func (e *Engine) SubmitLocal(event interface{}) {
	e.unit.Launch(func() {
		err := e.process(e.me.NodeID(), event)
		if err != nil {
			engine.LogError(e.log, err)
		}
	})
}

// Submit submits the given event from the node with the given origin ID
// for processing in a non-blocking manner. It returns instantly and logs
// a potential processing error internally when done.
func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
	e.unit.Launch(func() {
		err := e.Process(channel, originID, event)
		if err != nil {
			engine.LogError(e.log, err)
		}
	})
}

// ProcessLocal processes an event originating on the local node.
func (e *Engine) ProcessLocal(event interface{}) error {
	return e.unit.Do(func() error {
		return e.process(e.me.NodeID(), event)
	})
}

// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
	return e.unit.Do(func() error {
		return e.process(originID, event)
	})
}

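// process dispatches the given event from the origin node based on its type.
// Each handler acquires the unit lock, so events are processed one at a time.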
func (e *Engine) process(originID flow.Identifier, input interface{}) error {
	switch v := input.(type) {
	case *messages.BlockResponse:
		e.engMetrics.MessageReceived(metrics.EngineFollower, metrics.MessageBlockResponse)
		defer e.engMetrics.MessageHandled(metrics.EngineFollower, metrics.MessageBlockResponse)
		e.unit.Lock()
		defer e.unit.Unlock()
		return e.onBlockResponse(originID, v)
	case *events.SyncedBlock:
		e.engMetrics.MessageReceived(metrics.EngineFollower, metrics.MessageSyncedBlock)
		defer e.engMetrics.MessageHandled(metrics.EngineFollower, metrics.MessageSyncedBlock)
		e.unit.Lock()
		defer e.unit.Unlock()
		return e.onSyncedBlock(originID, v)
	case *messages.BlockProposal:
		e.engMetrics.MessageReceived(metrics.EngineFollower, metrics.MessageBlockProposal)
		defer e.engMetrics.MessageHandled(metrics.EngineFollower, metrics.MessageBlockProposal)
		e.unit.Lock()
		defer e.unit.Unlock()
		return e.onBlockProposal(originID, v, false)
	default:
		return fmt.Errorf("invalid event type (%T)", input)
	}
}

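// onSyncedBlock processes a block delivered by the local synchronization engine
// by converting it into a block proposal.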
func (e *Engine) onSyncedBlock(originID flow.Identifier, synced *events.SyncedBlock) error {

	// a synced block has to originate locally, from the synchronization engine;
	// the block itself contains the proposer to indicate who created it
	if originID != e.me.NodeID() {
		return fmt.Errorf("synced block with non-local origin (local: %x, origin: %x)", e.me.NodeID(), originID)
	}

	// process as proposal
	proposal := &messages.BlockProposal{
		Block: synced.Block,
	}
	return e.onBlockProposal(originID, proposal, false)
}

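// onBlockResponse processes the blocks of a block response in order, waiting
// for each block to be fully processed before continuing with the next one.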
func (e *Engine) onBlockResponse(originID flow.Identifier, res *messages.BlockResponse) error {
	for i, block := range res.Blocks {
		proposal := &messages.BlockProposal{
			Block: block,
		}

		// process block proposal with a wait
		if err := e.onBlockProposal(originID, proposal, true); err != nil {
			return fmt.Errorf("failed to process the block at index %v of a range block response containing %v blocks: %w", i, len(res.Blocks), err)
		}
	}

	return nil
}

// onBlockProposal handles incoming block proposals. inRangeBlockResponse determines
// whether we wait for the proposal to be fully processed in processBlockAndDescendants.
func (e *Engine) onBlockProposal(originID flow.Identifier, proposal *messages.BlockProposal, inRangeBlockResponse bool) error {
	block := proposal.Block.ToInternal()
	header := block.Header

	span, ctx := e.tracer.StartBlockSpan(context.Background(), header.ID(), trace.FollowerOnBlockProposal)
	defer span.End()

	log := e.log.With().
		Hex("origin_id", originID[:]).
		Str("chain_id", header.ChainID.String()).
		Uint64("block_height", header.Height).
		Uint64("block_view", header.View).
		Hex("block_id", logging.Entity(header)).
		Hex("parent_id", header.ParentID[:]).
		Hex("payload_hash", header.PayloadHash[:]).
		Time("timestamp", header.Timestamp).
		Hex("proposer", header.ProposerID[:]).
		Logger()

	log.Info().Msg("block proposal received")

	e.prunePendingCache()

	// first, we reject all blocks that we don't need to process:
	// 1) blocks already in the cache; they will be processed later anyway
	// 2) blocks already on disk; they were processed and await finalization
	// 3) blocks at a height below the finalized height; they cannot be finalized

	// ignore proposals that are already cached
	_, cached := e.pending.ByID(header.ID())
	if cached {
		log.Debug().Msg("skipping already cached proposal")
		return nil
	}

	// ignore proposals that were already processed
	_, err := e.headers.ByBlockID(header.ID())
	if err == nil {
		log.Debug().Msg("skipping already processed proposal")
		return nil
	}
	if !errors.Is(err, storage.ErrNotFound) {
		return fmt.Errorf("could not check proposal: %w", err)
	}

	// ignore proposals that are too far ahead of our local finalized state;
	// instead, rely on the sync engine to catch up finalization more effectively,
	// and avoid caching a large subtree of blocks
	final, err := e.state.Final().Head()
	if err != nil {
		return fmt.Errorf("could not get latest finalized header: %w", err)
	}
	if header.Height > final.Height && header.Height-final.Height > e.config.SkipNewProposalsThreshold {
		log.Debug().
			Uint64("final_height", final.Height).
			Msg("dropping block too far ahead of locally finalized height")
		return nil
	}

	// there are two possibilities if the proposal is neither already pending
	// processing in the cache, nor has already been processed:
	// 1) the proposal is unverifiable because its parent or an ancestor is unknown
	// => we cache the proposal and request the missing link
	// 2) the proposal is connected to the finalized state through an unbroken chain
	// => we verify the proposal and forward it to hotstuff if valid

	// if the parent is a pending block (disconnected from the incorporated state), we cache this block as well.
	// we don't have to request its parent block or its ancestors again, because, as a
	// pending block, its parent block must already have been requested.
	// if there was a problem requesting its parent or ancestors, the sync engine's forward
	// syncing with range requests for finalized blocks will request the blocks.
	_, found := e.pending.ByID(header.ParentID)
	if found {

		// add the block to the cache
		_ = e.pending.Add(originID, block)
		e.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, e.pending.Size())

		return nil
	}

	// if the proposal is connected to a block that is neither in the cache, nor
	// in persistent storage, its direct parent is missing; cache the proposal
	// and request the parent
	_, err = e.headers.ByBlockID(header.ParentID)
	if errors.Is(err, storage.ErrNotFound) {

		_ = e.pending.Add(originID, block)

		log.Debug().Msg("requesting missing parent for proposal")

		e.sync.RequestBlock(header.ParentID, header.Height-1)

		return nil
	}
	if err != nil {
		return fmt.Errorf("could not check parent: %w", err)
	}

	// at this point, we should be able to connect the proposal to the finalized
	// state and should process it to see whether to forward to hotstuff or not
	err = e.processBlockAndDescendants(ctx, block, inRangeBlockResponse)
	if err != nil {
		return fmt.Errorf("could not process block proposal: %w", err)
	}

	// most of the heavy database checks are done at this point, so this is a
	// good moment to potentially kick off a garbage collection of the DB
	// NOTE: this is only effectively run on every 1000th call, which corresponds
	// to every 1000th successfully processed block
	e.cleaner.RunGC()

	return nil
}

// processBlockAndDescendants processes `proposal` and its pending descendants recursively.
// The function assumes that `proposal` is connected to the finalized state. By induction,
// any children are therefore also connected to the finalized state and can be processed as well.
// No errors are expected during normal operations.
func (e *Engine) processBlockAndDescendants(ctx context.Context, block *flow.Block, inRangeBlockResponse bool) error {
	header := block.Header

	span, ctx := e.tracer.StartSpanFromContext(ctx, trace.FollowerProcessBlockProposal)
	defer span.End()

	log := e.log.With().
		Str("chain_id", header.ChainID.String()).
		Uint64("block_height", header.Height).
		Uint64("block_view", header.View).
		Hex("block_id", logging.Entity(header)).
		Hex("parent_id", header.ParentID[:]).
		Hex("payload_hash", header.PayloadHash[:]).
		Time("timestamp", header.Timestamp).
		Hex("proposer", header.ProposerID[:]).
		Logger()

	log.Info().Msg("processing block proposal")

	// check whether the block is a valid extension of the chain;
	// this only checks the block header, since checking the block body is expensive.
	// The full block check is done by the consensus participants.
	err := e.state.Extend(ctx, block)
	if err != nil {
		// the block is outdated by the time we started processing it
		// => the node generating the proposal is probably behind and catching up.
		if state.IsOutdatedExtensionError(err) {
			log.Info().Err(err).Msg("dropped processing of abandoned fork; this might be an indicator that some consensus node is behind")
			return nil
		}
		// the block is invalid; log it, as we desire honest participation
		// TODO: potential slashing
		if state.IsInvalidExtensionError(err) {
			log.Warn().
				Err(err).
				Msg("received invalid block from other node (potential slashing evidence?)")
			return nil
		}

		return fmt.Errorf("could not extend protocol state: %w", err)
	}

	// retrieve the parent
	parent, err := e.headers.ByBlockID(header.ParentID)
	if err != nil {
		return fmt.Errorf("could not retrieve proposal parent: %w", err)
	}

	log.Info().Msg("forwarding block proposal to hotstuff")

	// submit the model to the follower for processing
	if inRangeBlockResponse {
		// wait until the follower has processed the proposal
		<-e.follower.SubmitProposal(header, parent.View)
	} else {
		// ignore the returned channel to avoid waiting
		e.follower.SubmitProposal(header, parent.View)
	}

	// check for any descendants of the block to process
	err = e.processPendingChildren(ctx, header, inRangeBlockResponse)
	if err != nil {
		return fmt.Errorf("could not process pending children: %w", err)
	}

	return nil
}

// processPendingChildren checks if there are proposals connected to the given
// parent block that was just processed; if this is the case, they should now
// all be validly connected to the finalized state and we should process them.
func (e *Engine) processPendingChildren(ctx context.Context, header *flow.Header, inRangeBlockResponse bool) error {

	span, ctx := e.tracer.StartSpanFromContext(ctx, trace.FollowerProcessPendingChildren)
	defer span.End()

	blockID := header.ID()

	// check if there are any children for this parent in the cache
	children, has := e.pending.ByParentID(blockID)
	if !has {
		return nil
	}

	// try to process the children only this once
	var result *multierror.Error
	for _, child := range children {
		err := e.processBlockAndDescendants(ctx, child.Message, inRangeBlockResponse)
		if err != nil {
			result = multierror.Append(result, err)
		}
	}

	// drop all of the children that should have been processed by now
	e.pending.DropForParent(blockID)

	return result.ErrorOrNil()
}

// prunePendingCache prunes the pending block cache.
func (e *Engine) prunePendingCache() {

	// retrieve the finalized height
	final, err := e.state.Final().Head()
	if err != nil {
		e.log.Warn().Err(err).Msg("could not get finalized head to prune pending blocks")
		return
	}

	// remove all pending blocks at or below the finalized view
	e.pending.PruneByView(final.View)

	// always record the metric
	e.mempoolMetrics.MempoolEntries(metrics.ResourceProposal, e.pending.Size())
}