github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/sealing/engine.go (about)

     1  package sealing
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/gammazero/workerpool"
     7  	"github.com/rs/zerolog"
     8  
     9  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    12  	"github.com/onflow/flow-go/engine/consensus"
    13  	"github.com/onflow/flow-go/model/flow"
    14  	"github.com/onflow/flow-go/model/messages"
    15  	"github.com/onflow/flow-go/module"
    16  	"github.com/onflow/flow-go/module/mempool"
    17  	"github.com/onflow/flow-go/module/metrics"
    18  	msig "github.com/onflow/flow-go/module/signature"
    19  	"github.com/onflow/flow-go/network"
    20  	"github.com/onflow/flow-go/network/channels"
    21  	"github.com/onflow/flow-go/state/protocol"
    22  	"github.com/onflow/flow-go/storage"
    23  )
    24  
    25  type Event struct {
    26  	OriginID flow.Identifier
    27  	Msg      interface{}
    28  }
    29  
    30  // defaultApprovalQueueCapacity maximum capacity of approvals queue
    31  const defaultApprovalQueueCapacity = 10000
    32  
    33  // defaultApprovalResponseQueueCapacity maximum capacity of approval requests queue
    34  const defaultApprovalResponseQueueCapacity = 10000
    35  
    36  // defaultSealingEngineWorkers number of workers to dispatch events for sealing core
    37  const defaultSealingEngineWorkers = 8
    38  
    39  // defaultAssignmentCollectorsWorkerPoolCapacity is the default number of workers that is available for worker pool which is used
    40  // by assignment collector state machine to do transitions
    41  const defaultAssignmentCollectorsWorkerPoolCapacity = 4
    42  
    43  // defaultIncorporatedBlockQueueCapacity maximum capacity for queuing incorporated blocks
    44  // Caution: We cannot drop incorporated blocks, as there is no way that results included in the block
    45  // can be re-added later once dropped. Missing any incorporated result can undermine sealing liveness!
    46  // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
    47  // in case queueing an incorporated block fails.
    48  const defaultIncorporatedBlockQueueCapacity = 10000
    49  
    50  // defaultIncorporatedResultQueueCapacity maximum capacity for queuing incorporated results
    51  // Caution: We cannot drop incorporated results, as there is no way that an incorporated result
    52  // can be re-added later once dropped. Missing incorporated results can undermine sealing liveness!
    53  // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
    54  // in case queueing an incorporated result fails.
    55  const defaultIncorporatedResultQueueCapacity = 80000
    56  
    57  type (
    58  	EventSink chan *Event // Channel to push pending events
    59  )
    60  
    61  // Engine is a wrapper for approval processing `Core` which implements logic for
    62  // queuing and filtering network messages which later will be processed by sealing engine.
    63  // Purpose of this struct is to provide an efficient way how to consume messages from network layer and pass
    64  // them to `Core`. Engine runs 2 separate gorourtines that perform pre-processing and consuming messages by Core.
    65  type Engine struct {
    66  	unit                       *engine.Unit
    67  	workerPool                 *workerpool.WorkerPool
    68  	core                       consensus.SealingCore
    69  	log                        zerolog.Logger
    70  	me                         module.Local
    71  	headers                    storage.Headers
    72  	results                    storage.ExecutionResults
    73  	index                      storage.Index
    74  	state                      protocol.State
    75  	cacheMetrics               module.MempoolMetrics
    76  	engineMetrics              module.EngineMetrics
    77  	pendingApprovals           engine.MessageStore
    78  	pendingRequestedApprovals  engine.MessageStore
    79  	pendingIncorporatedResults *fifoqueue.FifoQueue
    80  	pendingIncorporatedBlocks  *fifoqueue.FifoQueue
    81  	inboundEventsNotifier      engine.Notifier
    82  	finalizationEventsNotifier engine.Notifier
    83  	blockIncorporatedNotifier  engine.Notifier
    84  	messageHandler             *engine.MessageHandler
    85  	rootHeader                 *flow.Header
    86  }
    87  
    88  // NewEngine constructs new `Engine` which runs on it's own unit.
    89  func NewEngine(log zerolog.Logger,
    90  	tracer module.Tracer,
    91  	conMetrics module.ConsensusMetrics,
    92  	engineMetrics module.EngineMetrics,
    93  	mempool module.MempoolMetrics,
    94  	sealingTracker consensus.SealingTracker,
    95  	net network.EngineRegistry,
    96  	me module.Local,
    97  	headers storage.Headers,
    98  	payloads storage.Payloads,
    99  	results storage.ExecutionResults,
   100  	index storage.Index,
   101  	state protocol.State,
   102  	sealsDB storage.Seals,
   103  	assigner module.ChunkAssigner,
   104  	sealsMempool mempool.IncorporatedResultSeals,
   105  	requiredApprovalsForSealConstructionGetter module.SealingConfigsGetter,
   106  ) (*Engine, error) {
   107  	rootHeader := state.Params().FinalizedRoot()
   108  
   109  	unit := engine.NewUnit()
   110  	e := &Engine{
   111  		unit:          unit,
   112  		workerPool:    workerpool.New(defaultAssignmentCollectorsWorkerPoolCapacity),
   113  		log:           log.With().Str("engine", "sealing.Engine").Logger(),
   114  		me:            me,
   115  		state:         state,
   116  		engineMetrics: engineMetrics,
   117  		cacheMetrics:  mempool,
   118  		headers:       headers,
   119  		results:       results,
   120  		index:         index,
   121  		rootHeader:    rootHeader,
   122  	}
   123  
   124  	err := e.setupTrustedInboundQueues()
   125  	if err != nil {
   126  		return nil, fmt.Errorf("initialization of inbound queues for trusted inputs failed: %w", err)
   127  	}
   128  
   129  	err = e.setupMessageHandler(requiredApprovalsForSealConstructionGetter)
   130  	if err != nil {
   131  		return nil, fmt.Errorf("could not initialize message handler for untrusted inputs: %w", err)
   132  	}
   133  
   134  	// register engine with the approval provider
   135  	_, err = net.Register(channels.ReceiveApprovals, e)
   136  	if err != nil {
   137  		return nil, fmt.Errorf("could not register for approvals: %w", err)
   138  	}
   139  
   140  	// register engine to the channel for requesting missing approvals
   141  	approvalConduit, err := net.Register(channels.RequestApprovalsByChunk, e)
   142  	if err != nil {
   143  		return nil, fmt.Errorf("could not register for requesting approvals: %w", err)
   144  	}
   145  
   146  	signatureHasher := msig.NewBLSHasher(msig.ResultApprovalTag)
   147  	core, err := NewCore(log, e.workerPool, tracer, conMetrics, sealingTracker, unit, headers, state, sealsDB, assigner, signatureHasher, sealsMempool, approvalConduit, requiredApprovalsForSealConstructionGetter)
   148  	if err != nil {
   149  		return nil, fmt.Errorf("failed to init sealing engine: %w", err)
   150  	}
   151  
   152  	err = core.RepopulateAssignmentCollectorTree(payloads)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("could not repopulate assignment collectors tree: %w", err)
   155  	}
   156  	e.core = core
   157  
   158  	return e, nil
   159  }
   160  
   161  // setupTrustedInboundQueues initializes inbound queues for TRUSTED INPUTS (from other components within the
   162  // consensus node). We deliberately separate the queues for trusted inputs from the MessageHandler, which
   163  // handles external, untrusted inputs. This reduces the attack surface, as it makes it impossible for an external
   164  // attacker to feed values into the inbound channels for trusted inputs, even in the presence of bugs in
   165  // the networking layer or message handler
   166  func (e *Engine) setupTrustedInboundQueues() error {
   167  	e.finalizationEventsNotifier = engine.NewNotifier()
   168  	e.blockIncorporatedNotifier = engine.NewNotifier()
   169  	var err error
   170  	e.pendingIncorporatedResults, err = fifoqueue.NewFifoQueue(defaultIncorporatedResultQueueCapacity)
   171  	if err != nil {
   172  		return fmt.Errorf("failed to create queue for incorporated results: %w", err)
   173  	}
   174  	e.pendingIncorporatedBlocks, err = fifoqueue.NewFifoQueue(defaultIncorporatedBlockQueueCapacity)
   175  	if err != nil {
   176  		return fmt.Errorf("failed to create queue for incorporated blocks: %w", err)
   177  	}
   178  	return nil
   179  }
   180  
   181  // setupMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED INPUTS.
   182  func (e *Engine) setupMessageHandler(getSealingConfigs module.SealingConfigsGetter) error {
   183  	// FIFO queue for broadcasted approvals
   184  	pendingApprovalsQueue, err := fifoqueue.NewFifoQueue(
   185  		defaultApprovalQueueCapacity,
   186  		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalQueue, uint(len)) }),
   187  	)
   188  	if err != nil {
   189  		return fmt.Errorf("failed to create queue for inbound approvals: %w", err)
   190  	}
   191  	e.pendingApprovals = &engine.FifoMessageStore{
   192  		FifoQueue: pendingApprovalsQueue,
   193  	}
   194  
   195  	// FiFo queue for requested approvals
   196  	pendingRequestedApprovalsQueue, err := fifoqueue.NewFifoQueue(
   197  		defaultApprovalResponseQueueCapacity,
   198  		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalResponseQueue, uint(len)) }),
   199  	)
   200  	if err != nil {
   201  		return fmt.Errorf("failed to create queue for requested approvals: %w", err)
   202  	}
   203  	e.pendingRequestedApprovals = &engine.FifoMessageStore{
   204  		FifoQueue: pendingRequestedApprovalsQueue,
   205  	}
   206  
   207  	e.inboundEventsNotifier = engine.NewNotifier()
   208  	// define message queueing behaviour
   209  	e.messageHandler = engine.NewMessageHandler(
   210  		e.log,
   211  		e.inboundEventsNotifier,
   212  		engine.Pattern{
   213  			Match: func(msg *engine.Message) bool {
   214  				_, ok := msg.Payload.(*flow.ResultApproval)
   215  				if ok {
   216  					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
   217  				}
   218  				return ok
   219  			},
   220  			Map: func(msg *engine.Message) (*engine.Message, bool) {
   221  				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
   222  					// if we don't require approvals to construct a seal, don't even process approvals.
   223  					return nil, false
   224  				}
   225  
   226  				return msg, true
   227  			},
   228  			Store: e.pendingApprovals,
   229  		},
   230  		engine.Pattern{
   231  			Match: func(msg *engine.Message) bool {
   232  				_, ok := msg.Payload.(*messages.ApprovalResponse)
   233  				if ok {
   234  					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
   235  				}
   236  				return ok
   237  			},
   238  			Map: func(msg *engine.Message) (*engine.Message, bool) {
   239  				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
   240  					// if we don't require approvals to construct a seal, don't even process approvals.
   241  					return nil, false
   242  				}
   243  
   244  				approval := msg.Payload.(*messages.ApprovalResponse).Approval
   245  				return &engine.Message{
   246  					OriginID: msg.OriginID,
   247  					Payload:  &approval,
   248  				}, true
   249  			},
   250  			Store: e.pendingRequestedApprovals,
   251  		},
   252  	)
   253  
   254  	return nil
   255  }
   256  
   257  // Process sends event into channel with pending events. Generally speaking shouldn't lock for too long.
   258  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   259  	err := e.messageHandler.Process(originID, event)
   260  	if err != nil {
   261  		if engine.IsIncompatibleInputTypeError(err) {
   262  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   263  			return nil
   264  		}
   265  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   266  	}
   267  	return nil
   268  }
   269  
   270  // processAvailableMessages is processor of pending events which drives events from networking layer to business logic in `Core`.
   271  // Effectively consumes messages from networking layer and dispatches them into corresponding sinks which are connected with `Core`.
   272  func (e *Engine) processAvailableMessages() error {
   273  	for {
   274  		select {
   275  		case <-e.unit.Quit():
   276  			return nil
   277  		default:
   278  		}
   279  
   280  		event, ok := e.pendingIncorporatedResults.Pop()
   281  		if ok {
   282  			e.log.Debug().Msg("got new incorporated result")
   283  
   284  			err := e.processIncorporatedResult(event.(*flow.IncorporatedResult))
   285  			if err != nil {
   286  				return fmt.Errorf("could not process incorporated result: %w", err)
   287  			}
   288  			continue
   289  		}
   290  
   291  		// TODO prioritization
   292  		// eg: msg := engine.SelectNextMessage()
   293  		msg, ok := e.pendingRequestedApprovals.Get()
   294  		if !ok {
   295  			msg, ok = e.pendingApprovals.Get()
   296  		}
   297  		if ok {
   298  			e.log.Debug().Msg("got new result approval")
   299  
   300  			err := e.onApproval(msg.OriginID, msg.Payload.(*flow.ResultApproval))
   301  			if err != nil {
   302  				return fmt.Errorf("could not process result approval: %w", err)
   303  			}
   304  			continue
   305  		}
   306  
   307  		// when there is no more messages in the queue, back to the loop to wait
   308  		// for the next incoming message to arrive.
   309  		return nil
   310  	}
   311  }
   312  
   313  // finalizationProcessingLoop is a separate goroutine that performs processing of finalization events
   314  func (e *Engine) finalizationProcessingLoop() {
   315  	finalizationNotifier := e.finalizationEventsNotifier.Channel()
   316  	for {
   317  		select {
   318  		case <-e.unit.Quit():
   319  			return
   320  		case <-finalizationNotifier:
   321  			finalized, err := e.state.Final().Head()
   322  			if err != nil {
   323  				e.log.Fatal().Err(err).Msg("could not retrieve last finalized block")
   324  			}
   325  			err = e.core.ProcessFinalizedBlock(finalized.ID())
   326  			if err != nil {
   327  				e.log.Fatal().Err(err).Msgf("could not process finalized block %v", finalized.ID())
   328  			}
   329  		}
   330  	}
   331  }
   332  
   333  // blockIncorporatedEventsProcessingLoop is a separate goroutine for processing block incorporated events
   334  func (e *Engine) blockIncorporatedEventsProcessingLoop() {
   335  	c := e.blockIncorporatedNotifier.Channel()
   336  
   337  	for {
   338  		select {
   339  		case <-e.unit.Quit():
   340  			return
   341  		case <-c:
   342  			err := e.processBlockIncorporatedEvents()
   343  			if err != nil {
   344  				e.log.Fatal().Err(err).Msg("internal error processing block incorporated queued message")
   345  			}
   346  		}
   347  	}
   348  }
   349  
   350  func (e *Engine) loop() {
   351  	notifier := e.inboundEventsNotifier.Channel()
   352  	for {
   353  		select {
   354  		case <-e.unit.Quit():
   355  			return
   356  		case <-notifier:
   357  			err := e.processAvailableMessages()
   358  			if err != nil {
   359  				e.log.Fatal().Err(err).Msg("internal error processing queued message")
   360  			}
   361  		}
   362  	}
   363  }
   364  
   365  // processIncorporatedResult is a function that creates incorporated result and submits it for processing
   366  // to sealing core. In phase 2, incorporated result is incorporated at same block that is being executed.
   367  // This will be changed in phase 3.
   368  func (e *Engine) processIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error {
   369  	err := e.core.ProcessIncorporatedResult(incorporatedResult)
   370  	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageExecutionReceipt)
   371  	return err
   372  }
   373  
   374  func (e *Engine) onApproval(originID flow.Identifier, approval *flow.ResultApproval) error {
   375  	// don't process approval if originID is mismatched
   376  	if originID != approval.Body.ApproverID {
   377  		return nil
   378  	}
   379  
   380  	err := e.core.ProcessApproval(approval)
   381  	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageResultApproval)
   382  	if err != nil {
   383  		return fmt.Errorf("fatal internal error in sealing core logic")
   384  	}
   385  	return nil
   386  }
   387  
   388  // SubmitLocal submits an event originating on the local node.
   389  func (e *Engine) SubmitLocal(event interface{}) {
   390  	err := e.ProcessLocal(event)
   391  	if err != nil {
   392  		// receiving an input of incompatible type from a trusted internal component is fatal
   393  		e.log.Fatal().Err(err).Msg("internal error processing event")
   394  	}
   395  }
   396  
   397  // Submit submits the given event from the node with the given origin ID
   398  // for processing in a non-blocking manner. It returns instantly and logs
   399  // a potential processing error internally when done.
   400  func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
   401  	err := e.Process(channel, originID, event)
   402  	if err != nil {
   403  		e.log.Fatal().Err(err).Msg("internal error processing event")
   404  	}
   405  }
   406  
   407  // ProcessLocal processes an event originating on the local node.
   408  func (e *Engine) ProcessLocal(event interface{}) error {
   409  	return e.messageHandler.Process(e.me.NodeID(), event)
   410  }
   411  
   412  // Ready returns a ready channel that is closed once the engine has fully
   413  // started. For the propagation engine, we consider the engine up and running
   414  // upon initialization.
   415  func (e *Engine) Ready() <-chan struct{} {
   416  	// launch as many workers as we need
   417  	for i := 0; i < defaultSealingEngineWorkers; i++ {
   418  		e.unit.Launch(e.loop)
   419  	}
   420  	e.unit.Launch(e.finalizationProcessingLoop)
   421  	e.unit.Launch(e.blockIncorporatedEventsProcessingLoop)
   422  	return e.unit.Ready()
   423  }
   424  
   425  func (e *Engine) Done() <-chan struct{} {
   426  	return e.unit.Done(func() {
   427  		e.workerPool.StopWait()
   428  	})
   429  }
   430  
   431  // OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer`
   432  // It informs sealing.Core about finalization of respective block.
   433  //
   434  // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
   435  // from external nodes cannot be considered as inputs to this function
   436  func (e *Engine) OnFinalizedBlock(*model.Block) {
   437  	e.finalizationEventsNotifier.Notify()
   438  }
   439  
   440  // OnBlockIncorporated implements `OnBlockIncorporated` from the `hotstuff.FinalizationConsumer`
   441  // It processes all execution results that were incorporated in parent block payload.
   442  //
   443  // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
   444  // from external nodes cannot be considered as inputs to this function
   445  func (e *Engine) OnBlockIncorporated(incorporatedBlock *model.Block) {
   446  	added := e.pendingIncorporatedBlocks.Push(incorporatedBlock.BlockID)
   447  	if !added {
   448  		// Not being able to queue an incorporated block is a fatal edge case. It might happen, if the
   449  		// queue capacity is depleted. However, we cannot drop incorporated blocks, because there
   450  		// is no way that any contained incorporated result would be re-added later once dropped.
   451  		e.log.Fatal().Msgf("failed to queue incorporated block %v", incorporatedBlock.BlockID)
   452  	}
   453  	e.blockIncorporatedNotifier.Notify()
   454  }
   455  
   456  // processIncorporatedBlock selects receipts that were included into incorporated block and submits them
   457  // for further processing to sealing core. No errors expected during normal operations.
   458  func (e *Engine) processIncorporatedBlock(incorporatedBlockID flow.Identifier) error {
   459  	// In order to process a block within the sealing engine, we need the block's source of
   460  	// randomness (to compute the chunk assignment). The source of randomness can be taken from _any_
   461  	// QC for the block. We know that we have such a QC, once a valid child block is incorporated.
   462  	// Vice-versa, once a block is incorporated, we know that _its parent_ has a valid child, i.e.
   463  	// the parent's source of randomness is now know.
   464  
   465  	incorporatedBlock, err := e.headers.ByBlockID(incorporatedBlockID)
   466  	if err != nil {
   467  		return fmt.Errorf("could not retrieve header for block %v", incorporatedBlockID)
   468  	}
   469  
   470  	e.log.Info().Msgf("processing incorporated block %v at height %d", incorporatedBlockID, incorporatedBlock.Height)
   471  
   472  	// we are interested in blocks with height strictly larger than root block
   473  	if incorporatedBlock.Height <= e.rootHeader.Height {
   474  		return nil
   475  	}
   476  
   477  	index, err := e.index.ByBlockID(incorporatedBlock.ParentID)
   478  	if err != nil {
   479  		return fmt.Errorf("could not retrieve payload index for block %v", incorporatedBlock.ParentID)
   480  	}
   481  
   482  	for _, resultID := range index.ResultIDs {
   483  		result, err := e.results.ByID(resultID)
   484  		if err != nil {
   485  			return fmt.Errorf("could not retrieve receipt incorporated in block %v: %w", incorporatedBlock.ParentID, err)
   486  		}
   487  
   488  		incorporatedResult := flow.NewIncorporatedResult(incorporatedBlock.ParentID, result)
   489  		added := e.pendingIncorporatedResults.Push(incorporatedResult)
   490  		if !added {
   491  			// Not being able to queue an incorporated result is a fatal edge case. It might happen, if the
   492  			// queue capacity is depleted. However, we cannot drop incorporated results, because there
   493  			// is no way that an incorporated result can be re-added later once dropped.
   494  			return fmt.Errorf("failed to queue incorporated result")
   495  		}
   496  	}
   497  	e.inboundEventsNotifier.Notify()
   498  	return nil
   499  }
   500  
   501  // processBlockIncorporatedEvents performs processing of block incorporated hot stuff events
   502  // No errors expected during normal operations.
   503  func (e *Engine) processBlockIncorporatedEvents() error {
   504  	for {
   505  		select {
   506  		case <-e.unit.Quit():
   507  			return nil
   508  		default:
   509  		}
   510  
   511  		msg, ok := e.pendingIncorporatedBlocks.Pop()
   512  		if ok {
   513  			err := e.processIncorporatedBlock(msg.(flow.Identifier))
   514  			if err != nil {
   515  				return fmt.Errorf("could not process incorporated block: %w", err)
   516  			}
   517  			continue
   518  		}
   519  
   520  		// when there is no more messages in the queue, back to the loop to wait
   521  		// for the next incoming message to arrive.
   522  		return nil
   523  	}
   524  }