github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/sealing/engine.go (about)

     1  package sealing
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/gammazero/workerpool"
     7  	"github.com/rs/zerolog"
     8  
     9  	"github.com/koko1123/flow-go-1/consensus/hotstuff/model"
    10  	"github.com/koko1123/flow-go-1/engine"
    11  	"github.com/koko1123/flow-go-1/engine/common/fifoqueue"
    12  	"github.com/koko1123/flow-go-1/engine/consensus"
    13  	"github.com/koko1123/flow-go-1/model/flow"
    14  	"github.com/koko1123/flow-go-1/model/messages"
    15  	"github.com/koko1123/flow-go-1/module"
    16  	"github.com/koko1123/flow-go-1/module/mempool"
    17  	"github.com/koko1123/flow-go-1/module/metrics"
    18  	msig "github.com/koko1123/flow-go-1/module/signature"
    19  	"github.com/koko1123/flow-go-1/network"
    20  	"github.com/koko1123/flow-go-1/network/channels"
    21  	"github.com/koko1123/flow-go-1/state/protocol"
    22  	"github.com/koko1123/flow-go-1/storage"
    23  )
    24  
// Event pairs an untyped message with a flow.Identifier describing its origin.
// NOTE(review): Event is not referenced anywhere else in this file — confirm it
// still has users elsewhere in the package.
type Event struct {
	// OriginID is the identifier associated with the message's origin.
	OriginID flow.Identifier
	// Msg is the untyped message payload.
	Msg      interface{}
}
    29  
    30  // defaultApprovalQueueCapacity maximum capacity of approvals queue
    31  const defaultApprovalQueueCapacity = 10000
    32  
    33  // defaultApprovalResponseQueueCapacity maximum capacity of approval requests queue
    34  const defaultApprovalResponseQueueCapacity = 10000
    35  
    36  // defaultSealingEngineWorkers number of workers to dispatch events for sealing core
    37  const defaultSealingEngineWorkers = 8
    38  
    39  // defaultAssignmentCollectorsWorkerPoolCapacity is the default number of workers that is available for worker pool which is used
    40  // by assignment collector state machine to do transitions
    41  const defaultAssignmentCollectorsWorkerPoolCapacity = 4
    42  
    43  // defaultIncorporatedBlockQueueCapacity maximum capacity for queuing incorporated blocks
    44  // Caution: We cannot drop incorporated blocks, as there is no way that results included in the block
    45  // can be re-added later once dropped. Missing any incorporated result can undermine sealing liveness!
    46  // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
    47  // in case queueing an incorporated block fails.
    48  const defaultIncorporatedBlockQueueCapacity = 10000
    49  
    50  // defaultIncorporatedResultQueueCapacity maximum capacity for queuing incorporated results
    51  // Caution: We cannot drop incorporated results, as there is no way that an incorporated result
    52  // can be re-added later once dropped. Missing incorporated results can undermine sealing liveness!
    53  // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
    54  // in case queueing an incorporated result fails.
    55  const defaultIncorporatedResultQueueCapacity = 80000
    56  
    57  type (
    58  	EventSink chan *Event // Channel to push pending events
    59  )
    60  
// Engine is a wrapper for approval processing `Core` which implements logic for
// queuing and filtering network messages which later will be processed by sealing engine.
// Purpose of this struct is to provide an efficient way to consume messages from the network layer and pass
// them to `Core`. When started via Ready, the Engine launches defaultSealingEngineWorkers goroutines that
// drain the inbound queues, plus one goroutine for finalization events and one for block-incorporated events.
type Engine struct {
	// unit launches the engine's goroutines and provides the quit / ready / done signals.
	unit                       *engine.Unit
	// workerPool is handed to Core at construction and stopped in Done.
	workerPool                 *workerpool.WorkerPool
	// core holds the sealing business logic that queued inputs are dispatched to.
	core                       consensus.SealingCore
	log                        zerolog.Logger
	// me supplies the local node ID used as origin for locally submitted events.
	me                         module.Local
	// headers, results and index are read when processing an incorporated block.
	headers                    storage.Headers
	results                    storage.ExecutionResults
	index                      storage.Index
	// state is queried for the latest finalized block.
	state                      protocol.State
	// cacheMetrics receives the lengths of the approval queues.
	cacheMetrics               module.MempoolMetrics
	// engineMetrics counts received and handled messages.
	engineMetrics              module.EngineMetrics
	// pendingApprovals queues broadcast approvals (untrusted input).
	pendingApprovals           engine.MessageStore
	// pendingRequestedApprovals queues approvals received as responses to requests (untrusted input).
	pendingRequestedApprovals  engine.MessageStore
	// pendingIncorporatedResults queues incorporated results (trusted input).
	pendingIncorporatedResults *fifoqueue.FifoQueue
	// pendingIncorporatedBlocks queues IDs of incorporated blocks (trusted input).
	pendingIncorporatedBlocks  *fifoqueue.FifoQueue
	// inboundEventsNotifier wakes the workers that drain approvals and incorporated results.
	inboundEventsNotifier      engine.Notifier
	// finalizationEventsNotifier wakes the finalization processing goroutine.
	finalizationEventsNotifier engine.Notifier
	// blockIncorporatedNotifier wakes the block-incorporated processing goroutine.
	blockIncorporatedNotifier  engine.Notifier
	// messageHandler filters untrusted network messages into the approval queues.
	messageHandler             *engine.MessageHandler
	// rootHeader is the root block header; incorporated blocks at or below its height are skipped.
	rootHeader                 *flow.Header
}
    87  
    88  // NewEngine constructs new `Engine` which runs on it's own unit.
    89  func NewEngine(log zerolog.Logger,
    90  	tracer module.Tracer,
    91  	conMetrics module.ConsensusMetrics,
    92  	engineMetrics module.EngineMetrics,
    93  	mempool module.MempoolMetrics,
    94  	sealingTracker consensus.SealingTracker,
    95  	net network.Network,
    96  	me module.Local,
    97  	headers storage.Headers,
    98  	payloads storage.Payloads,
    99  	results storage.ExecutionResults,
   100  	index storage.Index,
   101  	state protocol.State,
   102  	sealsDB storage.Seals,
   103  	assigner module.ChunkAssigner,
   104  	sealsMempool mempool.IncorporatedResultSeals,
   105  	requiredApprovalsForSealConstructionGetter module.SealingConfigsGetter,
   106  ) (*Engine, error) {
   107  	rootHeader, err := state.Params().Root()
   108  	if err != nil {
   109  		return nil, fmt.Errorf("could not retrieve root block: %w", err)
   110  	}
   111  
   112  	unit := engine.NewUnit()
   113  	e := &Engine{
   114  		unit:          unit,
   115  		workerPool:    workerpool.New(defaultAssignmentCollectorsWorkerPoolCapacity),
   116  		log:           log.With().Str("engine", "sealing.Engine").Logger(),
   117  		me:            me,
   118  		state:         state,
   119  		engineMetrics: engineMetrics,
   120  		cacheMetrics:  mempool,
   121  		headers:       headers,
   122  		results:       results,
   123  		index:         index,
   124  		rootHeader:    rootHeader,
   125  	}
   126  
   127  	err = e.setupTrustedInboundQueues()
   128  	if err != nil {
   129  		return nil, fmt.Errorf("initialization of inbound queues for trusted inputs failed: %w", err)
   130  	}
   131  
   132  	err = e.setupMessageHandler(requiredApprovalsForSealConstructionGetter)
   133  	if err != nil {
   134  		return nil, fmt.Errorf("could not initialize message handler for untrusted inputs: %w", err)
   135  	}
   136  
   137  	// register engine with the approval provider
   138  	_, err = net.Register(channels.ReceiveApprovals, e)
   139  	if err != nil {
   140  		return nil, fmt.Errorf("could not register for approvals: %w", err)
   141  	}
   142  
   143  	// register engine to the channel for requesting missing approvals
   144  	approvalConduit, err := net.Register(channels.RequestApprovalsByChunk, e)
   145  	if err != nil {
   146  		return nil, fmt.Errorf("could not register for requesting approvals: %w", err)
   147  	}
   148  
   149  	signatureHasher := msig.NewBLSHasher(msig.ResultApprovalTag)
   150  	core, err := NewCore(log, e.workerPool, tracer, conMetrics, sealingTracker, unit, headers, state, sealsDB, assigner, signatureHasher, sealsMempool, approvalConduit, requiredApprovalsForSealConstructionGetter)
   151  	if err != nil {
   152  		return nil, fmt.Errorf("failed to init sealing engine: %w", err)
   153  	}
   154  
   155  	err = core.RepopulateAssignmentCollectorTree(payloads)
   156  	if err != nil {
   157  		return nil, fmt.Errorf("could not repopulate assignment collectors tree: %w", err)
   158  	}
   159  	e.core = core
   160  
   161  	return e, nil
   162  }
   163  
   164  // setupTrustedInboundQueues initializes inbound queues for TRUSTED INPUTS (from other components within the
   165  // consensus node). We deliberately separate the queues for trusted inputs from the MessageHandler, which
   166  // handles external, untrusted inputs. This reduces the attack surface, as it makes it impossible for an external
   167  // attacker to feed values into the inbound channels for trusted inputs, even in the presence of bugs in
   168  // the networking layer or message handler
   169  func (e *Engine) setupTrustedInboundQueues() error {
   170  	e.finalizationEventsNotifier = engine.NewNotifier()
   171  	e.blockIncorporatedNotifier = engine.NewNotifier()
   172  	var err error
   173  	e.pendingIncorporatedResults, err = fifoqueue.NewFifoQueue(defaultIncorporatedResultQueueCapacity)
   174  	if err != nil {
   175  		return fmt.Errorf("failed to create queue for incorporated results: %w", err)
   176  	}
   177  	e.pendingIncorporatedBlocks, err = fifoqueue.NewFifoQueue(defaultIncorporatedBlockQueueCapacity)
   178  	if err != nil {
   179  		return fmt.Errorf("failed to create queue for incorporated blocks: %w", err)
   180  	}
   181  	return nil
   182  }
   183  
   184  // setupMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED INPUTS.
   185  func (e *Engine) setupMessageHandler(getSealingConfigs module.SealingConfigsGetter) error {
   186  	// FIFO queue for broadcasted approvals
   187  	pendingApprovalsQueue, err := fifoqueue.NewFifoQueue(
   188  		defaultApprovalQueueCapacity,
   189  		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalQueue, uint(len)) }),
   190  	)
   191  	if err != nil {
   192  		return fmt.Errorf("failed to create queue for inbound approvals: %w", err)
   193  	}
   194  	e.pendingApprovals = &engine.FifoMessageStore{
   195  		FifoQueue: pendingApprovalsQueue,
   196  	}
   197  
   198  	// FiFo queue for requested approvals
   199  	pendingRequestedApprovalsQueue, err := fifoqueue.NewFifoQueue(
   200  		defaultApprovalResponseQueueCapacity,
   201  		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalResponseQueue, uint(len)) }),
   202  	)
   203  	if err != nil {
   204  		return fmt.Errorf("failed to create queue for requested approvals: %w", err)
   205  	}
   206  	e.pendingRequestedApprovals = &engine.FifoMessageStore{
   207  		FifoQueue: pendingRequestedApprovalsQueue,
   208  	}
   209  
   210  	e.inboundEventsNotifier = engine.NewNotifier()
   211  	// define message queueing behaviour
   212  	e.messageHandler = engine.NewMessageHandler(
   213  		e.log,
   214  		e.inboundEventsNotifier,
   215  		engine.Pattern{
   216  			Match: func(msg *engine.Message) bool {
   217  				_, ok := msg.Payload.(*flow.ResultApproval)
   218  				if ok {
   219  					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
   220  				}
   221  				return ok
   222  			},
   223  			Map: func(msg *engine.Message) (*engine.Message, bool) {
   224  				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
   225  					// if we don't require approvals to construct a seal, don't even process approvals.
   226  					return nil, false
   227  				}
   228  
   229  				return msg, true
   230  			},
   231  			Store: e.pendingApprovals,
   232  		},
   233  		engine.Pattern{
   234  			Match: func(msg *engine.Message) bool {
   235  				_, ok := msg.Payload.(*messages.ApprovalResponse)
   236  				if ok {
   237  					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
   238  				}
   239  				return ok
   240  			},
   241  			Map: func(msg *engine.Message) (*engine.Message, bool) {
   242  				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
   243  					// if we don't require approvals to construct a seal, don't even process approvals.
   244  					return nil, false
   245  				}
   246  
   247  				approval := msg.Payload.(*messages.ApprovalResponse).Approval
   248  				return &engine.Message{
   249  					OriginID: msg.OriginID,
   250  					Payload:  &approval,
   251  				}, true
   252  			},
   253  			Store: e.pendingRequestedApprovals,
   254  		},
   255  	)
   256  
   257  	return nil
   258  }
   259  
   260  // Process sends event into channel with pending events. Generally speaking shouldn't lock for too long.
   261  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   262  	err := e.messageHandler.Process(originID, event)
   263  	if err != nil {
   264  		if engine.IsIncompatibleInputTypeError(err) {
   265  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   266  			return nil
   267  		}
   268  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   269  	}
   270  	return nil
   271  }
   272  
   273  // processAvailableMessages is processor of pending events which drives events from networking layer to business logic in `Core`.
   274  // Effectively consumes messages from networking layer and dispatches them into corresponding sinks which are connected with `Core`.
   275  func (e *Engine) processAvailableMessages() error {
   276  	for {
   277  		select {
   278  		case <-e.unit.Quit():
   279  			return nil
   280  		default:
   281  		}
   282  
   283  		event, ok := e.pendingIncorporatedResults.Pop()
   284  		if ok {
   285  			e.log.Debug().Msg("got new incorporated result")
   286  
   287  			err := e.processIncorporatedResult(event.(*flow.IncorporatedResult))
   288  			if err != nil {
   289  				return fmt.Errorf("could not process incorporated result: %w", err)
   290  			}
   291  			continue
   292  		}
   293  
   294  		// TODO prioritization
   295  		// eg: msg := engine.SelectNextMessage()
   296  		msg, ok := e.pendingRequestedApprovals.Get()
   297  		if !ok {
   298  			msg, ok = e.pendingApprovals.Get()
   299  		}
   300  		if ok {
   301  			e.log.Debug().Msg("got new result approval")
   302  
   303  			err := e.onApproval(msg.OriginID, msg.Payload.(*flow.ResultApproval))
   304  			if err != nil {
   305  				return fmt.Errorf("could not process result approval: %w", err)
   306  			}
   307  			continue
   308  		}
   309  
   310  		// when there is no more messages in the queue, back to the loop to wait
   311  		// for the next incoming message to arrive.
   312  		return nil
   313  	}
   314  }
   315  
   316  // finalizationProcessingLoop is a separate goroutine that performs processing of finalization events
   317  func (e *Engine) finalizationProcessingLoop() {
   318  	finalizationNotifier := e.finalizationEventsNotifier.Channel()
   319  	for {
   320  		select {
   321  		case <-e.unit.Quit():
   322  			return
   323  		case <-finalizationNotifier:
   324  			finalized, err := e.state.Final().Head()
   325  			if err != nil {
   326  				e.log.Fatal().Err(err).Msg("could not retrieve last finalized block")
   327  			}
   328  			err = e.core.ProcessFinalizedBlock(finalized.ID())
   329  			if err != nil {
   330  				e.log.Fatal().Err(err).Msgf("could not process finalized block %v", finalized.ID())
   331  			}
   332  		}
   333  	}
   334  }
   335  
   336  // blockIncorporatedEventsProcessingLoop is a separate goroutine for processing block incorporated events
   337  func (e *Engine) blockIncorporatedEventsProcessingLoop() {
   338  	c := e.blockIncorporatedNotifier.Channel()
   339  
   340  	for {
   341  		select {
   342  		case <-e.unit.Quit():
   343  			return
   344  		case <-c:
   345  			err := e.processBlockIncorporatedEvents()
   346  			if err != nil {
   347  				e.log.Fatal().Err(err).Msg("internal error processing block incorporated queued message")
   348  			}
   349  		}
   350  	}
   351  }
   352  
   353  func (e *Engine) loop() {
   354  	notifier := e.inboundEventsNotifier.Channel()
   355  	for {
   356  		select {
   357  		case <-e.unit.Quit():
   358  			return
   359  		case <-notifier:
   360  			err := e.processAvailableMessages()
   361  			if err != nil {
   362  				e.log.Fatal().Err(err).Msg("internal error processing queued message")
   363  			}
   364  		}
   365  	}
   366  }
   367  
   368  // processIncorporatedResult is a function that creates incorporated result and submits it for processing
   369  // to sealing core. In phase 2, incorporated result is incorporated at same block that is being executed.
   370  // This will be changed in phase 3.
   371  func (e *Engine) processIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error {
   372  	err := e.core.ProcessIncorporatedResult(incorporatedResult)
   373  	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageExecutionReceipt)
   374  	return err
   375  }
   376  
   377  func (e *Engine) onApproval(originID flow.Identifier, approval *flow.ResultApproval) error {
   378  	// don't process approval if originID is mismatched
   379  	if originID != approval.Body.ApproverID {
   380  		return nil
   381  	}
   382  
   383  	err := e.core.ProcessApproval(approval)
   384  	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageResultApproval)
   385  	if err != nil {
   386  		return fmt.Errorf("fatal internal error in sealing core logic")
   387  	}
   388  	return nil
   389  }
   390  
   391  // SubmitLocal submits an event originating on the local node.
   392  func (e *Engine) SubmitLocal(event interface{}) {
   393  	err := e.ProcessLocal(event)
   394  	if err != nil {
   395  		// receiving an input of incompatible type from a trusted internal component is fatal
   396  		e.log.Fatal().Err(err).Msg("internal error processing event")
   397  	}
   398  }
   399  
   400  // Submit submits the given event from the node with the given origin ID
   401  // for processing in a non-blocking manner. It returns instantly and logs
   402  // a potential processing error internally when done.
   403  func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
   404  	err := e.Process(channel, originID, event)
   405  	if err != nil {
   406  		e.log.Fatal().Err(err).Msg("internal error processing event")
   407  	}
   408  }
   409  
   410  // ProcessLocal processes an event originating on the local node.
   411  func (e *Engine) ProcessLocal(event interface{}) error {
   412  	return e.messageHandler.Process(e.me.NodeID(), event)
   413  }
   414  
   415  // Ready returns a ready channel that is closed once the engine has fully
   416  // started. For the propagation engine, we consider the engine up and running
   417  // upon initialization.
   418  func (e *Engine) Ready() <-chan struct{} {
   419  	// launch as many workers as we need
   420  	for i := 0; i < defaultSealingEngineWorkers; i++ {
   421  		e.unit.Launch(e.loop)
   422  	}
   423  	e.unit.Launch(e.finalizationProcessingLoop)
   424  	e.unit.Launch(e.blockIncorporatedEventsProcessingLoop)
   425  	return e.unit.Ready()
   426  }
   427  
   428  func (e *Engine) Done() <-chan struct{} {
   429  	return e.unit.Done(func() {
   430  		e.workerPool.StopWait()
   431  	})
   432  }
   433  
   434  // OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer`
   435  // (1) Informs sealing.Core about finalization of respective block.
   436  //
   437  // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
   438  // from external nodes cannot be considered as inputs to this function
   439  func (e *Engine) OnFinalizedBlock(*model.Block) {
   440  	e.finalizationEventsNotifier.Notify()
   441  }
   442  
   443  // OnBlockIncorporated implements `OnBlockIncorporated` from the `hotstuff.FinalizationConsumer`
   444  // (1) Processes all execution results that were incorporated in parent block payload.
   445  //
   446  // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
   447  // from external nodes cannot be considered as inputs to this function
   448  func (e *Engine) OnBlockIncorporated(incorporatedBlock *model.Block) {
   449  	added := e.pendingIncorporatedBlocks.Push(incorporatedBlock.BlockID)
   450  	if !added {
   451  		// Not being able to queue an incorporated block is a fatal edge case. It might happen, if the
   452  		// queue capacity is depleted. However, we cannot drop incorporated blocks, because there
   453  		// is no way that any contained incorporated result would be re-added later once dropped.
   454  		e.log.Fatal().Msgf("failed to queue incorporated block %v", incorporatedBlock.BlockID)
   455  	}
   456  	e.blockIncorporatedNotifier.Notify()
   457  }
   458  
   459  // processIncorporatedBlock selects receipts that were included into incorporated block and submits them
   460  // for further processing to sealing core. No errors expected during normal operations.
   461  func (e *Engine) processIncorporatedBlock(incorporatedBlockID flow.Identifier) error {
   462  	// In order to process a block within the sealing engine, we need the block's source of
   463  	// randomness (to compute the chunk assignment). The source of randomness can be taken from _any_
   464  	// QC for the block. We know that we have such a QC, once a valid child block is incorporated.
   465  	// Vice-versa, once a block is incorporated, we know that _its parent_ has a valid child, i.e.
   466  	// the parent's source of randomness is now know.
   467  
   468  	incorporatedBlock, err := e.headers.ByBlockID(incorporatedBlockID)
   469  	if err != nil {
   470  		return fmt.Errorf("could not retrieve header for block %v", incorporatedBlockID)
   471  	}
   472  
   473  	e.log.Info().Msgf("processing incorporated block %v at height %d", incorporatedBlockID, incorporatedBlock.Height)
   474  
   475  	// we are interested in blocks with height strictly larger than root block
   476  	if incorporatedBlock.Height <= e.rootHeader.Height {
   477  		return nil
   478  	}
   479  
   480  	index, err := e.index.ByBlockID(incorporatedBlock.ParentID)
   481  	if err != nil {
   482  		return fmt.Errorf("could not retrieve payload index for block %v", incorporatedBlock.ParentID)
   483  	}
   484  
   485  	for _, resultID := range index.ResultIDs {
   486  		result, err := e.results.ByID(resultID)
   487  		if err != nil {
   488  			return fmt.Errorf("could not retrieve receipt incorporated in block %v: %w", incorporatedBlock.ParentID, err)
   489  		}
   490  
   491  		incorporatedResult := flow.NewIncorporatedResult(incorporatedBlock.ParentID, result)
   492  		added := e.pendingIncorporatedResults.Push(incorporatedResult)
   493  		if !added {
   494  			// Not being able to queue an incorporated result is a fatal edge case. It might happen, if the
   495  			// queue capacity is depleted. However, we cannot drop incorporated results, because there
   496  			// is no way that an incorporated result can be re-added later once dropped.
   497  			return fmt.Errorf("failed to queue incorporated result")
   498  		}
   499  	}
   500  	e.inboundEventsNotifier.Notify()
   501  	return nil
   502  }
   503  
   504  // processBlockIncorporatedEvents performs processing of block incorporated hot stuff events
   505  // No errors expected during normal operations.
   506  func (e *Engine) processBlockIncorporatedEvents() error {
   507  	for {
   508  		select {
   509  		case <-e.unit.Quit():
   510  			return nil
   511  		default:
   512  		}
   513  
   514  		msg, ok := e.pendingIncorporatedBlocks.Pop()
   515  		if ok {
   516  			err := e.processIncorporatedBlock(msg.(flow.Identifier))
   517  			if err != nil {
   518  				return fmt.Errorf("could not process incorporated block: %w", err)
   519  			}
   520  			continue
   521  		}
   522  
   523  		// when there is no more messages in the queue, back to the loop to wait
   524  		// for the next incoming message to arrive.
   525  		return nil
   526  	}
   527  }